fix: log and async task issues with modal script

This commit is contained in:
bennykok 2024-02-01 13:42:01 +08:00
parent 38fea1e79f
commit d8951df35f

View File

@ -37,7 +37,9 @@ if not deploy_test:
# dockerfile_image = Image.from_dockerfile(f"{current_directory}/Dockerfile", context_mount=Mount.from_local_dir(f"{current_directory}/data", remote_path="/data"))
dockerfile_image = (
modal.Image.debian_slim()
modal.Image.debian_slim(
python_version="3.11",
)
.apt_install("git", "wget")
.pip_install(
"git+https://github.com/modal-labs/asgiproxy.git", "httpx", "tqdm"
@ -83,7 +85,7 @@ if not deploy_test:
# Time to wait between API check attempts in milliseconds
COMFY_API_AVAILABLE_INTERVAL_MS = 50
# Maximum number of API check attempts
COMFY_API_AVAILABLE_MAX_RETRIES = 500
COMFY_API_AVAILABLE_MAX_RETRIES = 1000
# Time to wait between poll attempts in milliseconds
COMFY_POLLING_INTERVAL_MS = 250
# Maximum number of poll attempts
@ -94,7 +96,8 @@ COMFY_HOST = "127.0.0.1:8188"
async def check_server(url, retries=50, delay=500):
import aiohttp
for i in range(retries):
# for i in range(retries):
while True:
try:
async with aiohttp.ClientSession() as session:
async with session.get(url) as response:
@ -157,6 +160,7 @@ class ComfyDeployRunner:
async def read_stream(self, stream, isStderr):
import time
while True:
try:
line = await stream.readline()
if line:
l = line.decode('utf-8').strip()
@ -181,6 +185,9 @@ class ComfyDeployRunner:
})
else:
break
except asyncio.CancelledError:
# Handle the cancellation here if needed
break # Break out of the loop on cancellation
@enter()
async def setup(self):
@ -197,24 +204,22 @@ class ComfyDeployRunner:
# env={**os.environ, "COLUMNS": "10000"}
)
stdout_task = asyncio.create_task(
self.read_stream(self.server_process.stdout, False))
stderr_task = asyncio.create_task(
self.read_stream(self.server_process.stderr, True))
await check_server(
f"http://{COMFY_HOST}",
COMFY_API_AVAILABLE_MAX_RETRIES,
COMFY_API_AVAILABLE_INTERVAL_MS,
)
stdout_task.cancel()
stderr_task.cancel()
@exit()
async def cleanup(self, exc_type, exc_value, traceback):
print(f"comfy-modal - cleanup", exc_type, exc_value, traceback)
# self.server_process.kill()
# Get the current event loop
loop = asyncio.get_event_loop()
# Check if the event loop is closed
if loop.is_closed():
print("The event loop is closed.")
else:
try:
self.server_process.terminate()
await self.server_process.wait()
except Exception as e:
print("Issues when cleaning up", e)
print("The event loop is open.")
@method()
async def run(self, input: Input):
@ -228,6 +233,7 @@ class ComfyDeployRunner:
stderr_task = asyncio.create_task(
self.read_stream(self.server_process.stderr, True))
try:
class TimeoutError(Exception):
pass
@ -244,9 +250,18 @@ class ComfyDeployRunner:
signal.signal(signal.SIGALRM, timeout_handler)
try:
# Schedule SIGALRM to fire after run_timeout seconds
signal.alarm(run_timeout) # timeout is run_timeout seconds, not a fixed 5
ok = await check_server(
f"http://{COMFY_HOST}",
COMFY_API_AVAILABLE_MAX_RETRIES,
COMFY_API_AVAILABLE_INTERVAL_MS,
)
if not ok:
raise Exception("ComfyUI API is not available")
# Set an alarm for some seconds in the future
data = json.dumps({
"run_id": input.prompt_id,
"status": "started",
@ -295,6 +310,20 @@ class ComfyDeployRunner:
except TimeoutError:
print("Operation timed out")
return {"status": "failed"}
except Exception as e:
print(f"Unexpected error occurred: {str(e)}")
data = json.dumps({
"run_id": input.prompt_id,
"status": "failed",
"time": datetime.now().isoformat()
}).encode('utf-8')
async with aiohttp.ClientSession() as session:
async with session.post(input.status_endpoint, data=data) as response:
print("response", response)
self.machine_logs.append({
"logs": str(e),
"timestamp": time.time()
})
finally:
signal.alarm(0)
@ -312,9 +341,11 @@ class ComfyDeployRunner:
print("uploaded log_data")
# print(data)
self.machine_logs = []
finally:
stdout_task.cancel()
stderr_task.cancel()
await stdout_task
await stderr_task
return result