Retry logic when calling api (#57)

* fix: retry logic, bypass logfire, clean up log * fix: max_retries and retry_delay_multiplier, do not throw when pass the retry failed
2024-08-01 20:43:21 -07:00 · 2024-08-01 20:43:21 -07:00 · 8e12803ea1
commit 8e12803ea1
parent 7585d5049a
2 changed files with 160 additions and 91 deletions
--- a/custom_routes.py
+++ b/custom_routes.py
@ -22,17 +22,97 @@ from typing import Dict, List, Union, Any, Optional
 from PIL import Image
 import copy
 import struct
+from aiohttp import ClientError
+import atexit
+
+# Global session
+client_session = None
+
+# def create_client_session():
+#     global client_session
+#     if client_session is None:
+#         client_session = aiohttp.ClientSession()
+        
+async def ensure_client_session():
+    global client_session
+    if client_session is None:
+        client_session = aiohttp.ClientSession()
+
+async def cleanup():
+    global client_session
+    if client_session:
+        await client_session.close()
+        
+def exit_handler():
+    print("Exiting the application. Initiating cleanup...")
+    loop = asyncio.get_event_loop()
+    loop.run_until_complete(cleanup())
+
+atexit.register(exit_handler)
+
+max_retries = int(os.environ.get('MAX_RETRIES', '3'))
+retry_delay_multiplier = float(os.environ.get('RETRY_DELAY_MULTIPLIER', '2'))
+
+print(f"max_retries: {max_retries}, retry_delay_multiplier: {retry_delay_multiplier}")
+
+async def async_request_with_retry(method, url, **kwargs):
+    global client_session
+    await ensure_client_session()
+    retry_delay = 1  # Start with 1 second delay
+
+    for attempt in range(max_retries):
+        try:
+            async with client_session.request(method, url, **kwargs) as response:
+                response.raise_for_status()
+                return response
+        except ClientError as e:
+            if attempt == max_retries - 1:
+                logger.error(f"Request failed after {max_retries} attempts: {e}")
+                # raise
+            logger.warning(f"Request failed (attempt {attempt + 1}/{max_retries}): {e}")
+            await asyncio.sleep(retry_delay)
+            retry_delay *= retry_delay_multiplier  # Exponential backoff

 from logging import basicConfig, getLogger
-import logfire
-# if os.environ.get('LOGFIRE_TOKEN', None) is not None:
-logfire.configure(
+
+# Check for an environment variable to enable/disable Logfire
+use_logfire = os.environ.get('USE_LOGFIRE', 'false').lower() == 'true'
+
+if use_logfire:
+    try:
+        import logfire
+        logfire.configure(
            send_to_logfire="if-token-present"
-)
-# basicConfig(handlers=[logfire.LogfireLoggingHandler()])
-logfire_handler = logfire.LogfireLoggingHandler()
-logger = getLogger("comfy-deploy")
-logger.addHandler(logfire_handler)
+        )
+        logger = logfire
+    except ImportError:
+        print("Logfire not installed or disabled. Using standard Python logger.")
+        use_logfire = False
+
+if not use_logfire:
+    # Use a standard Python logger when Logfire is disabled or not available
+    logger = getLogger("comfy-deploy")
+    basicConfig(level="INFO")  # You can adjust the logging level as needed
+
+def log(level, message, **kwargs):
+    if use_logfire:
+        getattr(logger, level)(message, **kwargs)
+    else:
+        getattr(logger, level)(f"{message} {kwargs}")
+        
+# For a span, you might need to create a context manager
+from contextlib import contextmanager
+
+@contextmanager
+def log_span(name):
+    if use_logfire:
+        with logger.span(name):
+            yield
+    # else:
+    #     logger.info(f"Start: {name}")
+    #     yield
+    #     logger.info(f"End: {name}")
+

 from globals import StreamingPrompt, Status, sockets, SimplePrompt, streaming_prompt_metadata, prompt_metadata

@ -306,7 +386,7 @@ async def stream_prompt(data):
        workflow_api=workflow_api
    )

-    logfire.info("Begin prompt", prompt=prompt)
+    log('info', "Begin prompt", prompt=prompt)

    try:
        res = post_prompt(prompt)
@ -359,8 +439,8 @@ async def stream_response(request):
    prompt_id = data.get("prompt_id")
    comfy_message_queues[prompt_id] = asyncio.Queue()

-    with logfire.span('Streaming Run'):
-        logfire.info('Streaming prompt')
+    with log_span('Streaming Run'):
+        log('info', 'Streaming prompt')

        try:
            result = await stream_prompt(data=data)
@ -373,7 +453,7 @@ async def stream_response(request):
                    if not comfy_message_queues[prompt_id].empty():
                        data = await comfy_message_queues[prompt_id].get()

-                        logfire.info(data["event"], data=json.dumps(data))
+                        log('info', data["event"], data=json.dumps(data))
                        # logger.info("listener", data)
                        await response.write(f"event: event_update\ndata: {json.dumps(data)}\n\n".encode('utf-8'))
                        await response.drain()  # Ensure the buffer is flushed
@ -384,10 +464,10 @@ async def stream_response(request):

                await asyncio.sleep(0.1)  # Adjust the sleep duration as needed
        except asyncio.CancelledError:
-            logfire.info("Streaming was cancelled")
+            log('info', "Streaming was cancelled")
            raise
        except Exception as e:
-            logfire.error("Streaming error", error=e)
+            log('error', "Streaming error", error=e)
        finally:
            # event_emitter.off("send_json", task)
            await response.write_eof()
@ -482,10 +562,9 @@ async def upload_file_endpoint(request):

    if get_url:
        try:
-            async with aiohttp.ClientSession() as session:
            headers = {'Authorization': f'Bearer {token}'}
            params = {'file_size': file_size, 'type': file_type}
-                async with session.get(get_url, params=params, headers=headers) as response:
+            response = await async_request_with_retry('GET', get_url, params=params, headers=headers)
            if response.status == 200:
                content = await response.json()
                upload_url = content["upload_url"]
@ -496,7 +575,7 @@ async def upload_file_endpoint(request):
                        # "x-amz-acl": "public-read",
                        "Content-Length": str(file_size)
                    }
-                            async with session.put(upload_url, data=f, headers=headers) as upload_response:
+                    upload_response = await async_request_with_retry('PUT', upload_url, data=f, headers=headers)
                    if upload_response.status == 200:
                        return web.json_response({
                            "message": "File uploaded successfully",
@ -588,9 +667,7 @@ async def update_realtime_run_status(realtime_id: str, status_endpoint: str, sta
    if (status_endpoint is None):
        return
    # requests.post(status_endpoint, json=body)
-    async with aiohttp.ClientSession() as session:
-        async with session.post(status_endpoint, json=body) as response:
-            pass
+    await async_request_with_retry('POST', status_endpoint, json=body)

@server.PromptServer.instance.routes.get('/comfyui-deploy/ws')
 async def websocket_handler(request):
@ -611,9 +688,8 @@ async def websocket_handler(request):
    status_endpoint = request.rel_url.query.get('status_endpoint', None)

    if auth_token is not None and get_workflow_endpoint_url is not None:
-        async with aiohttp.ClientSession() as session:
        headers = {'Authorization': f'Bearer {auth_token}'}
-            async with session.get(get_workflow_endpoint_url, headers=headers) as response:
+        response = await async_request_with_retry('GET', get_workflow_endpoint_url, headers=headers)
        if response.status == 200:
            workflow = await response.json()

@ -805,13 +881,14 @@ async def send_json_override(self, event, data, sid=None):

            prompt_metadata[prompt_id].progress.add(node)
            calculated_progress = len(prompt_metadata[prompt_id].progress) / len(prompt_metadata[prompt_id].workflow_api)
+            calculated_progress = round(calculated_progress, 2)
            # logger.info("calculated_progress", calculated_progress)

            if prompt_metadata[prompt_id].last_updated_node is not None and prompt_metadata[prompt_id].last_updated_node == node:
                return
            prompt_metadata[prompt_id].last_updated_node = node
            class_type = prompt_metadata[prompt_id].workflow_api[node]['class_type']
-            logger.info(f"updating run live status {class_type}")
+            logger.info(f"At: {calculated_progress * 100}% - {class_type}")
            await send("live_status", {
                "prompt_id": prompt_id,
                "current_node": class_type,
@ -836,14 +913,15 @@ async def send_json_override(self, event, data, sid=None):
        # await update_run_with_output(prompt_id, data)

    if event == 'executed' and 'node' in data and 'output' in data:
-        logger.info(f"executed {data}")
        if prompt_id in prompt_metadata:
            node = data.get('node')
            class_type = prompt_metadata[prompt_id].workflow_api[node]['class_type']
-            logger.info(f"executed {class_type}")
+            logger.info(f"Executed {class_type} {data}")
            if class_type == "PreviewImage":
-                logger.info("skipping preview image")
+                logger.info("Skipping preview image")
                return
+        else:
+            logger.info(f"Executed {data}")
            
        await update_run_with_output(prompt_id, data.get('output'), node_id=data.get('node'))
        # await update_run_with_output(prompt_id, data.get('output'), node_id=data.get('node'))
@ -864,7 +942,7 @@ async def update_run_live_status(prompt_id, live_status, calculated_progress: fl
    if (status_endpoint is None):
        return

-    logger.info(f"progress {calculated_progress}")
+    # logger.info(f"progress {calculated_progress}")

    body = {
        "run_id": prompt_id,
@ -883,9 +961,7 @@ async def update_run_live_status(prompt_id, live_status, calculated_progress: fl
        })

    # requests.post(status_endpoint, json=body)
-    async with aiohttp.ClientSession() as session:
-        async with session.post(status_endpoint, json=body) as response:
-            pass
+    await async_request_with_retry('POST', status_endpoint, json=body)


 async def update_run(prompt_id: str, status: Status):
@ -916,9 +992,7 @@ async def update_run(prompt_id: str, status: Status):
        try:
            # requests.post(status_endpoint, json=body)
            if (status_endpoint is not None):
-                async with aiohttp.ClientSession() as session:
-                    async with session.post(status_endpoint, json=body) as response:
-                        pass
+                await async_request_with_retry('POST', status_endpoint, json=body)

            if (status_endpoint is not None) and cd_enable_run_log and (status == Status.SUCCESS or status == Status.FAILED):
                try:
@ -948,9 +1022,7 @@ async def update_run(prompt_id: str, status: Status):
                            ]
                        }

-                        async with aiohttp.ClientSession() as session:
-                            async with session.post(status_endpoint, json=body) as response:
-                                pass
+                        await async_request_with_retry('POST', status_endpoint, json=body)
                        # requests.post(status_endpoint, json=body)
                except Exception as log_error:
                    logger.info(f"Error reading log file: {log_error}")
@ -998,7 +1070,7 @@ async def upload_file(prompt_id, filename, subfolder=None, content_type="image/p
    filename = os.path.basename(filename)
    file = os.path.join(output_dir, filename)

-    logger.info(f"uploading file {file}")
+    logger.info(f"Uploading file {file}")

    file_upload_endpoint = prompt_metadata[prompt_id].file_upload_endpoint

@ -1024,18 +1096,17 @@ async def upload_file(prompt_id, filename, subfolder=None, content_type="image/p
            "Content-Length": str(len(data)),
        }
        # response = requests.put(ok.get("url"), headers=headers, data=data)
-        async with aiohttp.ClientSession() as session:
-            async with session.put(ok.get("url"), headers=headers, data=data) as response:
+        response = await async_request_with_retry('PUT', ok.get("url"), headers=headers, data=data)
        logger.info(f"Upload file response status: {response.status}, status text: {response.reason}")
        end_time = time.time()  # End timing after the request is complete
        logger.info("Upload time: {:.2f} seconds".format(end_time - start_time))

 def have_pending_upload(prompt_id):
    if prompt_id in prompt_metadata and len(prompt_metadata[prompt_id].uploading_nodes) > 0:
-        logger.info(f"have pending upload {len(prompt_metadata[prompt_id].uploading_nodes)}")
+        logger.info(f"Have pending upload {len(prompt_metadata[prompt_id].uploading_nodes)}")
        return True

-    logger.info("no pending upload")
+    logger.info("No pending upload")
    return False

 def mark_prompt_done(prompt_id):
@ -1093,7 +1164,7 @@ async def update_file_status(prompt_id: str, data, uploading, have_error=False,
        else:
            prompt_metadata[prompt_id].uploading_nodes.discard(node_id)

-    logger.info(prompt_metadata[prompt_id].uploading_nodes)
+    logger.info(f"Remaining uploads: {prompt_metadata[prompt_id].uploading_nodes}")
    # Update the remote status

    if have_error:
@ -1177,7 +1248,7 @@ async def update_run_with_output(prompt_id, data, node_id=None):

    if have_upload_media:
        try:
-            logger.info(f"\nhave_upload {have_upload_media} {node_id}")
+            logger.info(f"\nHave_upload {have_upload_media} Node Id: {node_id}")

            if have_upload_media:
                await update_file_status(prompt_id, data, True, node_id=node_id)
@ -1190,9 +1261,7 @@ async def update_run_with_output(prompt_id, data, node_id=None):

    # requests.post(status_endpoint, json=body)
    if status_endpoint is not None:
-        async with aiohttp.ClientSession() as session:
-            async with session.post(status_endpoint, json=body) as response:
-                pass
+        await async_request_with_retry('POST', status_endpoint, json=body)

    await send('outputs_uploaded', {
        "prompt_id": prompt_id
--- a/requirements.txt
+++ b/requirements.txt
@ -2,4 +2,4 @@ aiofiles
 pydantic
 opencv-python
 imageio-ffmpeg
-logfire
+# logfire