feat: add modal cloud builder

This commit is contained in:
BennyKok 2024-01-04 22:28:19 +08:00
parent d879889e1b
commit 314eb9fd16
17 changed files with 694 additions and 0 deletions

View File

@ -0,0 +1,3 @@
.env
__pycache__
venv

1
builder/modal-builder/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.env

View File

@ -0,0 +1,16 @@
# Builder service image: the FastAPI app that runs `modal deploy` on request.
FROM python:3.10
WORKDIR /app
COPY ./requirements.txt ./
RUN pip install --no-cache-dir --upgrade -r ./requirements.txt
COPY ./src ./src
# Per-machine build folders are created under /app/builds by src/main.py.
RUN mkdir builds
# CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "80", "--lifespan", "on"]
# main.py starts uvicorn itself on port 8080 (see its __main__ guard).
CMD ["python", "src/main.py"]
# If running behind a proxy like Nginx or Traefik add --proxy-headers
# CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80", "--proxy-headers"]

View File

@ -0,0 +1,17 @@
# fly.toml app configuration file generated for modal-builder on 2024-01-03T22:29:34+08:00
#
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
#
app = "modal-builder"
primary_region = "sea"

[build]

[http_service]
  # Must match the port the app binds in src/main.py (uvicorn on 8080).
  internal_port = 8080
  force_https = true
  # Scale to zero when idle; wake the machine on incoming traffic.
  auto_stop_machines = true
  auto_start_machines = true
  min_machines_running = 0
  processes = ["app"]

View File

@ -0,0 +1,5 @@
# NOTE(review): modal and requests are unpinned while the rest are pinned —
# consider pinning them too for reproducible builds.
modal
fastapi==0.108.0
pydantic==2.5.3
uvicorn[standard]==0.25.0
requests

BIN
builder/modal-builder/src/.DS_Store vendored Normal file

Binary file not shown.

View File

View File

@ -0,0 +1,315 @@
from typing import Union, Optional, Dict
from pydantic import BaseModel
from fastapi import FastAPI, HTTPException, WebSocket, BackgroundTasks, WebSocketDisconnect
from fastapi.responses import JSONResponse
from fastapi.logger import logger as fastapi_logger
import os
import json
import subprocess
import time
from contextlib import asynccontextmanager
import asyncio
import threading
import signal
import logging
from fastapi.logger import logger as fastapi_logger
import requests
from concurrent.futures import ThreadPoolExecutor
# executor = ThreadPoolExecutor(max_workers=5)
# Wire our loggers into gunicorn's handlers so logs surface when the app is
# served by gunicorn (a no-op when those loggers have no handlers).
gunicorn_error_logger = logging.getLogger("gunicorn.error")
gunicorn_logger = logging.getLogger("gunicorn")
uvicorn_access_logger = logging.getLogger("uvicorn.access")
uvicorn_access_logger.handlers = gunicorn_error_logger.handlers
fastapi_logger.handlers = gunicorn_error_logger.handlers
# When imported by a server (not run as a script), inherit gunicorn's level.
if __name__ != "__main__":
    fastapi_logger.setLevel(gunicorn_logger.level)
else:
    fastapi_logger.setLevel(logging.DEBUG)
logger = logging.getLogger("uvicorn")
logger.setLevel(logging.INFO)
# Timestamp of the most recent request/ws activity; check_inactivity() uses
# it to decide when to shut the process down.
last_activity_time = time.time()
# Idle timeout in seconds (4 minutes) before the process exits.
global_timeout = 60 * 4
# machine_id -> connected WebSocket used for streaming build logs.
machine_id_websocket_dict = {}
# machine_id -> True while a build for that machine is in progress.
machine_id_status = {}
async def check_inactivity():
    """Poll once per second and SIGINT the process after a quiet period.

    The process is considered idle when no request/websocket activity has
    been recorded for `global_timeout` seconds AND no build is in flight
    (machine_id_status empty). Sending SIGINT lets uvicorn shut down
    gracefully, which triggers fly.io's scale-to-zero.
    """
    global last_activity_time
    while True:
        idle_seconds = time.time() - last_activity_time
        if idle_seconds <= global_timeout:
            await asyncio.sleep(1)  # Check every second
            continue
        if machine_id_status:
            # Timed out, but a build is still running — keep waiting.
            await asyncio.sleep(1)
            continue
        logger.info(f"No activity for {global_timeout} seconds, exiting...")
        os.kill(os.getpid(), signal.SIGINT)
        break
@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup: run the inactivity watchdog on its own event loop/thread so it
    # keeps ticking independently of the server's loop.
    thread = run_in_new_thread(check_inactivity())
    yield
    # Shutdown: nothing is actively cancelled here; the watchdog thread is a
    # daemon and dies with the process.
    logger.info("Cancelling")
app = FastAPI(lifespan=lifespan)
# MODAL_ORG = os.environ.get("MODAL_ORG")


@app.get("/")
def read_root():
    """Health-check route; also counts as activity for the idle watchdog."""
    global last_activity_time
    last_activity_time = time.time()
    logger.info(f"Extended inactivity time to {global_timeout}")
    return {"Hello": "World"}
# create a post route called /create takes in a json of example
# {
# name: "my first image",
# deps: {
# "comfyui": "d0165d819afe76bd4e6bdd710eb5f3e571b6a804",
# "git_custom_nodes": {
# "https://github.com/cubiq/ComfyUI_IPAdapter_plus": {
# "hash": "2ca0c6dd0b2ad64b1c480828638914a564331dcd",
# "disabled": true
# },
# "https://github.com/ltdrdata/ComfyUI-Manager.git": {
# "hash": "9c86f62b912f4625fe2b929c7fc61deb9d16f6d3",
# "disabled": false
# },
# },
# "file_custom_nodes": []
# }
# }
class GitCustomNodes(BaseModel):
    """A pinned git custom node: commit hash plus an enable/disable flag."""
    hash: str
    disabled: bool


class Snapshot(BaseModel):
    """ComfyUI environment snapshot: core commit hash + pinned custom nodes."""
    comfyui: str
    git_custom_nodes: Dict[str, GitCustomNodes]


class Item(BaseModel):
    """Build request payload POSTed to /create."""
    machine_id: str
    name: str
    snapshot: Snapshot
    callback_url: str
@app.websocket("/ws/{machine_id}")
async def websocket_endpoint(websocket: WebSocket, machine_id: str):
    """Register a log-streaming socket for `machine_id` and hold it open.

    build_logic() pushes build-log events to this socket while a deploy
    runs. Any inbound message just refreshes the idle-shutdown timer.
    """
    await websocket.accept()
    machine_id_websocket_dict[machine_id] = websocket
    try:
        while True:
            data = await websocket.receive_text()
            global last_activity_time
            last_activity_time = time.time()
            logger.info(f"Extended inactivity time to {global_timeout}")
            # You can handle received messages here if needed
    except WebSocketDisconnect:
        # Drop the registration so build_logic stops trying to send to it.
        if machine_id in machine_id_websocket_dict:
            machine_id_websocket_dict.pop(machine_id)
# @app.get("/test")
# async def test():
# machine_id_status["123"] = True
# global last_activity_time
# last_activity_time = time.time()
# logger.info(f"Extended inactivity time to {global_timeout}")
# await asyncio.sleep(10)
# machine_id_status["123"] = False
# machine_id_status.pop("123")
# return {"Hello": "World"}
# Strong references to in-flight build tasks. asyncio.create_task only keeps
# a weak reference to its task, so without this set a running build could be
# garbage-collected mid-flight.
_background_tasks = set()


@app.post("/create")
async def create_item(item: Item):
    """Queue a `modal deploy` build for item.machine_id.

    Returns 400 if a build for the same machine is already in progress;
    otherwise schedules build_logic() as a background task and returns 200
    immediately (results are delivered to item.callback_url).
    """
    global last_activity_time
    last_activity_time = time.time()
    logger.info(f"Extended inactivity time to {global_timeout}")
    if item.machine_id in machine_id_status and machine_id_status[item.machine_id]:
        return JSONResponse(status_code=400, content={"error": "Build already in progress."})
    # Mark the build in-progress before yielding control so two rapid
    # /create calls for the same machine cannot both be queued.
    machine_id_status[item.machine_id] = True
    # Run the building logic as a background task, keeping a strong
    # reference until it completes.
    task = asyncio.create_task(build_logic(item))
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)
    return JSONResponse(status_code=200, content={"message": "Build Queued"})
async def build_logic(item: Item):
    """Deploy the machine's app to Modal and report the outcome.

    Copies the app template into a per-machine build folder, writes the
    config and snapshot files the template expects, runs `modal deploy`,
    streams the subprocess output to the machine's websocket (when one is
    connected), and finally POSTs the result — endpoint url and/or build
    log — to item.callback_url.
    """
    # Deploy to modal
    folder_path = f"/app/builds/{item.machine_id}"
    # Flag the build as running (this also keeps the idle watchdog alive).
    machine_id_status[item.machine_id] = True
    # Copy the app template into the per-machine build folder.
    cp_process = await asyncio.subprocess.create_subprocess_exec("cp", "-r", "/app/src/template", folder_path)
    await cp_process.wait()
    # Write the config file consumed by the template's app.py.
    config = {
        "name": item.name,
        "deploy_test": os.environ.get("DEPLOY_TEST_FLAG", "False")
    }
    with open(f"{folder_path}/config.py", "w") as f:
        f.write("config = " + json.dumps(config))
    with open(f"{folder_path}/data/snapshot.json", "w") as f:
        f.write(item.snapshot.json())
    # Run `modal deploy` inside the build folder so relative paths resolve.
    process = await asyncio.subprocess.create_subprocess_shell(
        f"modal deploy app.py",
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
        cwd=folder_path
    )
    url = None
    # Initialize the logs cache
    machine_logs_cache = []
    # Stream the output.
    # NOTE(review): awaiting stdout.readline() and stderr.readline()
    # sequentially can stall if one stream stays quiet while the other
    # fills its pipe buffer — consider draining both streams concurrently.
    while True:
        line = await process.stdout.readline()
        error = await process.stderr.readline()
        if not line and not error:
            break
        l = line.decode('utf-8').strip()
        e = error.decode('utf-8').strip()
        if l != "":
            logger.info(l)
            machine_logs_cache.append({
                "logs": l,
                "timestamp": time.time()
            })
            if item.machine_id in machine_id_websocket_dict:
                await machine_id_websocket_dict[item.machine_id].send_text(json.dumps({"event": "LOGS", "data": {
                    "machine_id": item.machine_id,
                    "logs": l,
                    "timestamp": time.time()
                }}))
        # The deployed endpoint url appears either after "Created
        # comfyui_app =>" or alone on its own line.
        if "Created comfyui_app =>" in l:
            url = l.split("=>")[1].strip()
        # Some case it only prints the url on a blank line
        if (l.startswith("https://") and l.endswith(".modal.run")):
            url = l
        if url:
            if item.machine_id in machine_id_websocket_dict:
                await machine_id_websocket_dict[item.machine_id].send_text(json.dumps({"event": "LOGS", "data": {
                    "machine_id": item.machine_id,
                    "logs": f"App image built, url: {url}",
                    "timestamp": time.time()
                }}))
        if e != "":
            logger.info(e)
            machine_logs_cache.append({
                "logs": e,
                "timestamp": time.time()
            })
            if item.machine_id in machine_id_websocket_dict:
                await machine_id_websocket_dict[item.machine_id].send_text(json.dumps({"event": "LOGS", "data": {
                    "machine_id": item.machine_id,
                    "logs": e,
                    "timestamp": time.time()
                }}))
    # Wait for the subprocess to finish
    await process.wait()
    # Close the ws connection and also pop the item
    if item.machine_id in machine_id_websocket_dict and machine_id_websocket_dict[item.machine_id] is not None:
        await machine_id_websocket_dict[item.machine_id].close()
    if item.machine_id in machine_id_websocket_dict:
        machine_id_websocket_dict.pop(item.machine_id)
    if item.machine_id in machine_id_status:
        machine_id_status[item.machine_id] = False
    # Check for errors
    if process.returncode != 0:
        logger.info("An error occurred.")
        # Report the failure (with the full build log) to the callback url.
        # NOTE(review): requests.post is a blocking call inside an async
        # function; it stalls the event loop for the duration of the request.
        machine_logs_cache.append({
            "logs": "Unable to build the app image.",
            "timestamp": time.time()
        })
        requests.post(item.callback_url, json={"machine_id": item.machine_id, "build_log": json.dumps(machine_logs_cache)})
        return
    # Deploy succeeded but no endpoint url was parsed from the output.
    if url is None:
        machine_logs_cache.append({
            "logs": "App image built, but url is None, unable to parse the url.",
            "timestamp": time.time()
        })
        requests.post(item.callback_url, json={"machine_id": item.machine_id, "build_log": json.dumps(machine_logs_cache)})
        return
    # example https://bennykok--my-app-comfyui-app.modal.run/
    requests.post(item.callback_url, json={"machine_id": item.machine_id, "endpoint": url, "build_log": json.dumps(machine_logs_cache)})
    logger.info("done")
    logger.info(url)
def start_loop(event_loop):
    """Adopt `event_loop` as this thread's event loop and run it until stopped."""
    asyncio.set_event_loop(event_loop)
    event_loop.run_forever()
def run_in_new_thread(coroutine):
    """Schedule `coroutine` on a fresh event loop running in a daemon thread.

    Returns the thread object; the coroutine's result/exception is discarded.
    """
    fresh_loop = asyncio.new_event_loop()
    worker = threading.Thread(target=start_loop, args=(fresh_loop,), daemon=True)
    worker.start()
    asyncio.run_coroutine_threadsafe(coroutine, fresh_loop)
    return worker
if __name__ == "__main__":
    import uvicorn
    # Run the ASGI app directly (the container CMD uses this path);
    # lifespan="on" forces startup/shutdown events so the inactivity
    # watchdog is started.
    uvicorn.run("main:app", host="0.0.0.0", port=8080, lifespan="on")

View File

@ -0,0 +1 @@
venv

View File

@ -0,0 +1,71 @@
# Use Nvidia CUDA base image
FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04 as base

# Prevents prompts from packages asking for user input during installation
ENV DEBIAN_FRONTEND=noninteractive
# Prefer binary wheels over source distributions for faster pip installations
ENV PIP_PREFER_BINARY=1
# Ensures output from python is printed immediately to the terminal without buffering
ENV PYTHONUNBUFFERED=1

# Install Python, git and other necessary tools
RUN apt-get update && apt-get install -y \
    python3.10 \
    python3-pip \
    git \
    wget
RUN ln -s /usr/bin/python3 /usr/bin/python

# Clean up to reduce image size
RUN apt-get autoremove -y && apt-get clean -y && rm -rf /var/lib/apt/lists/*

# Clone ComfyUI repository, pinned to a specific commit
RUN git clone https://github.com/comfyanonymous/ComfyUI.git /comfyui
RUN cd /comfyui && git reset --hard b12b48e170ccff156dc6ec11242bb6af7d8437fd

# Change working directory to ComfyUI
WORKDIR /comfyui

# Install ComfyUI dependencies (CUDA 12.1 wheels for torch/xformers)
RUN pip3 install --no-cache-dir torch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 --index-url https://download.pytorch.org/whl/cu121
RUN pip3 install --no-cache-dir xformers==0.0.23 --index-url https://download.pytorch.org/whl/cu121
RUN pip3 install -r requirements.txt

# Install ComfyUI-Manager and its dependencies
WORKDIR /comfyui/custom_nodes
RUN git clone --depth 1 https://github.com/ltdrdata/ComfyUI-Manager.git
RUN cd ComfyUI-Manager && pip3 install -r requirements.txt

# Copy the snapshot json in place so ComfyUI-Manager can restore it
RUN mkdir ComfyUI-Manager/startup-scripts
COPY /data/snapshot.json ComfyUI-Manager/startup-scripts/restore-snapshot.json

WORKDIR /comfyui
COPY /data/extra_model_paths.yaml .

# Install custom-node dependencies listed in deps.json at build time
WORKDIR /
COPY /data/install_deps.py .
COPY /data/deps.json .
RUN python3 install_deps.py

# Install comfyui-deploy, pinned to a specific commit
WORKDIR /comfyui/custom_nodes
RUN git clone https://github.com/BennyKok/comfyui-deploy.git && cd comfyui-deploy && git reset --hard 744a222e2652014e4d09af6b54fc11263b15e2f7

WORKDIR /
COPY /data/start.sh /start.sh
RUN chmod +x /start.sh
ENTRYPOINT ["/start.sh"]

View File

@ -0,0 +1,188 @@
import modal
from modal import Image, Mount, web_endpoint, Stub, asgi_app
import json
import urllib.request
import urllib.parse
from pydantic import BaseModel
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse
import os
current_directory = os.path.dirname(os.path.realpath(__file__))
from config import config
# config.py is written by the builder; deploy_test is the string "True"
# when the DEPLOY_TEST_FLAG env var was set at build time.
deploy_test = config["deploy_test"] == "True"
web_app = FastAPI()
print(config)
print("deploy_test ", deploy_test)
# One Modal stub per machine, named after the build request.
stub = Stub(name=config["name"])
if not deploy_test:
    # Build the ComfyUI image from the bundled Dockerfile, mounting ./data
    # (snapshot, deps, start script) into the build context at /data.
    dockerfile_image = Image.from_dockerfile(f"{current_directory}/Dockerfile", context_mount=Mount.from_local_dir(f"{current_directory}/data", remote_path="/data"))
# Time to wait between API check attempts in milliseconds
COMFY_API_AVAILABLE_INTERVAL_MS = 50
# Maximum number of API check attempts
COMFY_API_AVAILABLE_MAX_RETRIES = 500
# Time to wait between poll attempts in milliseconds
COMFY_POLLING_INTERVAL_MS = 250
# Maximum number of poll attempts
COMFY_POLLING_MAX_RETRIES = 500
# Host where ComfyUI is running
COMFY_HOST = "127.0.0.1:8188"
def check_server(url, retries=50, delay=500):
    """Poll `url` with HTTP GET until it answers 200 or retries run out.

    Args:
        url: The URL to check.
        retries: Number of connection attempts. Default is 50.
        delay: Milliseconds to wait between attempts. Default is 500.

    Returns:
        True once a 200 response is seen within the retry budget,
        otherwise False.
    """
    import requests
    import time
    for attempt in range(retries):
        try:
            if requests.get(url).status_code == 200:
                print(f"runpod-worker-comfy - API is reachable")
                return True
        except requests.RequestException:
            # Server not ready yet; fall through to the sleep below.
            pass
        time.sleep(delay / 1000)
    print(
        f"runpod-worker-comfy - Failed to connect to server at {url} after {retries} attempts."
    )
    return False
def check_status(prompt_id):
    """Fetch the comfyui-deploy status JSON for `prompt_id` from local ComfyUI."""
    status_url = f"http://{COMFY_HOST}/comfyui-deploy/check-status?prompt_id={prompt_id}"
    with urllib.request.urlopen(urllib.request.Request(status_url)) as response:
        return json.loads(response.read())
class Input(BaseModel):
    """Workflow-run request forwarded to comfyui-deploy inside ComfyUI."""
    prompt_id: str
    workflow_api: dict
    status_endpoint: str
    file_upload_endpoint: str
def queue_workflow_comfy_deploy(data: Input):
    """POST the run request to the local comfyui-deploy endpoint; return its JSON reply."""
    payload = data.json().encode('utf-8')
    request = urllib.request.Request(f"http://{COMFY_HOST}/comfyui-deploy/run", data=payload)
    return json.loads(urllib.request.urlopen(request).read())
class RequestInput(BaseModel):
    """Wrapper matching the {"input": {...}} body shape POSTed to /run."""
    input: Input


# Plain slim image for test deploys; otherwise the Dockerfile-built ComfyUI image.
image = Image.debian_slim()
target_image = image if deploy_test else dockerfile_image
@stub.function(image=target_image, gpu="T4")
def run(input: Input):
    """Modal GPU function: boot ComfyUI, queue the workflow, poll to completion.

    Returns {"status": <terminal status>} on completion, or {"error": ...}
    if queueing fails, polling fails, or the retry budget is exhausted.
    """
    import subprocess
    import time
    # Launch ComfyUI in the background and wait until its HTTP API answers.
    print(f"comfy-modal - check server")
    command = ["python3", "/comfyui/main.py", "--disable-auto-launch", "--disable-metadata"]
    server_process = subprocess.Popen(command)
    check_server(
        f"http://{COMFY_HOST}",
        COMFY_API_AVAILABLE_MAX_RETRIES,
        COMFY_API_AVAILABLE_INTERVAL_MS,
    )
    job_input = input
    # Queue the workflow
    try:
        # job_input is the json input
        queued_workflow = queue_workflow_comfy_deploy(job_input)
        prompt_id = queued_workflow["prompt_id"]
        print(f"comfy-modal - queued workflow with ID {prompt_id}")
    except Exception as e:
        import traceback
        print(traceback.format_exc())
        return {"error": f"Error queuing workflow: {str(e)}"}
    # Poll for completion
    print(f"comfy-modal - wait until image generation is complete")
    retries = 0
    status = ""
    try:
        print("getting request")
        while retries < COMFY_POLLING_MAX_RETRIES:
            status_result = check_status(prompt_id=prompt_id)
            # Exit the loop once comfyui-deploy reports a terminal state.
            if 'status' in status_result and (status_result['status'] == 'success' or status_result['status'] == 'failed'):
                status = status_result['status']
                print(status)
                break
            else:
                # Wait before trying again
                time.sleep(COMFY_POLLING_INTERVAL_MS / 1000)
                retries += 1
        else:
            # while-else: loop exhausted its retry budget without a break.
            return {"error": "Max retries reached while waiting for image generation"}
    except Exception as e:
        return {"error": f"Error waiting for image generation: {str(e)}"}
    print(f"comfy-modal - Finished, turning off")
    server_process.terminate()
    result = { "status": status }
    return result
print("Running remotely on Modal!")


@web_app.post("/run")
async def bar(request_input: RequestInput):
    """HTTP entry point: forward the run request to the GPU function.

    NOTE(review): in deploy_test mode this falls through and returns None
    (HTTP null) — presumably intentional for test deploys; confirm.
    """
    if not deploy_test:
        return run.remote(request_input.input)
@stub.function(image=image)
@asgi_app()
def comfyui_app():
    """Expose the FastAPI app as a Modal ASGI app (always on the slim image)."""
    return web_app

View File

@ -0,0 +1 @@
config = {"name": "my-app", "deploy_test": "True"}

View File

@ -0,0 +1,3 @@
[
]

View File

@ -0,0 +1,11 @@
# extra_model_paths.yaml — tells ComfyUI where to find shared model folders.
comfyui:
  # NOTE(review): base_path points at a /runpod-volume mount although this
  # template is deployed to Modal — confirm this path is correct here.
  base_path: /runpod-volume/ComfyUI/
  checkpoints: models/checkpoints/
  clip: models/clip/
  clip_vision: models/clip_vision/
  configs: models/configs/
  controlnet: models/controlnet/
  embeddings: models/embeddings/
  loras: models/loras/
  upscale_models: models/upscale_models/
  vae: models/vae/

View File

@ -0,0 +1,51 @@
"""Build-time helper: boot ComfyUI (CPU-only) and install custom-node deps.

Runs during the Docker image build: starts ComfyUI headless, waits for its
HTTP API, POSTs each entry of deps.json to ComfyUI-Manager's
/customnode/install endpoint, then shuts the server down again.
"""
import json
import requests
import time
import subprocess

# --cpu: the Docker build environment has no GPU.
command = ["python3", "/comfyui/main.py", "--disable-auto-launch", "--disable-metadata", "--cpu"]
# Start the server
server_process = subprocess.Popen(command)


def check_server(url, retries=50, delay=500):
    # Poll `url` with HEAD until it returns 200 (server up) or retries run out.
    for i in range(retries):
        try:
            response = requests.head(url)
            # If the response status code is 200, the server is up and running
            if response.status_code == 200:
                print(f"builder - API is reachable")
                return True
        except requests.RequestException as e:
            # If an exception occurs, the server may not be ready
            pass
        # Wait for the specified delay before retrying
        time.sleep(delay / 1000)
    print(
        f"builder- Failed to connect to server at {url} after {retries} attempts."
    )
    return False


check_server("http://127.0.0.1:8188")
url = "http://127.0.0.1:8188/customnode/install"
headers = {"Content-Type": "application/json"}
# Load JSON array from deps.json
with open('deps.json') as f:
    packages = json.load(f)
# Make a POST request for each package
for package in packages:
    response = requests.request("POST", url, json=package, headers=headers)
    print(response.text)
# Close the server
server_process.terminate()
print("Finished installing dependencies.")

View File

@ -0,0 +1,5 @@
{
"comfyui": "d0165d819afe76bd4e6bdd710eb5f3e571b6a804",
"git_custom_nodes": {},
"file_custom_nodes": []
}

View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Container entrypoint: place custom startup commands here before handing
# control to the container command.
echo "Starting modal"
exec "$@" # Runs the command passed to the entrypoint script.