diff --git a/app/Dockerfile b/app/Dockerfile index 7657b5e..3b79c54 100644 --- a/app/Dockerfile +++ b/app/Dockerfile @@ -5,7 +5,7 @@ RUN apt-get update \ && apt-get autoremove -y \ && apt-get clean -y \ && rm -rf /var/lib/apt/lists/* \ - && pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu117 --no-cache-dir \ + && pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu121 --no-cache-dir \ && mkdir -p /vol/cache/esrgan \ && wget --progress=dot:giga https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P /vol/cache/esrgan \ && wget --progress=dot:giga https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth -P /vol/cache/esrgan \ diff --git a/app/__main__.py b/app/__main__.py index 48130f2..77c07c0 100644 --- a/app/__main__.py +++ b/app/__main__.py @@ -2,10 +2,10 @@ from __future__ import annotations import stable_diffusion_1_5 import stable_diffusion_xl -from setup import stub +from setup import app -@stub.function(gpu="A10G") +@app.function(gpu="A10G") def main(): stable_diffusion_1_5.SD15 stable_diffusion_xl.SDXLTxt2Img diff --git a/app/requirements.txt b/app/requirements.txt index 7c774a5..2f53bcf 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -1,21 +1,21 @@ invisible_watermark accelerate -diffusers[torch]==0.24.0 -onnxruntime==1.16.3 -safetensors==0.4.1 -torch==2.1.0 -transformers==4.39.1 -xformers==0.0.22.post7 +diffusers[torch]==0.27.2 +onnxruntime==1.17.3 +safetensors==0.4.3 +torch==2.2.2 +transformers==4.39.3 +xformers==0.0.25.post1 realesrgan==0.3.0 basicsr>=1.4.2 facexlib>=0.3.0 gfpgan>=1.3.8 -scipy==1.12.0 +scipy==1.13.0 opencv-python Pillow pillow-avif-plugin -torchvision +torchvision==0.17.2 tqdm controlnet_aux diff --git a/app/setup.py b/app/setup.py index 8212b39..fd927f5 100644 --- a/app/setup.py +++ b/app/setup.py @@ -3,7 +3,7 @@ from __future__ import annotations import os import diffusers -from modal 
import Image, Mount, Secret, Stub +from modal import App, Image, Mount, Secret BASE_CACHE_PATH = "/vol/cache" BASE_CACHE_PATH_LORA = "/vol/cache/lora" @@ -58,7 +58,7 @@ def download_model(name: str, model_url: str, token: str): cache_path = os.path.join(BASE_CACHE_PATH, name) pipe = diffusers.StableDiffusionPipeline.from_single_file( pretrained_model_link_or_path=model_url, - use_auth_token=token, + token=token, cache_dir=cache_path, ) pipe.save_pretrained(cache_path, safe_serialization=True) @@ -131,12 +131,12 @@ def build_image(): ) -stub = Stub("stable-diffusion-cli") +app = App("stable-diffusion-cli") base_stub = Image.from_dockerfile( path="Dockerfile", context_mount=Mount.from_local_file("requirements.txt"), ) -stub.image = base_stub.dockerfile_commands( +app.image = base_stub.dockerfile_commands( "FROM base", "COPY config.yml /", context_mount=Mount.from_local_file("config.yml"), diff --git a/app/stable_diffusion_1_5.py b/app/stable_diffusion_1_5.py index 6c61f1d..b306714 100644 --- a/app/stable_diffusion_1_5.py +++ b/app/stable_diffusion_1_5.py @@ -10,11 +10,11 @@ from setup import ( BASE_CACHE_PATH_CONTROLNET, BASE_CACHE_PATH_LORA, BASE_CACHE_PATH_TEXTUAL_INVERSION, - stub, + app, ) -@stub.cls( +@app.cls( gpu="A10G", secrets=[Secret.from_dotenv(__file__)], ) @@ -187,15 +187,16 @@ class SD15: generated_images.extend(fixed_by_controlnet) base_images = fixed_by_controlnet - if upscaler != "": - upscaled = self._upscale( - base_images=base_images, - half_precision=False, - tile=700, - upscaler=upscaler, - use_face_enhancer=use_face_enhancer, - ) - generated_images.extend(upscaled) + # TODO: Upscaler stopped working due to update of dependent packages. Replace with diffusers upscaler. 
+ # if upscaler != "": + # upscaled = self._upscale( + # base_images=base_images, + # half_precision=False, + # tile=700, + # upscaler=upscaler, + # use_face_enhancer=use_face_enhancer, + # ) + # generated_images.extend(upscaled) image_output = [] for image in generated_images: diff --git a/app/stable_diffusion_xl.py b/app/stable_diffusion_xl.py index 3cc5afb..23c3b49 100644 --- a/app/stable_diffusion_xl.py +++ b/app/stable_diffusion_xl.py @@ -5,10 +5,10 @@ import os import PIL.Image from modal import Secret, enter, method -from setup import BASE_CACHE_PATH, BASE_CACHE_PATH_CONTROLNET, stub +from setup import BASE_CACHE_PATH, app -@stub.cls( +@app.cls( gpu="A10G", secrets=[Secret.from_dotenv(__file__)], ) @@ -39,13 +39,13 @@ class SDXLTxt2Img: variant="fp16", ) - # self.refiner_cache_path = self.cache_path + "-refiner" - # self.refiner = diffusers.StableDiffusionXLImg2ImgPipeline.from_pretrained( - # self.refiner_cache_path, - # torch_dtype=torch.float16, - # use_safetensors=True, - # variant="fp16", - # ) + self.refiner_cache_path = self.cache_path + "-refiner" + self.refiner = diffusers.StableDiffusionXLImg2ImgPipeline.from_pretrained( + self.refiner_cache_path, + torch_dtype=torch.float16, + use_safetensors=True, + variant="fp16", + ) # controlnets = config.get("controlnets") # if controlnets is not None: @@ -94,12 +94,10 @@ class SDXLTxt2Img: n_prompt: str, height: int = 1024, width: int = 1024, - batch_size: int = 1, steps: int = 30, seed: int = 1, upscaler: str = "", use_face_enhancer: bool = False, - fix_by_controlnet_tile: bool = False, output_format: str = "png", ) -> list[bytes]: """ @@ -119,37 +117,33 @@ class SDXLTxt2Img: ).images base_images = generated_images - # for image in base_images: - # image = self._resize_image(image=image, scale_factor=2) - # self.refiner.to("cuda") - # refined_images = self.refiner( - # prompt=prompt, - # negative_prompt=n_prompt, - # num_inference_steps=steps, - # strength=0.1, - # # guidance_scale=7.5, - # 
generator=generator, - # image=image, - # ).images - # generated_images.extend(refined_images) - # base_images = refined_images + for image in base_images: + image = self._resize_image(image=image, scale_factor=2) + self.refiner.to("cuda") + refined_images = self.refiner( + prompt=prompt, + negative_prompt=n_prompt, + num_inference_steps=steps, + strength=0.1, + # guidance_scale=7.5, + generator=generator, + image=image, + ).images + generated_images.extend(refined_images) + base_images = refined_images + """ Fix the generated images by the control_v11f1e_sd15_tile when `fix_by_controlnet_tile` is `True`. https://huggingface.co/lllyasviel/control_v11f1e_sd15_tile """ # if fix_by_controlnet_tile: # max_embeddings_multiples = self._count_token(p=prompt, n=n_prompt) - # print("========================確認用========================") - # print("Step1") # self.controlnet_pipe.to("cuda") # self.controlnet_pipe.enable_vae_tiling() # self.controlnet_pipe.enable_xformers_memory_efficient_attention() - # print("Step2") # for image in base_images: # image = self._resize_image(image=image, scale_factor=2) - # print("Step3") # with torch.autocast("cuda"): - # print("Step4") # fixed_by_controlnet = self.controlnet_pipe( # prompt=prompt * batch_size, # negative_prompt=n_prompt * batch_size, @@ -160,7 +154,6 @@ class SDXLTxt2Img: # generator=generator, # image=image, # ).images - # print("Step5") # generated_images.extend(fixed_by_controlnet) # base_images = fixed_by_controlnet diff --git a/cmd/sd15_txt2img.py b/cmd/sd15_txt2img.py index 64131f4..821bdea 100644 --- a/cmd/sd15_txt2img.py +++ b/cmd/sd15_txt2img.py @@ -3,11 +3,11 @@ import time import modal import util -stub = modal.Stub("run-stable-diffusion-cli") -stub.run_inference = modal.Function.from_name("stable-diffusion-cli", "SD15.run_txt2img_inference") +app = modal.App("run-stable-diffusion-cli") +app.run_inference = modal.Function.from_name("stable-diffusion-cli", "SD15.run_txt2img_inference") -@stub.local_entrypoint() 
+@app.local_entrypoint()
 def main(
     prompt: str,
     n_prompt: str,
@@ -33,7 +33,7 @@ def main(
     if seed == -1:
         seed_generated = util.generate_seed()
     start_time = time.time()
-    images = stub.run_inference.remote(
+    images = app.run_inference.remote(
         prompt=prompt,
         n_prompt=n_prompt,
         height=height,
diff --git a/cmd/sdxl_txt2img.py b/cmd/sdxl_txt2img.py
index 0b018a9..7a9f5f5 100644
--- a/cmd/sdxl_txt2img.py
+++ b/cmd/sdxl_txt2img.py
@@ -3,16 +3,18 @@ import time
 import modal
 import util
 
-stub = modal.Stub("run-stable-diffusion-cli")
-stub.run_inference = modal.Function.from_name("stable-diffusion-cli", "SDXLTxt2Img.run_inference")
+app = modal.App("run-stable-diffusion-cli")
+app.run_inference = modal.Function.from_name("stable-diffusion-cli", "SDXLTxt2Img.run_inference")
 
 
-@stub.local_entrypoint()
+@app.local_entrypoint()
 def main(
     prompt: str,
+    n_prompt: str,
     height: int = 1024,
     width: int = 1024,
     samples: int = 5,
+    steps: int = 20,
     seed: int = -1,
     upscaler: str = "",
     use_face_enhancer: str = "False",
@@ -29,10 +31,12 @@ def main(
     if seed == -1:
         seed_generated = util.generate_seed()
     start_time = time.time()
-    images = stub.run_inference.remote(
+    images = app.run_inference.remote(
         prompt=prompt,
+        n_prompt=n_prompt,
         height=height,
         width=width,
+        steps=steps,
         seed=seed_generated,
         upscaler=upscaler,
         use_face_enhancer=use_face_enhancer == "True",