Update some dependencies. Fix some code.

2024-04-21 12:40:44 +09:00 · 2024-04-21 12:40:44 +09:00 · e1639039d7
commit e1639039d7
parent 4fe518038d
8 changed files with 63 additions and 65 deletions
--- a/app/Dockerfile
+++ b/app/Dockerfile
@ -5,7 +5,7 @@ RUN apt-get update \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /var/lib/apt/lists/* \
-    && pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu117 --no-cache-dir \
+    && pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu121 --no-cache-dir \
    && mkdir -p /vol/cache/esrgan \
    && wget --progress=dot:giga https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P /vol/cache/esrgan \
    && wget --progress=dot:giga https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth -P /vol/cache/esrgan \
--- a/app/main.py
+++ b/app/main.py
@ -2,10 +2,10 @@ from __future__ import annotations

 import stable_diffusion_1_5
 import stable_diffusion_xl
-from setup import stub
+from setup import app


-@stub.function(gpu="A10G")
+@app.function(gpu="A10G")
 def main():
    stable_diffusion_1_5.SD15
    stable_diffusion_xl.SDXLTxt2Img
--- a/app/requirements.txt
+++ b/app/requirements.txt
@ -1,21 +1,21 @@
 invisible_watermark
 accelerate
-diffusers[torch]==0.24.0
-onnxruntime==1.16.3
-safetensors==0.4.1
-torch==2.1.0
-transformers==4.39.1
-xformers==0.0.22.post7
+diffusers[torch]==0.27.2
+onnxruntime==1.17.3
+safetensors==0.4.3
+torch==2.2.2
+transformers==4.39.3
+xformers==0.0.25.post1

 realesrgan==0.3.0
 basicsr>=1.4.2
 facexlib>=0.3.0
 gfpgan>=1.3.8
-scipy==1.12.0
+scipy==1.13.0
 opencv-python
 Pillow
 pillow-avif-plugin
-torchvision
+torchvision==0.17.2
 tqdm

 controlnet_aux
--- a/app/setup.py
+++ b/app/setup.py
@ -3,7 +3,7 @@ from __future__ import annotations
 import os

 import diffusers
-from modal import Image, Mount, Secret, Stub
+from modal import App, Image, Mount, Secret

 BASE_CACHE_PATH = "/vol/cache"
 BASE_CACHE_PATH_LORA = "/vol/cache/lora"
@ -58,7 +58,7 @@ def download_model(name: str, model_url: str, token: str):
    cache_path = os.path.join(BASE_CACHE_PATH, name)
    pipe = diffusers.StableDiffusionPipeline.from_single_file(
        pretrained_model_link_or_path=model_url,
-        use_auth_token=token,
+        token=token,
        cache_dir=cache_path,
    )
    pipe.save_pretrained(cache_path, safe_serialization=True)
@ -131,12 +131,12 @@ def build_image():
            )


-stub = Stub("stable-diffusion-cli")
+app = App("stable-diffusion-cli")
 base_stub = Image.from_dockerfile(
    path="Dockerfile",
    context_mount=Mount.from_local_file("requirements.txt"),
 )
-stub.image = base_stub.dockerfile_commands(
+app.image = base_stub.dockerfile_commands(
    "FROM base",
    "COPY config.yml /",
    context_mount=Mount.from_local_file("config.yml"),
--- a/app/stable_diffusion_1_5.py
+++ b/app/stable_diffusion_1_5.py
@ -10,11 +10,11 @@ from setup import (
    BASE_CACHE_PATH_CONTROLNET,
    BASE_CACHE_PATH_LORA,
    BASE_CACHE_PATH_TEXTUAL_INVERSION,
-    stub,
+    app,
 )


-@stub.cls(
+@app.cls(
    gpu="A10G",
    secrets=[Secret.from_dotenv(__file__)],
 )
@ -187,15 +187,16 @@ class SD15:
            generated_images.extend(fixed_by_controlnet)
            base_images = fixed_by_controlnet

-        if upscaler != "":
-            upscaled = self._upscale(
-                base_images=base_images,
-                half_precision=False,
-                tile=700,
-                upscaler=upscaler,
-                use_face_enhancer=use_face_enhancer,
-            )
-            generated_images.extend(upscaled)
+        # TODO: Upscaler stopped working due to update of dependent packages. Replace with diffusers upscaler.
+        # if upscaler != "":
+        #     upscaled = self._upscale(
+        #         base_images=base_images,
+        #         half_precision=False,
+        #         tile=700,
+        #         upscaler=upscaler,
+        #         use_face_enhancer=use_face_enhancer,
+        #     )
+        #     generated_images.extend(upscaled)

        image_output = []
        for image in generated_images:
--- a/app/stable_diffusion_xl.py
+++ b/app/stable_diffusion_xl.py
@ -5,10 +5,10 @@ import os

 import PIL.Image
 from modal import Secret, enter, method
-from setup import BASE_CACHE_PATH, BASE_CACHE_PATH_CONTROLNET, stub
+from setup import BASE_CACHE_PATH, app


-@stub.cls(
+@app.cls(
    gpu="A10G",
    secrets=[Secret.from_dotenv(__file__)],
 )
@ -39,13 +39,13 @@ class SDXLTxt2Img:
            variant="fp16",
        )

-        # self.refiner_cache_path = self.cache_path + "-refiner"
-        # self.refiner = diffusers.StableDiffusionXLImg2ImgPipeline.from_pretrained(
-        #     self.refiner_cache_path,
-        #     torch_dtype=torch.float16,
-        #     use_safetensors=True,
-        #     variant="fp16",
-        # )
+        self.refiner_cache_path = self.cache_path + "-refiner"
+        self.refiner = diffusers.StableDiffusionXLImg2ImgPipeline.from_pretrained(
+            self.refiner_cache_path,
+            torch_dtype=torch.float16,
+            use_safetensors=True,
+            variant="fp16",
+        )

        # controlnets = config.get("controlnets")
        # if controlnets is not None:
@ -94,12 +94,10 @@ class SDXLTxt2Img:
        n_prompt: str,
        height: int = 1024,
        width: int = 1024,
-        batch_size: int = 1,
        steps: int = 30,
        seed: int = 1,
        upscaler: str = "",
        use_face_enhancer: bool = False,
-        fix_by_controlnet_tile: bool = False,
        output_format: str = "png",
    ) -> list[bytes]:
        """
@ -119,37 +117,33 @@ class SDXLTxt2Img:
        ).images
        base_images = generated_images

-        # for image in base_images:
-        #     image = self._resize_image(image=image, scale_factor=2)
-        #     self.refiner.to("cuda")
-        #     refined_images = self.refiner(
-        #         prompt=prompt,
-        #         negative_prompt=n_prompt,
-        #         num_inference_steps=steps,
-        #         strength=0.1,
-        #         # guidance_scale=7.5,
-        #         generator=generator,
-        #         image=image,
-        #     ).images
-        # generated_images.extend(refined_images)
-        # base_images = refined_images
+        # Collect refiner output per image; base_images aliases generated_images, so do not extend it while iterating.
+        refined_images = []
+        for image in base_images:
+            self.refiner.to("cuda")
+            refined_images += self.refiner(
+                prompt=prompt,
+                negative_prompt=n_prompt,
+                num_inference_steps=steps,
+                strength=0.1,
+                generator=generator,
+                image=self._resize_image(image=image, scale_factor=2),
+            ).images
+        generated_images.extend(refined_images)
+        base_images = refined_images
+
        """
        Fix the generated images by the control_v11f1e_sd15_tile when `fix_by_controlnet_tile` is `True`.
        https://huggingface.co/lllyasviel/control_v11f1e_sd15_tile
        """
        # if fix_by_controlnet_tile:
        #     max_embeddings_multiples = self._count_token(p=prompt, n=n_prompt)
-        #     print("========================確認用========================")
-        #     print("Step1")
        #     self.controlnet_pipe.to("cuda")
        #     self.controlnet_pipe.enable_vae_tiling()
        #     self.controlnet_pipe.enable_xformers_memory_efficient_attention()
-        #     print("Step2")
        #     for image in base_images:
        #         image = self._resize_image(image=image, scale_factor=2)
-        #         print("Step3")
        #         with torch.autocast("cuda"):
-        #             print("Step4")
        #             fixed_by_controlnet = self.controlnet_pipe(
        #                 prompt=prompt * batch_size,
        #                 negative_prompt=n_prompt * batch_size,
@ -160,7 +154,6 @@ class SDXLTxt2Img:
        #                 generator=generator,
        #                 image=image,
        #             ).images
-        #     print("Step5")
        #     generated_images.extend(fixed_by_controlnet)
        #     base_images = fixed_by_controlnet

--- a/cmd/sd15_txt2img.py
+++ b/cmd/sd15_txt2img.py
@ -3,11 +3,11 @@ import time
 import modal
 import util

-stub = modal.Stub("run-stable-diffusion-cli")
-stub.run_inference = modal.Function.from_name("stable-diffusion-cli", "SD15.run_txt2img_inference")
+app = modal.App("run-stable-diffusion-cli")
+app.run_inference = modal.Function.from_name("stable-diffusion-cli", "SD15.run_txt2img_inference")


-@stub.local_entrypoint()
+@app.local_entrypoint()
 def main(
    prompt: str,
    n_prompt: str,
@ -33,7 +33,7 @@ def main(
        if seed == -1:
            seed_generated = util.generate_seed()
        start_time = time.time()
-        images = stub.run_inference.remote(
+        images = app.run_inference.remote(
            prompt=prompt,
            n_prompt=n_prompt,
            height=height,
--- a/cmd/sdxl_txt2img.py
+++ b/cmd/sdxl_txt2img.py
@ -3,16 +3,18 @@ import time
 import modal
 import util

-stub = modal.Stub("run-stable-diffusion-cli")
-stub.run_inference = modal.Function.from_name("stable-diffusion-cli", "SDXLTxt2Img.run_inference")
+app = modal.App("run-stable-diffusion-cli")
+app.run_inference = modal.Function.from_name("stable-diffusion-cli", "SDXLTxt2Img.run_inference")


-@stub.local_entrypoint()
+@app.local_entrypoint()
 def main(
    prompt: str,
+    n_prompt: str,
    height: int = 1024,
    width: int = 1024,
    samples: int = 5,
+    steps: int = 20,
    seed: int = -1,
    upscaler: str = "",
    use_face_enhancer: str = "False",
@ -29,10 +31,12 @@ def main(
        if seed == -1:
            seed_generated = util.generate_seed()
        start_time = time.time()
-        images = stub.run_inference.remote(
+        images = app.run_inference.remote(
            prompt=prompt,
+            n_prompt=n_prompt,
            height=height,
            width=width,
+            steps=steps,
            seed=seed_generated,
            upscaler=upscaler,
            use_face_enhancer=use_face_enhancer == "True",