Refactoring.

hodanov 2023-07-02 22:44:32 +09:00
parent 11e2a6b790
commit d810577f3b
7 changed files with 202 additions and 91 deletions

.env.example Deleted file

@@ -1,22 +0,0 @@
-HUGGING_FACE_TOKEN=""
-MODEL_REPO_ID="stabilityai/stable-diffusion-2-1"
-MODEL_NAME="stable-diffusion-2-1"
-# Modify `USE_VAE` to `true` if you want to use VAE.
-USE_VAE="false"
-# Add LoRA if you want to use one. You can use a download link of civitai.
-# ex)
-# - `LORA_NAMES="hogehoge.safetensors"`
-# - `LORA_DOWNLOAD_URLS="https://civitai.com/api/download/models/xxxxxx"`
-#
-# If you have multiple LoRAs you want to use, separate by commas like the below:
-# ex)
-# - `LORA_NAMES="hogehoge.safetensors,mogumogu.safetensors"`
-# - `LORA_DOWNLOAD_URLS="https://civitai.com/api/download/models/xxxxxx,https://civitai.com/api/download/models/xxxxxx"`
-LORA_NAMES=""
-LORA_DOWNLOAD_URLS=""
-# Add Textual Inversion you wan to use. Usage is the same as `LORA_NAMES` and `LORA_DOWNLOAD_URLS`.
-TEXTUAL_INVERSION_NAMES=""
-TEXTUAL_INVERSION_DOWNLOAD_URLS=""

Makefile

@ -1,5 +1,5 @@
deploy: deploy:
modal deploy setup.py modal deploy ./setup_files/setup.py
# `--upscaler` is a name of upscaler you want to use. # `--upscaler` is a name of upscaler you want to use.
# You can use upscalers the below: # You can use upscalers the below:

setup_files/.env.example Normal file

@@ -0,0 +1,3 @@
+# `HUGGING_FACE_TOKEN` is the token for the Hugging Face API.
+# The token can be found at https://huggingface.co/settings/token.
+HUGGING_FACE_TOKEN=""
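For reference, a minimal sketch of how this token reaches the code, assuming (as setup.py below does) that Modal's Secret.from_dotenv loads the `.env` next to the script and exposes each entry as an environment variable; the probe function here is hypothetical:

import os

from modal import Secret, Stub

stub = Stub("token-check")

# Hypothetical probe; build_image() in setup.py performs the same lookup.
@stub.function(secrets=[Secret.from_dotenv(__file__)])
def check_token():
    token = os.environ["HUGGING_FACE_TOKEN"]
    print(f"HUGGING_FACE_TOKEN is set: {bool(token)}")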

setup_files/Dockerfile

@@ -1,7 +1,9 @@
 FROM python:3.11.3-slim-bullseye
-COPY requirements.txt /
+COPY ./requirements.txt /
 RUN apt update \
     && apt install -y wget git libgl1-mesa-glx libglib2.0-0 \
+    && apt autoremove -y \
+    && apt clean -y \
     && pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu117 \
     && mkdir -p /vol/cache/esrgan \
     && wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P /vol/cache/esrgan \

setup_files/config.example.yml Normal file

@@ -0,0 +1,38 @@
+##########
+# This is the config file to set a base model, VAE and some tools.
+# Rename the file to `config.yml` before running the script.
+# Execute `modal deploy ./setup_files/setup.py` every time you modify this file.
+##########
+
+##########
+# You can use a diffusers model and VAE hosted on Hugging Face.
+model:
+  name: stable-diffusion-2-1
+  repo_id: stabilityai/stable-diffusion-2-1
+vae:
+  name: sd-vae-ft-mse
+  repo_id: stabilityai/sd-vae-ft-mse
+
+##########
+# Add a LoRA if you want to use one. You can use a download URL such as the ones below.
+# ex)
+# loras:
+#   - name: hogehoge.safetensors
+#     download_url: https://hogehoge/xxxx
+#   - name: fugafuga.safetensors
+#     download_url: https://fugafuga/xxxx
+
+##########
+# You can also use Textual Inversion and ControlNet. Usage is the same as `loras`.
+# ex)
+# textual_inversions:
+#   - name: hogehoge
+#     download_url: https://hogehoge/xxxx
+#   - name: fugafuga
+#     download_url: https://fugafuga/xxxx
+# controlnets:
+#   - name: control_v11f1e_sd15_tile
+#     repo_id: lllyasviel/control_v11f1e_sd15_tile
+# upscaler:
+#   name: RealESRGAN_x2plus
+#   use_face_enhancer: false
+#   use_hires_fix: false
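The keys in this file map one-to-one onto the lookups build_image() performs in setup.py below; a short sketch of that parsing, with a placeholder local path:

import yaml

# Read the config the same way build_image() and __enter__() in setup.py do.
with open("./setup_files/config.yml", "r") as file:  # placeholder local path
    config = yaml.safe_load(file)

model = config.get("model")  # dict with "name" and "repo_id", or None if absent
vae = config.get("vae")

# Optional sections come back as lists of dicts; .get() returns None when missing.
for lora in config.get("loras") or []:
    print(lora["name"], lora["download_url"])
for controlnet in config.get("controlnets") or []:
    print(controlnet["name"], controlnet["repo_id"])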

setup_files/requirements.txt

@@ -15,3 +15,6 @@ opencv-python
 Pillow
 torchvision
 tqdm
+controlnet_aux
+pyyaml

setup_files/setup.py

@@ -4,52 +4,63 @@ import io
 import os
 from urllib.request import Request, urlopen
 
+import diffusers
+import yaml
 from modal import Image, Mount, Secret, Stub, method
 from modal.cls import ClsMixin
 
 BASE_CACHE_PATH = "/vol/cache"
 BASE_CACHE_PATH_LORA = "/vol/cache/lora"
 BASE_CACHE_PATH_TEXTUAL_INVERSION = "/vol/cache/textual_inversion"
+BASE_CACHE_PATH_CONTROLNET = "/vol/cache/controlnet"
 
 
-def download_files(urls, file_names, file_path):
+def download_file(url, file_name, file_path):
     """
     Download files.
     """
-    file_names = file_names.split(",")
-    urls = urls.split(",")
-
-    for file_name, url in zip(file_names, urls):
-        req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
-        downloaded = urlopen(req).read()
-        dir_names = os.path.join(file_path, file_name)
-        os.makedirs(os.path.dirname(dir_names), exist_ok=True)
-        with open(dir_names, mode="wb") as f:
-            f.write(downloaded)
+    req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
+    downloaded = urlopen(req).read()
+    dir_names = os.path.join(file_path, file_name)
+    os.makedirs(os.path.dirname(dir_names), exist_ok=True)
+    with open(dir_names, mode="wb") as f:
+        f.write(downloaded)
 
 
-def download_models():
+def download_controlnet(name: str, repo_id: str, token: str):
     """
-    Downloads the model from Hugging Face and saves it to the cache path using
-    diffusers.StableDiffusionPipeline.from_pretrained().
+    Download a controlnet.
     """
-    import diffusers
-
-    hugging_face_token = os.environ["HUGGING_FACE_TOKEN"]
-    model_repo_id = os.environ["MODEL_REPO_ID"]
-    cache_path = os.path.join(BASE_CACHE_PATH, os.environ["MODEL_NAME"])
+    cache_path = os.path.join(BASE_CACHE_PATH_CONTROLNET, name)
+    controlnet = diffusers.ControlNetModel.from_pretrained(
+        repo_id,
+        use_auth_token=token,
+        cache_dir=cache_path,
+    )
+    controlnet.save_pretrained(cache_path, safe_serialization=True)
 
+
+def download_vae(name: str, repo_id: str, token: str):
+    """
+    Download a vae.
+    """
+    cache_path = os.path.join(BASE_CACHE_PATH, name)
     vae = diffusers.AutoencoderKL.from_pretrained(
-        "stabilityai/sd-vae-ft-mse",
-        use_auth_token=hugging_face_token,
+        repo_id,
+        use_auth_token=token,
         cache_dir=cache_path,
     )
     vae.save_pretrained(cache_path, safe_serialization=True)
 
+
+def download_model(name: str, repo_id: str, token: str):
+    """
+    Download a model.
+    """
+    cache_path = os.path.join(BASE_CACHE_PATH, name)
     pipe = diffusers.StableDiffusionPipeline.from_pretrained(
-        model_repo_id,
-        use_auth_token=hugging_face_token,
+        repo_id,
+        use_auth_token=token,
         cache_dir=cache_path,
     )
     pipe.save_pretrained(cache_path, safe_serialization=True)
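The new download_file() is plain urllib; the spoofed User-Agent matters because some hosts (the old .env.example suggests civitai) reject the default Python agent. A standalone sketch of the same pattern, with placeholder URL and names:

import os
from urllib.request import Request, urlopen

# Same steps as download_file() above; the URL and names are placeholders.
url = "https://civitai.com/api/download/models/xxxxxx"
target = os.path.join("/tmp/lora", "hogehoge.safetensors")

req = Request(url, headers={"User-Agent": "Mozilla/5.0"})  # spoofed agent, as above
data = urlopen(req).read()
os.makedirs(os.path.dirname(target), exist_ok=True)
with open(target, mode="wb") as f:
    f.write(data)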
@@ -59,52 +70,82 @@ def build_image():
     """
     Build the Docker image.
     """
-    download_models()
-
-    if os.environ["LORA_NAMES"] != "":
-        download_files(
-            os.getenv("LORA_DOWNLOAD_URLS"),
-            os.getenv("LORA_NAMES"),
-            BASE_CACHE_PATH_LORA,
-        )
-
-    if os.environ["TEXTUAL_INVERSION_NAMES"] != "":
-        download_files(
-            os.getenv("TEXTUAL_INVERSION_DOWNLOAD_URLS"),
-            os.getenv("TEXTUAL_INVERSION_NAMES"),
-            BASE_CACHE_PATH_TEXTUAL_INVERSION,
-        )
+    token = os.environ["HUGGING_FACE_TOKEN"]
+    config = {}
+    with open("/config.yml", "r") as file:
+        config = yaml.safe_load(file)
+
+    model = config.get("model")
+    if model is not None:
+        download_model(name=model["name"], repo_id=model["repo_id"], token=token)
+
+    vae = config.get("vae")
+    if vae is not None:
+        download_vae(name=model["name"], repo_id=vae["repo_id"], token=token)
+
+    controlnets = config.get("controlnets")
+    if controlnets is not None:
+        for controlnet in controlnets:
+            download_controlnet(name=controlnet["name"], repo_id=controlnet["repo_id"], token=token)
+
+    loras = config.get("loras")
+    if loras is not None:
+        for lora in loras:
+            download_file(
+                url=lora["download_url"],
+                file_name=lora["name"],
+                file_path=BASE_CACHE_PATH_LORA,
+            )
+
+    textual_inversions = config.get("textual_inversions")
+    if textual_inversions is not None:
+        for textual_inversion in textual_inversions:
+            download_file(
+                url=textual_inversion["download_url"],
+                file_name=textual_inversion["name"],
+                file_path=BASE_CACHE_PATH_TEXTUAL_INVERSION,
+            )
 
 
-stub_image = Image.from_dockerfile(
-    path="./Dockerfile",
-    context_mount=Mount.from_local_file("./requirements.txt"),
+stub = Stub("stable-diffusion-cli")
+base_stub = Image.from_dockerfile(
+    path="./setup_files/Dockerfile",
+    context_mount=Mount.from_local_file("./setup_files/requirements.txt"),
+)
+stub.image = base_stub.extend(
+    dockerfile_commands=[
+        "FROM base",
+        "COPY ./config.yml /",
+    ],
+    context_mount=Mount.from_local_file("./setup_files/config.yml"),
 ).run_function(
     build_image,
     secrets=[Secret.from_dotenv(__file__)],
 )
 
-stub = Stub("stable-diffusion-cli")
-stub.image = stub_image
-
 
-@stub.cls(gpu="A10G", secrets=[Secret.from_dotenv(__file__)])
+@stub.cls(
+    gpu="A10G",
+    secrets=[Secret.from_dotenv(__file__)],
+)
 class StableDiffusion(ClsMixin):
     """
     A class that wraps the Stable Diffusion pipeline and scheduler.
     """
 
     def __enter__(self):
-        import diffusers
         import torch
 
-        self.cache_path = os.path.join(BASE_CACHE_PATH, os.environ["MODEL_NAME"])
+        config = {}
+        with open("/config.yml", "r") as file:
+            config = yaml.safe_load(file)
+        self.cache_path = os.path.join(BASE_CACHE_PATH, config["model"]["name"])
         if os.path.exists(self.cache_path):
             print(f"The directory '{self.cache_path}' exists.")
         else:
-            print(f"The directory '{self.cache_path}' does not exist. Download models...")
-            download_models()
+            print(f"The directory '{self.cache_path}' does not exist.")
 
-        torch.backends.cuda.matmul.allow_tf32 = True
+        torch.cuda.memory._set_allocator_settings("max_split_size_mb:256")
 
         self.pipe = diffusers.StableDiffusionPipeline.from_pretrained(
             self.cache_path,
@@ -119,40 +160,65 @@ class StableDiffusion(ClsMixin):
             subfolder="scheduler",
         )
 
-        if os.environ["USE_VAE"] == "true":
+        vae = config.get("vae")
+        if vae is not None:
             self.pipe.vae = diffusers.AutoencoderKL.from_pretrained(
                 self.cache_path,
                 subfolder="vae",
             )
 
         self.pipe.to("cuda")
 
-        if os.environ["LORA_NAMES"] != "":
-            names = os.environ["LORA_NAMES"].split(",")
-            urls = os.environ["LORA_DOWNLOAD_URLS"].split(",")
-            for name, url in zip(names, urls):
-                path = os.path.join(BASE_CACHE_PATH_LORA, name)
+        loras = config.get("loras")
+        if loras is not None:
+            for lora in loras:
+                path = os.path.join(BASE_CACHE_PATH_LORA, lora["name"])
                 if os.path.exists(path):
                     print(f"The directory '{path}' exists.")
                 else:
                     print(f"The directory '{path}' does not exist. Download it...")
-                    download_files(url, name, BASE_CACHE_PATH_LORA)
+                    download_file(lora["download_url"], lora["name"], BASE_CACHE_PATH_LORA)
                 self.pipe.load_lora_weights(".", weight_name=path)
 
-        if os.environ["TEXTUAL_INVERSION_NAMES"] != "":
-            names = os.environ["TEXTUAL_INVERSION_NAMES"].split(",")
-            urls = os.environ["TEXTUAL_INVERSION_DOWNLOAD_URLS"].split(",")
-            for name, url in zip(names, urls):
-                path = os.path.join(BASE_CACHE_PATH_TEXTUAL_INVERSION, name)
+        textual_inversions = config.get("textual_inversions")
+        if textual_inversions is not None:
+            for textual_inversion in textual_inversions:
+                path = os.path.join(BASE_CACHE_PATH_TEXTUAL_INVERSION, textual_inversion["name"])
                 if os.path.exists(path):
                     print(f"The directory '{path}' exists.")
                 else:
                     print(f"The directory '{path}' does not exist. Download it...")
-                    download_files(url, name, BASE_CACHE_PATH_TEXTUAL_INVERSION)
+                    download_file(
+                        textual_inversion["download_url"],
+                        textual_inversion["name"],
+                        BASE_CACHE_PATH_TEXTUAL_INVERSION,
+                    )
                 self.pipe.load_textual_inversion(path)
 
         self.pipe.enable_xformers_memory_efficient_attention()
 
+        # TODO: Add support for controlnets.
+        # controlnet = diffusers.ControlNetModel.from_pretrained(
+        #     "lllyasviel/control_v11f1e_sd15_tile",
+        #     # "lllyasviel/sd-controlnet-canny",
+        #     # self.cache_path,
+        #     # subfolder="controlnet",
+        #     torch_dtype=torch.float16,
+        # )
+        # self.controlnet_pipe = diffusers.StableDiffusionControlNetPipeline.from_pretrained(
+        #     self.cache_path,
+        #     controlnet=controlnet,
+        #     custom_pipeline="lpw_stable_diffusion",
+        #     # custom_pipeline="stable_diffusion_controlnet_img2img",
+        #     scheduler=self.pipe.scheduler,
+        #     vae=self.pipe.vae,
+        #     torch_dtype=torch.float16,
+        # )
+        # self.controlnet_pipe.to("cuda")
+        # self.controlnet_pipe.enable_xformers_memory_efficient_attention()
+
     @method()
     def count_token(self, p: str, n: str) -> int:
         """
@@ -214,6 +280,22 @@ class StableDiffusion(ClsMixin):
                 generator=generator,
             ).images
 
+        # for image in base_images:
+        #     image = self.resize_image(image=image, scale_factor=2)
+        #     with torch.inference_mode():
+        #         with torch.autocast("cuda"):
+        #             generatedWithControlnet = self.controlnet_pipe(
+        #                 prompt=prompt * batch_size,
+        #                 negative_prompt=n_prompt * batch_size,
+        #                 num_inference_steps=steps,
+        #                 strength=0.3,
+        #                 guidance_scale=7.5,
+        #                 max_embeddings_multiples=max_embeddings_multiples,
+        #                 generator=generator,
+        #                 image=image,
+        #             ).images
+        #     base_images.extend(generatedWithControlnet)
+
         if upscaler != "":
             upscaled = self.upscale(
                 base_images=base_images,
@@ -224,8 +306,8 @@ class StableDiffusion(ClsMixin):
                 use_hires_fix=use_hires_fix,
             )
             base_images.extend(upscaled)
 
         if use_hires_fix:
-            torch.cuda.empty_cache()
             for img in upscaled:
                 with torch.inference_mode():
                     with torch.autocast("cuda"):
@@ -240,7 +322,6 @@ class StableDiffusion(ClsMixin):
                             image=img,
                         ).images
                     base_images.extend(hires_fixed)
-            torch.cuda.empty_cache()
 
         image_output = []
         for image in base_images:
@@ -250,6 +331,15 @@ class StableDiffusion(ClsMixin):
 
         return image_output
 
+    @method()
+    def resize_image(self, image: Image.Image, scale_factor: int) -> Image.Image:
+        from PIL import Image
+
+        image = image.convert("RGB")
+        width, height = image.size
+        img = image.resize((width * scale_factor, height * scale_factor), resample=Image.LANCZOS)
+        return img
+
     @method()
     def upscale(
         self,
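The new resize_image method, which the commented-out ControlNet pass above calls before img2img, is plain PIL; a standalone sketch of the same operation, with placeholder file names:

from PIL import Image

# Double an image's dimensions with Lanczos resampling, as resize_image() does.
image = Image.open("input.png").convert("RGB")  # placeholder input file
width, height = image.size
resized = image.resize((width * 2, height * 2), resample=Image.LANCZOS)
resized.save("output.png")  # placeholder output file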
@@ -263,7 +353,7 @@ class StableDiffusion(ClsMixin):
         use_hires_fix: bool = False,
     ) -> list[Image.Image]:
         """
-        Upscales the given images using the given model.
+        Upscales the given images using an upscaler.
         https://github.com/xinntao/Real-ESRGAN
         """
         import numpy
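For context, the upscaler behind this docstring is Real-ESRGAN, whose x4plus weight the Dockerfile above pre-downloads into /vol/cache/esrgan. A minimal sketch of the library's basic (no face-enhancer) path, assuming the realesrgan package's documented API; file names are placeholders:

import numpy
from basicsr.archs.rrdbnet_arch import RRDBNet
from PIL import Image
from realesrgan import RealESRGANer

# RealESRGAN_x4plus is the weight the Dockerfile fetches at build time.
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
upsampler = RealESRGANer(
    scale=4,
    model_path="/vol/cache/esrgan/RealESRGAN_x4plus.pth",
    model=model,
    half=True,  # fp16 inference on CUDA
)
img = numpy.array(Image.open("input.png"))  # placeholder input
output, _ = upsampler.enhance(img, outscale=4)
Image.fromarray(output).save("upscaled.png")  # placeholder output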
@@ -312,7 +402,6 @@ class StableDiffusion(ClsMixin):
             bg_upsampler=upsampler,
         )
 
-        torch.cuda.empty_cache()
         upscaled_imgs = []
         with tqdm(total=len(base_images)) as progress_bar:
             for img in base_images:
@@ -330,6 +419,4 @@ class StableDiffusion(ClsMixin):
                     upscaled_imgs.append(Image.fromarray(enhance_result))
                     progress_bar.update(1)
 
-        torch.cuda.empty_cache()
-
         return upscaled_imgs