Merge pull request #19 from hodanov/feature/refactoring

Refactoring
This commit is contained in:
hodanov 2023-07-02 23:20:15 +09:00 committed by GitHub
commit 0a8060cc85
8 changed files with 231 additions and 98 deletions

setup_files/.env.example

@@ -1,22 +1,3 @@
# `HUGGING_FACE_TOKEN` is the token for the Hugging Face API.
# The token can be found at https://huggingface.co/settings/token.
HUGGING_FACE_TOKEN=""
MODEL_REPO_ID="stabilityai/stable-diffusion-2-1"
MODEL_NAME="stable-diffusion-2-1"
# Modify `USE_VAE` to `true` if you want to use VAE.
USE_VAE="false"
# Add LoRA if you want to use one. You can use a download link from civitai.
# ex)
# - `LORA_NAMES="hogehoge.safetensors"`
# - `LORA_DOWNLOAD_URLS="https://civitai.com/api/download/models/xxxxxx"`
#
# If you have multiple LoRAs you want to use, separate them with commas like below:
# ex)
# - `LORA_NAMES="hogehoge.safetensors,mogumogu.safetensors"`
# - `LORA_DOWNLOAD_URLS="https://civitai.com/api/download/models/xxxxxx,https://civitai.com/api/download/models/xxxxxx"`
LORA_NAMES=""
LORA_DOWNLOAD_URLS=""
# Add a Textual Inversion you want to use. Usage is the same as `LORA_NAMES` and `LORA_DOWNLOAD_URLS`.
TEXTUAL_INVERSION_NAMES=""
TEXTUAL_INVERSION_DOWNLOAD_URLS=""
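
For reference, the names and URLs above are paired positionally. A minimal sketch of how the pre-refactor `setup.py` (removed later in this diff) consumes these comma-separated values, using the placeholder entries from the comments; the `xxxxxx` segments stay elided:

```python
# Placeholder values copied from the examples above.
names = "hogehoge.safetensors,mogumogu.safetensors".split(",")
urls = (
    "https://civitai.com/api/download/models/xxxxxx,"
    "https://civitai.com/api/download/models/xxxxxx"
).split(",")
# The n-th name is saved from the n-th URL.
for name, url in zip(names, urls):
    print(f"{name} <- {url}")
```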

.gitignore

@@ -1,5 +1,8 @@
.DS_Store
.mypy_cache/
__pycache__/
outputs/
.env
.mypy_cache/
.python-version
__pycache__/
config.yml
memo.md
outputs/

Makefile

@@ -1,5 +1,5 @@
deploy:
	modal deploy setup.py
	modal deploy ./setup_files/setup.py
# `--upscaler` is the name of the upscaler you want to use.
# You can use the upscalers below:

README.md

@@ -29,13 +29,33 @@ Please see [the documentation of Modal](https://modal.com/docs/guide) for modals
To use the script, follow the steps below.
1. git clone the repository.
2. Create the `.env` file and set a Hugging Face API token and a model with reference to `.env.example`.
3. Open the Makefile and set prompts.
4. Execute the `make deploy` command. An application will be deployed to Modal by the command.
5. Execute the `make run` command.
2. Create the `./setup_files/.env` file and set a Hugging Face API token with reference to `./setup_files/.env.example`.
3. Copy `./setup_files/config.sample.yml` to `./setup_files/config.yml`.
4. Open the Makefile and set prompts.
5. Execute the `make deploy` command. The application will be deployed to Modal.
6. Execute the `make run` command.
Images are generated and output to the `outputs/` directory.
## Directory structure
```
.
├── .env # Secrets manager
├── Makefile
├── README.md
├── sdcli/ # A directory with scripts to run inference.
│   ├── __init__.py
│   ├── outputs/ # Images are output to this directory.
│   ├── txt2img.py # A script to run txt2img inference.
│   └── util.py
└── setup_files/ # A directory with config files.
    ├── Dockerfile # To build a base image.
    ├── config.yml # To set a model, VAE and some tools.
    ├── requirements.txt
    └── setup.py # Build an application to deploy on Modal.
```
Thank you.
## Author

setup_files/Dockerfile

@@ -1,7 +1,9 @@
FROM python:3.11.3-slim-bullseye
COPY requirements.txt /
COPY ./requirements.txt /
RUN apt update \
    && apt install -y wget git libgl1-mesa-glx libglib2.0-0 \
    && apt autoremove -y \
    && apt clean -y \
    && pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu117 \
    && mkdir -p /vol/cache/esrgan \
    && wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P /vol/cache/esrgan \

setup_files/config.sample.yml

@@ -0,0 +1,38 @@
##########
# This is the config file to set a base model, VAE and some tools.
# Rename the file to `config.yml` before running the script.
# Execute `modal deploy ./setup_files/setup.py` every time you modify this file.
##########
##########
# You can use a diffusers model and VAE on Hugging Face.
model:
  name: stable-diffusion-2-1
  repo_id: stabilityai/stable-diffusion-2-1
vae:
  name: sd-vae-ft-mse
  repo_id: stabilityai/sd-vae-ft-mse
##########
# Add LoRA if you want to use one. You can use a download URL such as the one below.
# ex)
# loras:
#   - name: hogehoge.safetensors
#     download_url: https://hogehoge/xxxx
#   - name: fugafuga.safetensors
#     download_url: https://fugafuga/xxxx
##########
# You can also use Textual Inversion and ControlNet. Usage is the same as `loras`.
# ex)
# textual_inversions:
#   - name: hogehoge
#     download_url: https://hogehoge/xxxx
#   - name: fugafuga
#     download_url: https://fugafuga/xxxx
# controlnets:
#   - name: control_v11f1e_sd15_tile
#     repo_id: lllyasviel/control_v11f1e_sd15_tile
# upscaler:
#   name: RealESRGAN_x2plus
#   use_face_enhancer: false
#   use_hires_fix: false
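
For orientation, a minimal sketch of how `setup.py` (later in this diff) consumes this file once it has been copied to `config.yml`. The local path here is an assumption for running outside the container; the deployed image reads it from `/config.yml`:

```python
import yaml

# Load the config and walk the optional sections, as build_image() does.
with open("./setup_files/config.yml", "r") as file:  # assumed local path
    config = yaml.safe_load(file)

model = config.get("model")
if model is not None:
    print(f"model: {model['name']} ({model['repo_id']})")

for lora in config.get("loras") or []:
    print(f"lora: {lora['name']} <- {lora['download_url']}")
```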

setup_files/requirements.txt

@@ -15,3 +15,6 @@ opencv-python
Pillow
torchvision
tqdm
controlnet_aux
pyyaml

setup_files/setup.py

@@ -4,52 +4,63 @@ import io
import os
from urllib.request import Request, urlopen
import diffusers
import yaml
from modal import Image, Mount, Secret, Stub, method
from modal.cls import ClsMixin
BASE_CACHE_PATH = "/vol/cache"
BASE_CACHE_PATH_LORA = "/vol/cache/lora"
BASE_CACHE_PATH_TEXTUAL_INVERSION = "/vol/cache/textual_inversion"
BASE_CACHE_PATH_CONTROLNET = "/vol/cache/controlnet"
def download_files(urls, file_names, file_path):
def download_file(url, file_name, file_path):
    """
    Download files.
    """
    file_names = file_names.split(",")
    urls = urls.split(",")
    for file_name, url in zip(file_names, urls):
        req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
        downloaded = urlopen(req).read()
        dir_names = os.path.join(file_path, file_name)
        os.makedirs(os.path.dirname(dir_names), exist_ok=True)
        with open(dir_names, mode="wb") as f:
            f.write(downloaded)
    req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
    downloaded = urlopen(req).read()
    dir_names = os.path.join(file_path, file_name)
    os.makedirs(os.path.dirname(dir_names), exist_ok=True)
    with open(dir_names, mode="wb") as f:
        f.write(downloaded)
def download_models():
def download_controlnet(name: str, repo_id: str, token: str):
    """
    Downloads the model from Hugging Face and saves it to the cache path using
    diffusers.StableDiffusionPipeline.from_pretrained().
    Download a controlnet.
    """
    import diffusers
    cache_path = os.path.join(BASE_CACHE_PATH_CONTROLNET, name)
    controlnet = diffusers.ControlNetModel.from_pretrained(
        repo_id,
        use_auth_token=token,
        cache_dir=cache_path,
    )
    controlnet.save_pretrained(cache_path, safe_serialization=True)
    hugging_face_token = os.environ["HUGGING_FACE_TOKEN"]
    model_repo_id = os.environ["MODEL_REPO_ID"]
    cache_path = os.path.join(BASE_CACHE_PATH, os.environ["MODEL_NAME"])
def download_vae(name: str, repo_id: str, token: str):
    """
    Download a vae.
    """
    cache_path = os.path.join(BASE_CACHE_PATH, name)
    vae = diffusers.AutoencoderKL.from_pretrained(
        "stabilityai/sd-vae-ft-mse",
        use_auth_token=hugging_face_token,
        repo_id,
        use_auth_token=token,
        cache_dir=cache_path,
    )
    vae.save_pretrained(cache_path, safe_serialization=True)
def download_model(name: str, repo_id: str, token: str):
    """
    Download a model.
    """
    cache_path = os.path.join(BASE_CACHE_PATH, name)
    pipe = diffusers.StableDiffusionPipeline.from_pretrained(
        model_repo_id,
        use_auth_token=hugging_face_token,
        repo_id,
        use_auth_token=token,
        cache_dir=cache_path,
    )
    pipe.save_pretrained(cache_path, safe_serialization=True)
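
The helpers above replace the env-var-driven `download_models`/`download_files` pair. A hedged usage sketch of `download_file` with placeholder values; `build_image()` in the next hunk makes the equivalent call from `config.yml` entries:

```python
# Fetches one file and writes it under the LoRA cache directory,
# creating parent directories as needed ("xxxxxx" is a placeholder).
download_file(
    url="https://civitai.com/api/download/models/xxxxxx",
    file_name="hogehoge.safetensors",
    file_path=BASE_CACHE_PATH_LORA,  # /vol/cache/lora
)
```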
@@ -59,52 +70,82 @@ def build_image():
    """
    Build the Docker image.
    """
    download_models()
    token = os.environ["HUGGING_FACE_TOKEN"]
    config = {}
    with open("/config.yml", "r") as file:
        config = yaml.safe_load(file)
    if os.environ["LORA_NAMES"] != "":
        download_files(
            os.getenv("LORA_DOWNLOAD_URLS"),
            os.getenv("LORA_NAMES"),
            BASE_CACHE_PATH_LORA,
        )
    model = config.get("model")
    if model is not None:
        download_model(name=model["name"], repo_id=model["repo_id"], token=token)
    if os.environ["TEXTUAL_INVERSION_NAMES"] != "":
        download_files(
            os.getenv("TEXTUAL_INVERSION_DOWNLOAD_URLS"),
            os.getenv("TEXTUAL_INVERSION_NAMES"),
            BASE_CACHE_PATH_TEXTUAL_INVERSION,
        )
    vae = config.get("vae")
    if vae is not None:
        download_vae(name=model["name"], repo_id=vae["repo_id"], token=token)
    controlnets = config.get("controlnets")
    if controlnets is not None:
        for controlnet in controlnets:
            download_controlnet(name=controlnet["name"], repo_id=controlnet["repo_id"], token=token)
    loras = config.get("loras")
    if loras is not None:
        for lora in loras:
            download_file(
                url=lora["download_url"],
                file_name=lora["name"],
                file_path=BASE_CACHE_PATH_LORA,
            )
    textual_inversions = config.get("textual_inversions")
    if textual_inversions is not None:
        for textual_inversion in textual_inversions:
            download_file(
                url=textual_inversion["download_url"],
                file_name=textual_inversion["name"],
                file_path=BASE_CACHE_PATH_TEXTUAL_INVERSION,
            )
stub_image = Image.from_dockerfile(
    path="./Dockerfile",
    context_mount=Mount.from_local_file("./requirements.txt"),
stub = Stub("stable-diffusion-cli")
base_stub = Image.from_dockerfile(
    path="./setup_files/Dockerfile",
    context_mount=Mount.from_local_file("./setup_files/requirements.txt"),
)
stub.image = base_stub.extend(
    dockerfile_commands=[
        "FROM base",
        "COPY ./config.yml /",
    ],
    context_mount=Mount.from_local_file("./setup_files/config.yml"),
).run_function(
    build_image,
    secrets=[Secret.from_dotenv(__file__)],
)
stub = Stub("stable-diffusion-cli")
stub.image = stub_image
@stub.cls(gpu="A10G", secrets=[Secret.from_dotenv(__file__)])
@stub.cls(
    gpu="A10G",
    secrets=[Secret.from_dotenv(__file__)],
)
class StableDiffusion(ClsMixin):
    """
    A class that wraps the Stable Diffusion pipeline and scheduler.
    """
    def __enter__(self):
        import diffusers
        import torch
        self.cache_path = os.path.join(BASE_CACHE_PATH, os.environ["MODEL_NAME"])
        config = {}
        with open("/config.yml", "r") as file:
            config = yaml.safe_load(file)
        self.cache_path = os.path.join(BASE_CACHE_PATH, config["model"]["name"])
        if os.path.exists(self.cache_path):
            print(f"The directory '{self.cache_path}' exists.")
        else:
            print(f"The directory '{self.cache_path}' does not exist. Download models...")
            download_models()
            print(f"The directory '{self.cache_path}' does not exist.")
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.cuda.memory._set_allocator_settings("max_split_size_mb:256")
        self.pipe = diffusers.StableDiffusionPipeline.from_pretrained(
            self.cache_path,
@@ -119,40 +160,65 @@ class StableDiffusion(ClsMixin):
            subfolder="scheduler",
        )
        if os.environ["USE_VAE"] == "true":
        vae = config.get("vae")
        if vae is not None:
            self.pipe.vae = diffusers.AutoencoderKL.from_pretrained(
                self.cache_path,
                subfolder="vae",
            )
        self.pipe.to("cuda")
if os.environ["LORA_NAMES"] != "":
names = os.environ["LORA_NAMES"].split(",")
urls = os.environ["LORA_DOWNLOAD_URLS"].split(",")
for name, url in zip(names, urls):
path = os.path.join(BASE_CACHE_PATH_LORA, name)
loras = config.get("loras")
if loras is not None:
for lora in loras:
path = os.path.join(BASE_CACHE_PATH_LORA, lora["name"])
if os.path.exists(path):
print(f"The directory '{path}' exists.")
else:
print(f"The directory '{path}' does not exist. Download it...")
download_files(url, name, BASE_CACHE_PATH_LORA)
download_file(lora["download_url"], lora["name"], BASE_CACHE_PATH_LORA)
self.pipe.load_lora_weights(".", weight_name=path)
if os.environ["TEXTUAL_INVERSION_NAMES"] != "":
names = os.environ["TEXTUAL_INVERSION_NAMES"].split(",")
urls = os.environ["TEXTUAL_INVERSION_DOWNLOAD_URLS"].split(",")
for name, url in zip(names, urls):
path = os.path.join(BASE_CACHE_PATH_TEXTUAL_INVERSION, name)
textual_inversions = config.get("textual_inversions")
if textual_inversions is not None:
for textual_inversion in textual_inversions:
path = os.path.join(BASE_CACHE_PATH_TEXTUAL_INVERSION, textual_inversion["name"])
if os.path.exists(path):
print(f"The directory '{path}' exists.")
else:
print(f"The directory '{path}' does not exist. Download it...")
download_files(url, name, BASE_CACHE_PATH_TEXTUAL_INVERSION)
download_file(
textual_inversion["download_url"],
textual_inversion["name"],
BASE_CACHE_PATH_TEXTUAL_INVERSION,
)
self.pipe.load_textual_inversion(path)
        self.pipe.enable_xformers_memory_efficient_attention()
        # TODO: Add support for controlnets.
        # controlnet = diffusers.ControlNetModel.from_pretrained(
        #     "lllyasviel/control_v11f1e_sd15_tile",
        #     # "lllyasviel/sd-controlnet-canny",
        #     # self.cache_path,
        #     # subfolder="controlnet",
        #     torch_dtype=torch.float16,
        # )
        # self.controlnet_pipe = diffusers.StableDiffusionControlNetPipeline.from_pretrained(
        #     self.cache_path,
        #     controlnet=controlnet,
        #     custom_pipeline="lpw_stable_diffusion",
        #     # custom_pipeline="stable_diffusion_controlnet_img2img",
        #     scheduler=self.pipe.scheduler,
        #     vae=self.pipe.vae,
        #     torch_dtype=torch.float16,
        # )
        # self.controlnet_pipe.to("cuda")
        # self.controlnet_pipe.enable_xformers_memory_efficient_attention()
    @method()
    def count_token(self, p: str, n: str) -> int:
        """
@@ -214,6 +280,22 @@ class StableDiffusion(ClsMixin):
                generator=generator,
            ).images
        # for image in base_images:
        #     image = self.resize_image(image=image, scale_factor=2)
        #     with torch.inference_mode():
        #         with torch.autocast("cuda"):
        #             generatedWithControlnet = self.controlnet_pipe(
        #                 prompt=prompt * batch_size,
        #                 negative_prompt=n_prompt * batch_size,
        #                 num_inference_steps=steps,
        #                 strength=0.3,
        #                 guidance_scale=7.5,
        #                 max_embeddings_multiples=max_embeddings_multiples,
        #                 generator=generator,
        #                 image=image,
        #             ).images
        #     base_images.extend(generatedWithControlnet)
        if upscaler != "":
            upscaled = self.upscale(
                base_images=base_images,
@@ -224,8 +306,8 @@
                use_hires_fix=use_hires_fix,
            )
            base_images.extend(upscaled)
        if use_hires_fix:
            torch.cuda.empty_cache()
            for img in upscaled:
                with torch.inference_mode():
                    with torch.autocast("cuda"):
@@ -240,7 +322,6 @@
                            image=img,
                        ).images
                base_images.extend(hires_fixed)
        torch.cuda.empty_cache()
        image_output = []
        for image in base_images:
@@ -250,6 +331,15 @@
        return image_output
    @method()
    def resize_image(self, image: Image.Image, scale_factor: int) -> Image.Image:
        from PIL import Image
        image = image.convert("RGB")
        width, height = image.size
        img = image.resize((width * scale_factor, height * scale_factor), resample=Image.LANCZOS)
        return img
    @method()
    def upscale(
        self,
@@ -263,11 +353,10 @@
        use_hires_fix: bool = False,
    ) -> list[Image.Image]:
        """
        Upscales the given images using the given model.
        Upscales the given images using an upscaler.
        https://github.com/xinntao/Real-ESRGAN
        """
        import numpy
        import torch
        from basicsr.archs.rrdbnet_arch import RRDBNet
        from PIL import Image
        from realesrgan import RealESRGANer
@@ -312,7 +401,6 @@
            bg_upsampler=upsampler,
        )
        torch.cuda.empty_cache()
        upscaled_imgs = []
        with tqdm(total=len(base_images)) as progress_bar:
            for img in base_images:
@@ -330,6 +418,4 @@
                upscaled_imgs.append(Image.fromarray(enhance_result))
                progress_bar.update(1)
        torch.cuda.empty_cache()
        return upscaled_imgs