From 11e2a6b790b5b855055c483766e1b2ca04b05e99 Mon Sep 17 00:00:00 2001
From: hodanov <1031hoda@gmail.com>
Date: Sun, 2 Jul 2023 22:31:46 +0900
Subject: [PATCH 1/6] Update .gitignore

---
 .gitignore | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 421016c..062f158 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,8 @@
 .DS_Store
-.mypy_cache/
-__pycache__/
-outputs/
 .env
+.mypy_cache/
+.python-version
+__pycache__/
+config.yml
+memo.md
+outputs/

From d810577f3b1b367e2efc7a3cec56743f8ecc75ea Mon Sep 17 00:00:00 2001
From: hodanov <1031hoda@gmail.com>
Date: Sun, 2 Jul 2023 22:44:32 +0900
Subject: [PATCH 2/6] Refactoring.

---
 .env.example                         |  22 --
 Makefile                             |   2 +-
 setup_files/.env.example             |   3 +
 Dockerfile => setup_files/Dockerfile |   4 +-
 setup_files/config.sample.yml        |  38 +++
 .../requirements.txt                 |   3 +
 setup.py => setup_files/setup.py     | 221 ++++++++++++------
 7 files changed, 202 insertions(+), 91 deletions(-)
 delete mode 100644 .env.example
 create mode 100644 setup_files/.env.example
 rename Dockerfile => setup_files/Dockerfile (91%)
 create mode 100644 setup_files/config.sample.yml
 rename requirements.txt => setup_files/requirements.txt (91%)
 rename setup.py => setup_files/setup.py (60%)

diff --git a/.env.example b/.env.example
deleted file mode 100644
index e14931a..0000000
--- a/.env.example
+++ /dev/null
@@ -1,22 +0,0 @@
-HUGGING_FACE_TOKEN=""
-MODEL_REPO_ID="stabilityai/stable-diffusion-2-1"
-MODEL_NAME="stable-diffusion-2-1"
-
-# Modify `USE_VAE` to `true` if you want to use VAE.
-USE_VAE="false"
-
-# Add LoRA if you want to use one. You can use a download link of civitai.
-# ex)
-# - `LORA_NAMES="hogehoge.safetensors"`
-# - `LORA_DOWNLOAD_URLS="https://civitai.com/api/download/models/xxxxxx"`
-#
-# If you have multiple LoRAs you want to use, separate by commas like the below:
-# ex)
-# - `LORA_NAMES="hogehoge.safetensors,mogumogu.safetensors"`
-# - `LORA_DOWNLOAD_URLS="https://civitai.com/api/download/models/xxxxxx,https://civitai.com/api/download/models/xxxxxx"`
-LORA_NAMES=""
-LORA_DOWNLOAD_URLS=""
-
-# Add Textual Inversion you wan to use. Usage is the same as `LORA_NAMES` and `LORA_DOWNLOAD_URLS`.
-TEXTUAL_INVERSION_NAMES=""
-TEXTUAL_INVERSION_DOWNLOAD_URLS=""
diff --git a/Makefile b/Makefile
index 67716ae..400f818 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 deploy:
-	modal deploy setup.py
+	modal deploy ./setup_files/setup.py
 
 # `--upscaler` is a name of upscaler you want to use.
 # You can use upscalers the below:
diff --git a/setup_files/.env.example b/setup_files/.env.example
new file mode 100644
index 0000000..79c037d
--- /dev/null
+++ b/setup_files/.env.example
@@ -0,0 +1,3 @@
+# `HUGGING_FACE_TOKEN` is the token for the Hugging Face API.
+# The token can be found at https://huggingface.co/settings/token.
+HUGGING_FACE_TOKEN=""
diff --git a/Dockerfile b/setup_files/Dockerfile
similarity index 91%
rename from Dockerfile
rename to setup_files/Dockerfile
index d69b8c1..00227ed 100644
--- a/Dockerfile
+++ b/setup_files/Dockerfile
@@ -1,7 +1,9 @@
 FROM python:3.11.3-slim-bullseye
-COPY requirements.txt /
+COPY ./requirements.txt /
 RUN apt update \
     && apt install -y wget git libgl1-mesa-glx libglib2.0-0 \
+    && apt autoremove -y \
+    && apt clean -y \
     && pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu117 \
     && mkdir -p /vol/cache/esrgan \
     && wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P /vol/cache/esrgan \
diff --git a/setup_files/config.sample.yml b/setup_files/config.sample.yml
new file mode 100644
index 0000000..2f86f59
--- /dev/null
+++ b/setup_files/config.sample.yml
@@ -0,0 +1,38 @@
+##########
+# This is the config file to set a base model, VAE and some tools.
+# Rename the file to `config.yml` before running the script.
+# Execute `modal deploy ./setup_files/setup.py` every time you modify this file.
+##########
+
+##########
+# You can use a diffusers model and VAE on Hugging Face.
+model:
+  name: stable-diffusion-2-1
+  repo_id: stabilityai/stable-diffusion-2-1
+vae:
+  name: sd-vae-ft-mse
+  repo_id: stabilityai/sd-vae-ft-mse
+##########
+# Add LoRA if you want to use one. You can use a download URL such as the ones below.
+# ex)
+# loras:
+#   - name: hogehoge.safetensors
+#     download_url: https://hogehoge/xxxx
+#   - name: fugafuga.safetensors
+#     download_url: https://fugafuga/xxxx
+
+##########
+# You can also use Textual Inversion and ControlNet. Usage is the same as `loras`.
+# ex)
+# textual_inversions:
+#   - name: hogehoge
+#     download_url: https://hogehoge/xxxx
+#   - name: fugafuga
+#     download_url: https://fugafuga/xxxx
+# controlnets:
+#   - name: control_v11f1e_sd15_tile
+#     repo_id: lllyasviel/control_v11f1e_sd15_tile
+# upscaler:
+#   name: RealESRGAN_x2plus
+#   use_face_enhancer: false
+#   use_hires_fix: false
diff --git a/requirements.txt b/setup_files/requirements.txt
similarity index 91%
rename from requirements.txt
rename to setup_files/requirements.txt
index 119f567..6ba20b7 100644
--- a/requirements.txt
+++ b/setup_files/requirements.txt
@@ -15,3 +15,6 @@ opencv-python
 Pillow
 torchvision
 tqdm
+
+controlnet_aux
+pyyaml
diff --git a/setup.py b/setup_files/setup.py
similarity index 60%
rename from setup.py
rename to setup_files/setup.py
index 33f905b..32c9b2d 100644
--- a/setup.py
+++ b/setup_files/setup.py
@@ -4,52 +4,63 @@ import io
 import os
 from urllib.request import Request, urlopen
 
+import diffusers
+import yaml
 from modal import Image, Mount, Secret, Stub, method
 from modal.cls import ClsMixin
 
 BASE_CACHE_PATH = "/vol/cache"
 BASE_CACHE_PATH_LORA = "/vol/cache/lora"
 BASE_CACHE_PATH_TEXTUAL_INVERSION = "/vol/cache/textual_inversion"
+BASE_CACHE_PATH_CONTROLNET = "/vol/cache/controlnet"
 
 
-def download_files(urls, file_names, file_path):
+def download_file(url, file_name, file_path):
     """
     Download files.
""" - file_names = file_names.split(",") - urls = urls.split(",") - - for file_name, url in zip(file_names, urls): - req = Request(url, headers={"User-Agent": "Mozilla/5.0"}) - downloaded = urlopen(req).read() - - dir_names = os.path.join(file_path, file_name) - os.makedirs(os.path.dirname(dir_names), exist_ok=True) - with open(dir_names, mode="wb") as f: - f.write(downloaded) + req = Request(url, headers={"User-Agent": "Mozilla/5.0"}) + downloaded = urlopen(req).read() + dir_names = os.path.join(file_path, file_name) + os.makedirs(os.path.dirname(dir_names), exist_ok=True) + with open(dir_names, mode="wb") as f: + f.write(downloaded) -def download_models(): +def download_controlnet(name: str, repo_id: str, token: str): """ - Downloads the model from Hugging Face and saves it to the cache path using - diffusers.StableDiffusionPipeline.from_pretrained(). + Download a controlnet. """ - import diffusers + cache_path = os.path.join(BASE_CACHE_PATH_CONTROLNET, name) + controlnet = diffusers.ControlNetModel.from_pretrained( + repo_id, + use_auth_token=token, + cache_dir=cache_path, + ) + controlnet.save_pretrained(cache_path, safe_serialization=True) - hugging_face_token = os.environ["HUGGING_FACE_TOKEN"] - model_repo_id = os.environ["MODEL_REPO_ID"] - cache_path = os.path.join(BASE_CACHE_PATH, os.environ["MODEL_NAME"]) +def download_vae(name: str, repo_id: str, token: str): + """ + Download a vae. + """ + cache_path = os.path.join(BASE_CACHE_PATH, name) vae = diffusers.AutoencoderKL.from_pretrained( - "stabilityai/sd-vae-ft-mse", - use_auth_token=hugging_face_token, + repo_id, + use_auth_token=token, cache_dir=cache_path, ) vae.save_pretrained(cache_path, safe_serialization=True) + +def download_model(name: str, repo_id: str, token: str): + """ + Download a model. + """ + cache_path = os.path.join(BASE_CACHE_PATH, name) pipe = diffusers.StableDiffusionPipeline.from_pretrained( - model_repo_id, - use_auth_token=hugging_face_token, + repo_id, + use_auth_token=token, cache_dir=cache_path, ) pipe.save_pretrained(cache_path, safe_serialization=True) @@ -59,52 +70,82 @@ def build_image(): """ Build the Docker image. 
""" - download_models() + token = os.environ["HUGGING_FACE_TOKEN"] + config = {} + with open("/config.yml", "r") as file: + config = yaml.safe_load(file) - if os.environ["LORA_NAMES"] != "": - download_files( - os.getenv("LORA_DOWNLOAD_URLS"), - os.getenv("LORA_NAMES"), - BASE_CACHE_PATH_LORA, - ) + model = config.get("model") + if model is not None: + download_model(name=model["name"], repo_id=model["repo_id"], token=token) - if os.environ["TEXTUAL_INVERSION_NAMES"] != "": - download_files( - os.getenv("TEXTUAL_INVERSION_DOWNLOAD_URLS"), - os.getenv("TEXTUAL_INVERSION_NAMES"), - BASE_CACHE_PATH_TEXTUAL_INVERSION, - ) + vae = config.get("vae") + if vae is not None: + download_vae(name=model["name"], repo_id=vae["repo_id"], token=token) + + controlnets = config.get("controlnets") + if controlnets is not None: + for controlnet in controlnets: + download_controlnet(name=controlnet["name"], repo_id=controlnet["repo_id"], token=token) + + loras = config.get("loras") + if loras is not None: + for lora in loras: + download_file( + url=lora["download_url"], + file_name=lora["name"], + file_path=BASE_CACHE_PATH_LORA, + ) + + textual_inversions = config.get("textual_inversions") + if textual_inversions is not None: + for textual_inversion in textual_inversions: + download_file( + url=textual_inversion["download_url"], + file_name=textual_inversion["name"], + file_path=BASE_CACHE_PATH_TEXTUAL_INVERSION, + ) -stub_image = Image.from_dockerfile( - path="./Dockerfile", - context_mount=Mount.from_local_file("./requirements.txt"), +stub = Stub("stable-diffusion-cli") +base_stub = Image.from_dockerfile( + path="./setup_files/Dockerfile", + context_mount=Mount.from_local_file("./setup_files/requirements.txt"), +) +stub.image = base_stub.extend( + dockerfile_commands=[ + "FROM base", + "COPY ./config.yml /", + ], + context_mount=Mount.from_local_file("./setup_files/config.yml"), ).run_function( build_image, secrets=[Secret.from_dotenv(__file__)], ) -stub = Stub("stable-diffusion-cli") -stub.image = stub_image -@stub.cls(gpu="A10G", secrets=[Secret.from_dotenv(__file__)]) +@stub.cls( + gpu="A10G", + secrets=[Secret.from_dotenv(__file__)], +) class StableDiffusion(ClsMixin): """ A class that wraps the Stable Diffusion pipeline and scheduler. """ def __enter__(self): - import diffusers import torch - self.cache_path = os.path.join(BASE_CACHE_PATH, os.environ["MODEL_NAME"]) + config = {} + with open("/config.yml", "r") as file: + config = yaml.safe_load(file) + self.cache_path = os.path.join(BASE_CACHE_PATH, config["model"]["name"]) if os.path.exists(self.cache_path): print(f"The directory '{self.cache_path}' exists.") else: - print(f"The directory '{self.cache_path}' does not exist. 
-            download_models()
+            print(f"The directory '{self.cache_path}' does not exist.")
 
-        torch.backends.cuda.matmul.allow_tf32 = True
+        torch.cuda.memory._set_allocator_settings("max_split_size_mb:256")
 
         self.pipe = diffusers.StableDiffusionPipeline.from_pretrained(
             self.cache_path,
@@ -119,40 +160,65 @@ class StableDiffusion(ClsMixin):
             subfolder="scheduler",
         )
 
-        if os.environ["USE_VAE"] == "true":
+        vae = config.get("vae")
+        if vae is not None:
             self.pipe.vae = diffusers.AutoencoderKL.from_pretrained(
                 self.cache_path,
                 subfolder="vae",
             )
 
-        self.pipe.to("cuda")
 
-        if os.environ["LORA_NAMES"] != "":
-            names = os.environ["LORA_NAMES"].split(",")
-            urls = os.environ["LORA_DOWNLOAD_URLS"].split(",")
-            for name, url in zip(names, urls):
-                path = os.path.join(BASE_CACHE_PATH_LORA, name)
+        loras = config.get("loras")
+        if loras is not None:
+            for lora in loras:
+                path = os.path.join(BASE_CACHE_PATH_LORA, lora["name"])
                 if os.path.exists(path):
                     print(f"The directory '{path}' exists.")
                 else:
                     print(f"The directory '{path}' does not exist. Download it...")
-                    download_files(url, name, BASE_CACHE_PATH_LORA)
+                    download_file(lora["download_url"], lora["name"], BASE_CACHE_PATH_LORA)
                 self.pipe.load_lora_weights(".", weight_name=path)
 
-        if os.environ["TEXTUAL_INVERSION_NAMES"] != "":
-            names = os.environ["TEXTUAL_INVERSION_NAMES"].split(",")
-            urls = os.environ["TEXTUAL_INVERSION_DOWNLOAD_URLS"].split(",")
-            for name, url in zip(names, urls):
-                path = os.path.join(BASE_CACHE_PATH_TEXTUAL_INVERSION, name)
+        textual_inversions = config.get("textual_inversions")
+        if textual_inversions is not None:
+            for textual_inversion in textual_inversions:
+                path = os.path.join(BASE_CACHE_PATH_TEXTUAL_INVERSION, textual_inversion["name"])
                 if os.path.exists(path):
                     print(f"The directory '{path}' exists.")
                 else:
                     print(f"The directory '{path}' does not exist. Download it...")
-                    download_files(url, name, BASE_CACHE_PATH_TEXTUAL_INVERSION)
+                    download_file(
+                        textual_inversion["download_url"],
+                        textual_inversion["name"],
+                        BASE_CACHE_PATH_TEXTUAL_INVERSION,
+                    )
                 self.pipe.load_textual_inversion(path)
 
         self.pipe.enable_xformers_memory_efficient_attention()
 
+        # TODO: Add support for controlnets.
+        # controlnet = diffusers.ControlNetModel.from_pretrained(
+        #     "lllyasviel/control_v11f1e_sd15_tile",
+        #     # "lllyasviel/sd-controlnet-canny",
+        #     # self.cache_path,
+        #     # subfolder="controlnet",
+        #     torch_dtype=torch.float16,
+        # )
+
+        # self.controlnet_pipe = diffusers.StableDiffusionControlNetPipeline.from_pretrained(
+        #     self.cache_path,
+        #     controlnet=controlnet,
+        #     custom_pipeline="lpw_stable_diffusion",
+        #     # custom_pipeline="stable_diffusion_controlnet_img2img",
+        #     scheduler=self.pipe.scheduler,
+        #     vae=self.pipe.vae,
+        #     torch_dtype=torch.float16,
+        # )
+
+        # self.controlnet_pipe.to("cuda")
+
+        # self.controlnet_pipe.enable_xformers_memory_efficient_attention()
+
     @method()
     def count_token(self, p: str, n: str) -> int:
         """
@@ -214,6 +280,22 @@ class StableDiffusion(ClsMixin):
                     generator=generator,
                 ).images
 
+        # for image in base_images:
+        #     image = self.resize_image(image=image, scale_factor=2)
+        #     with torch.inference_mode():
+        #         with torch.autocast("cuda"):
+        #             generatedWithControlnet = self.controlnet_pipe(
+        #                 prompt=prompt * batch_size,
+        #                 negative_prompt=n_prompt * batch_size,
+        #                 num_inference_steps=steps,
+        #                 strength=0.3,
+        #                 guidance_scale=7.5,
+        #                 max_embeddings_multiples=max_embeddings_multiples,
+        #                 generator=generator,
+        #                 image=image,
+        #             ).images
+        #             base_images.extend(generatedWithControlnet)
+
         if upscaler != "":
             upscaled = self.upscale(
                 base_images=base_images,
@@ -224,8 +306,8 @@ class StableDiffusion(ClsMixin):
                 use_hires_fix=use_hires_fix,
             )
             base_images.extend(upscaled)
+
            if use_hires_fix:
-                torch.cuda.empty_cache()
                for img in upscaled:
                    with torch.inference_mode():
                        with torch.autocast("cuda"):
@@ -240,7 +322,6 @@ class StableDiffusion(ClsMixin):
                                image=img,
                            ).images
                base_images.extend(hires_fixed)
-            torch.cuda.empty_cache()
 
        image_output = []
        for image in base_images:
@@ -250,6 +331,15 @@ class StableDiffusion(ClsMixin):
 
        return image_output
 
+    @method()
+    def resize_image(self, image: Image.Image, scale_factor: int) -> Image.Image:
+        from PIL import Image
+
+        image = image.convert("RGB")
+        width, height = image.size
+        img = image.resize((width * scale_factor, height * scale_factor), resample=Image.LANCZOS)
+        return img
+
    @method()
    def upscale(
        self,
@@ -263,7 +353,7 @@ class StableDiffusion(ClsMixin):
        use_hires_fix: bool = False,
    ) -> list[Image.Image]:
        """
-        Upscales the given images using the given model.
+        Upscales the given images using an upscaler.
        https://github.com/xinntao/Real-ESRGAN
        """
        import numpy
@@ -312,7 +402,6 @@ class StableDiffusion(ClsMixin):
            bg_upsampler=upsampler,
        )
 
-        torch.cuda.empty_cache()
        upscaled_imgs = []
        with tqdm(total=len(base_images)) as progress_bar:
            for img in base_images:
@@ -330,6 +419,4 @@ class StableDiffusion(ClsMixin):
                upscaled_imgs.append(Image.fromarray(enhance_result))
                progress_bar.update(1)
 
-        torch.cuda.empty_cache()
-
        return upscaled_imgs

From ecd5d580228308a7e2970c5274ba1f057c814c55 Mon Sep 17 00:00:00 2001
From: hodanov <1031hoda@gmail.com>
Date: Sun, 2 Jul 2023 22:50:50 +0900
Subject: [PATCH 3/6] Repair a pycodestyle error.

---
 setup_files/setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/setup_files/setup.py b/setup_files/setup.py
index 32c9b2d..2c1fde2 100644
--- a/setup_files/setup.py
+++ b/setup_files/setup.py
@@ -357,7 +357,6 @@ class StableDiffusion(ClsMixin):
         https://github.com/xinntao/Real-ESRGAN
         """
         import numpy
-        import torch
         from basicsr.archs.rrdbnet_arch import RRDBNet
         from PIL import Image
         from realesrgan import RealESRGANer

From 762f28626d4850d10583eda2c32c6c5ed0b21fb1 Mon Sep 17 00:00:00 2001
From: hodanov <1031hoda@gmail.com>
Date: Sun, 2 Jul 2023 23:08:59 +0900
Subject: [PATCH 4/6] Modify README.md

---
 README.md | 28 ++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index c0706c8..1f8e754 100644
--- a/README.md
+++ b/README.md
@@ -29,13 +29,33 @@ Please see [the documentation of Modal](https://modal.com/docs/guide) for modals
 To use the script, execute the below.
 
 1. git clone the repository.
-2. Create the `.env` file and set a huggingface API token and a model with reference to `.env.example`.
-3. Open the Makefile and set prompts.
-4. Execute `make deploy` command. An application will be deployed to Modal by the command.
-5. Execute `make run` command.
+2. Create the `./setup_files/.env` file and set a Hugging Face API token with reference to `./setup_files/.env.example`.
+3. Copy `./setup_files/config.sample.yml` to `./setup_files/config.yml`.
+4. Open the Makefile and set prompts.
+5. Execute the `make deploy` command. An application will be deployed to Modal.
+6. Execute the `make run` command.
 
 Images are generated and output to the `outputs/` directory.
 
+## Directory structure
+
+```
+.
+├── Makefile
+├── README.md
+├── sdcli/ # A directory with scripts to run inference.
+│   ├── __init__.py
+│   ├── outputs/ # Images are output to this directory.
+│   ├── txt2img.py # A script to run txt2img inference.
+│   └── util.py
+└── setup_files/ # A directory with config files.
+    ├── .env # Secrets manager
+    ├── Dockerfile # To build a base image.
+    ├── config.yml # To set a model, VAE and some tools.
+    ├── requirements.txt
+    └── setup.py # To build an application to deploy on Modal.
+```
+
 Thank you.
 
 ## Author

From 7f5a0c863443463438dc8b6b007d8389587dedf1 Mon Sep 17 00:00:00 2001
From: hodanov <1031hoda@gmail.com>
Date: Sun, 2 Jul 2023 23:11:28 +0900
Subject: [PATCH 5/6] Move .env.example

---
 setup_files/.env.example => .env.example | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename setup_files/.env.example => .env.example (100%)

diff --git a/setup_files/.env.example b/.env.example
similarity index 100%
rename from setup_files/.env.example
rename to .env.example

From 22d74ceff714de7d086cf7018200ecfc376b938b Mon Sep 17 00:00:00 2001
From: hodanov <1031hoda@gmail.com>
Date: Sun, 2 Jul 2023 23:16:52 +0900
Subject: [PATCH 6/6] Modify README.md

---
 README.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 1f8e754..710fb9c 100644
--- a/README.md
+++ b/README.md
@@ -41,19 +41,19 @@ Images are generated and output to the `outputs/` directory.
 
 ```
 .
+├── .env # Secrets manager
 ├── Makefile
 ├── README.md
 ├── sdcli/ # A directory with scripts to run inference.
 │   ├── __init__.py
-│   ├── outputs/ # Images are output to this directory.
-│   ├── txt2img.py # A script to run txt2img inference.
+│   ├── outputs/    # Images are output to this directory.
+│   ├── txt2img.py  # A script to run txt2img inference.
 │   └── util.py
 └── setup_files/ # A directory with config files.
-    ├── .env # Secrets manager
-    ├── Dockerfile # To build a base image.
-    ├── config.yml # To set a model, VAE and some tools.
+    ├── Dockerfile       # To build a base image.
+    ├── config.yml       # To set a model, VAE and some tools.
     ├── requirements.txt
-    └── setup.py # To build an application to deploy on Modal.
+    └── setup.py         # To build an application to deploy on Modal.
 ```
 
 Thank you.
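Note: `setup_files/setup.py` reads `/config.yml` with `yaml.safe_load` and looks every section up via `config.get(...)`, so a misspelled section name is silently skipped rather than reported. The snippet below is a hypothetical, stand-alone sketch (not part of the patches above) that loads `./setup_files/config.yml` the same way and reports which sections are present; it assumes only that PyYAML is installed.

```python
# check_config.py -- hypothetical helper, not included in the patch series.
# Loads the config the same way setup_files/setup.py does (yaml.safe_load +
# config.get) and reports which optional sections are present.
import yaml

EXPECTED_KEYS = ("model", "vae", "loras", "textual_inversions", "controlnets", "upscaler")


def main(path: str = "./setup_files/config.yml") -> None:
    with open(path, "r") as file:
        config = yaml.safe_load(file) or {}

    # `model` is the only section setup.py indexes directly (config["model"]["name"]),
    # so warn if it is missing.
    if config.get("model") is None:
        print("warning: no `model` section found; setup.py expects one")

    for key in EXPECTED_KEYS:
        print(f"{key}: {'present' if config.get(key) is not None else 'not set'}")

    # A misspelled section (e.g. `cotrolnets`) would land here and be ignored by setup.py.
    unknown = sorted(set(config) - set(EXPECTED_KEYS))
    if unknown:
        print(f"warning: sections ignored by setup.py: {unknown}")


if __name__ == "__main__":
    main()
```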