diff --git a/.env.example b/.env.example deleted file mode 100644 index e14931a..0000000 --- a/.env.example +++ /dev/null @@ -1,22 +0,0 @@ -HUGGING_FACE_TOKEN="" -MODEL_REPO_ID="stabilityai/stable-diffusion-2-1" -MODEL_NAME="stable-diffusion-2-1" - -# Modify `USE_VAE` to `true` if you want to use VAE. -USE_VAE="false" - -# Add LoRA if you want to use one. You can use a download link of civitai. -# ex) -# - `LORA_NAMES="hogehoge.safetensors"` -# - `LORA_DOWNLOAD_URLS="https://civitai.com/api/download/models/xxxxxx"` -# -# If you have multiple LoRAs you want to use, separate by commas like the below: -# ex) -# - `LORA_NAMES="hogehoge.safetensors,mogumogu.safetensors"` -# - `LORA_DOWNLOAD_URLS="https://civitai.com/api/download/models/xxxxxx,https://civitai.com/api/download/models/xxxxxx"` -LORA_NAMES="" -LORA_DOWNLOAD_URLS="" - -# Add Textual Inversion you wan to use. Usage is the same as `LORA_NAMES` and `LORA_DOWNLOAD_URLS`. -TEXTUAL_INVERSION_NAMES="" -TEXTUAL_INVERSION_DOWNLOAD_URLS="" diff --git a/Makefile b/Makefile index 67716ae..400f818 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ deploy: - modal deploy setup.py + modal deploy ./setup_files/setup.py # `--upscaler` is a name of upscaler you want to use. # You can use upscalers the below: diff --git a/setup_files/.env.example b/setup_files/.env.example new file mode 100644 index 0000000..79c037d --- /dev/null +++ b/setup_files/.env.example @@ -0,0 +1,3 @@ +# `HUGGING_FACE_TOKEN` is the token for the Hugging Face API. +# The token can be found at https://huggingface.co/settings/tokens. 
+HUGGING_FACE_TOKEN="" diff --git a/Dockerfile b/setup_files/Dockerfile similarity index 91% rename from Dockerfile rename to setup_files/Dockerfile index d69b8c1..00227ed 100644 --- a/Dockerfile +++ b/setup_files/Dockerfile @@ -1,7 +1,9 @@ FROM python:3.11.3-slim-bullseye -COPY requirements.txt / +COPY ./requirements.txt / RUN apt update \ && apt install -y wget git libgl1-mesa-glx libglib2.0-0 \ + && apt autoremove -y \ + && apt clean -y \ && pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu117 \ && mkdir -p /vol/cache/esrgan \ && wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P /vol/cache/esrgan \ diff --git a/setup_files/config.sample.yml b/setup_files/config.sample.yml new file mode 100644 index 0000000..2f86f59 --- /dev/null +++ b/setup_files/config.sample.yml @@ -0,0 +1,38 @@ +########## +# This is the config file to set a base model, vae and some tools. +# Rename the file to `config.yml` before running the script. +# Execute `modal deploy ./setup_files/setup.py` every time you modify this file. +########## + +########## +# You can use a diffusers model and VAE on hugging face. +model: + name: stable-diffusion-2-1 + repo_id: stabilityai/stable-diffusion-2-1 +vae: + name: sd-vae-ft-mse + repo_id: stabilityai/sd-vae-ft-mse +########## +# Add LoRA if you want to use one. You can use a download url such as the below. +# ex) +# loras: +# - name: hogehoge.safetensors +# download_url: https://hogehoge/xxxx +# - name: fugafuga.safetensors +# download_url: https://fugafuga/xxxx + +########## +# You can use Textual Inversion and ControlNet also. Usage is the same as `loras`. 
+# ex) +# textual_inversions: +# - name: hogehoge +# download_url: https://hogehoge/xxxx +# - name: fugafuga +# download_url: https://fugafuga/xxxx +# controlnets: +# - name: control_v11f1e_sd15_tile +# repo_id: lllyasviel/control_v11f1e_sd15_tile +# upscaler: +# name: RealESRGAN_x2plus +# use_face_enhancer: false +# use_hires_fix: false diff --git a/requirements.txt b/setup_files/requirements.txt similarity index 91% rename from requirements.txt rename to setup_files/requirements.txt index 119f567..6ba20b7 100644 --- a/requirements.txt +++ b/setup_files/requirements.txt @@ -15,3 +15,6 @@ opencv-python Pillow torchvision tqdm + +controlnet_aux +pyyaml diff --git a/setup.py b/setup_files/setup.py similarity index 60% rename from setup.py rename to setup_files/setup.py index 33f905b..32c9b2d 100644 --- a/setup.py +++ b/setup_files/setup.py @@ -4,52 +4,63 @@ import io import os from urllib.request import Request, urlopen +import diffusers +import yaml from modal import Image, Mount, Secret, Stub, method from modal.cls import ClsMixin BASE_CACHE_PATH = "/vol/cache" BASE_CACHE_PATH_LORA = "/vol/cache/lora" BASE_CACHE_PATH_TEXTUAL_INVERSION = "/vol/cache/textual_inversion" +BASE_CACHE_PATH_CONTROLNET = "/vol/cache/controlnet" -def download_files(urls, file_names, file_path): +def download_file(url, file_name, file_path): """ Download files. 
""" - file_names = file_names.split(",") - urls = urls.split(",") - - for file_name, url in zip(file_names, urls): - req = Request(url, headers={"User-Agent": "Mozilla/5.0"}) - downloaded = urlopen(req).read() - - dir_names = os.path.join(file_path, file_name) - os.makedirs(os.path.dirname(dir_names), exist_ok=True) - with open(dir_names, mode="wb") as f: - f.write(downloaded) + req = Request(url, headers={"User-Agent": "Mozilla/5.0"}) + downloaded = urlopen(req).read() + dir_names = os.path.join(file_path, file_name) + os.makedirs(os.path.dirname(dir_names), exist_ok=True) + with open(dir_names, mode="wb") as f: + f.write(downloaded) -def download_models(): +def download_controlnet(name: str, repo_id: str, token: str): """ - Downloads the model from Hugging Face and saves it to the cache path using - diffusers.StableDiffusionPipeline.from_pretrained(). + Download a controlnet. """ - import diffusers + cache_path = os.path.join(BASE_CACHE_PATH_CONTROLNET, name) + controlnet = diffusers.ControlNetModel.from_pretrained( + repo_id, + use_auth_token=token, + cache_dir=cache_path, + ) + controlnet.save_pretrained(cache_path, safe_serialization=True) - hugging_face_token = os.environ["HUGGING_FACE_TOKEN"] - model_repo_id = os.environ["MODEL_REPO_ID"] - cache_path = os.path.join(BASE_CACHE_PATH, os.environ["MODEL_NAME"]) +def download_vae(name: str, repo_id: str, token: str): + """ + Download a vae. + """ + cache_path = os.path.join(BASE_CACHE_PATH, name) vae = diffusers.AutoencoderKL.from_pretrained( - "stabilityai/sd-vae-ft-mse", - use_auth_token=hugging_face_token, + repo_id, + use_auth_token=token, cache_dir=cache_path, ) vae.save_pretrained(cache_path, safe_serialization=True) + +def download_model(name: str, repo_id: str, token: str): + """ + Download a model. 
+ """ + cache_path = os.path.join(BASE_CACHE_PATH, name) pipe = diffusers.StableDiffusionPipeline.from_pretrained( - model_repo_id, - use_auth_token=hugging_face_token, + repo_id, + use_auth_token=token, cache_dir=cache_path, ) pipe.save_pretrained(cache_path, safe_serialization=True) @@ -59,52 +70,82 @@ def build_image(): """ Build the Docker image. """ - download_models() + token = os.environ["HUGGING_FACE_TOKEN"] + config = {} + with open("/config.yml", "r") as file: + config = yaml.safe_load(file) - if os.environ["LORA_NAMES"] != "": - download_files( - os.getenv("LORA_DOWNLOAD_URLS"), - os.getenv("LORA_NAMES"), - BASE_CACHE_PATH_LORA, - ) + model = config.get("model") + if model is not None: + download_model(name=model["name"], repo_id=model["repo_id"], token=token) - if os.environ["TEXTUAL_INVERSION_NAMES"] != "": - download_files( - os.getenv("TEXTUAL_INVERSION_DOWNLOAD_URLS"), - os.getenv("TEXTUAL_INVERSION_NAMES"), - BASE_CACHE_PATH_TEXTUAL_INVERSION, - ) + vae = config.get("vae") + if vae is not None: + download_vae(name=model["name"], repo_id=vae["repo_id"], token=token) + + controlnets = config.get("controlnets") + if controlnets is not None: + for controlnet in controlnets: + download_controlnet(name=controlnet["name"], repo_id=controlnet["repo_id"], token=token) + + loras = config.get("loras") + if loras is not None: + for lora in loras: + download_file( + url=lora["download_url"], + file_name=lora["name"], + file_path=BASE_CACHE_PATH_LORA, + ) + + textual_inversions = config.get("textual_inversions") + if textual_inversions is not None: + for textual_inversion in textual_inversions: + download_file( + url=textual_inversion["download_url"], + file_name=textual_inversion["name"], + file_path=BASE_CACHE_PATH_TEXTUAL_INVERSION, + ) -stub_image = Image.from_dockerfile( - path="./Dockerfile", - context_mount=Mount.from_local_file("./requirements.txt"), +stub = Stub("stable-diffusion-cli") +base_stub = Image.from_dockerfile( + 
path="./setup_files/Dockerfile", + context_mount=Mount.from_local_file("./setup_files/requirements.txt"), +) +stub.image = base_stub.extend( + dockerfile_commands=[ + "FROM base", + "COPY ./config.yml /", + ], + context_mount=Mount.from_local_file("./setup_files/config.yml"), ).run_function( build_image, secrets=[Secret.from_dotenv(__file__)], ) -stub = Stub("stable-diffusion-cli") -stub.image = stub_image -@stub.cls(gpu="A10G", secrets=[Secret.from_dotenv(__file__)]) +@stub.cls( + gpu="A10G", + secrets=[Secret.from_dotenv(__file__)], +) class StableDiffusion(ClsMixin): """ A class that wraps the Stable Diffusion pipeline and scheduler. """ def __enter__(self): - import diffusers import torch - self.cache_path = os.path.join(BASE_CACHE_PATH, os.environ["MODEL_NAME"]) + config = {} + with open("/config.yml", "r") as file: + config = yaml.safe_load(file) + self.cache_path = os.path.join(BASE_CACHE_PATH, config["model"]["name"]) if os.path.exists(self.cache_path): print(f"The directory '{self.cache_path}' exists.") else: - print(f"The directory '{self.cache_path}' does not exist. 
Download models...") - download_models() + print(f"The directory '{self.cache_path}' does not exist.") - torch.backends.cuda.matmul.allow_tf32 = True + torch.cuda.memory._set_allocator_settings("max_split_size_mb:256") self.pipe = diffusers.StableDiffusionPipeline.from_pretrained( self.cache_path, @@ -119,40 +160,65 @@ class StableDiffusion(ClsMixin): subfolder="scheduler", ) - if os.environ["USE_VAE"] == "true": + vae = config.get("vae") + if vae is not None: self.pipe.vae = diffusers.AutoencoderKL.from_pretrained( self.cache_path, subfolder="vae", ) - self.pipe.to("cuda") - if os.environ["LORA_NAMES"] != "": - names = os.environ["LORA_NAMES"].split(",") - urls = os.environ["LORA_DOWNLOAD_URLS"].split(",") - for name, url in zip(names, urls): - path = os.path.join(BASE_CACHE_PATH_LORA, name) + loras = config.get("loras") + if loras is not None: + for lora in loras: + path = os.path.join(BASE_CACHE_PATH_LORA, lora["name"]) if os.path.exists(path): print(f"The directory '{path}' exists.") else: print(f"The directory '{path}' does not exist. Download it...") - download_files(url, name, BASE_CACHE_PATH_LORA) + download_file(lora["download_url"], lora["name"], BASE_CACHE_PATH_LORA) self.pipe.load_lora_weights(".", weight_name=path) - if os.environ["TEXTUAL_INVERSION_NAMES"] != "": - names = os.environ["TEXTUAL_INVERSION_NAMES"].split(",") - urls = os.environ["TEXTUAL_INVERSION_DOWNLOAD_URLS"].split(",") - for name, url in zip(names, urls): - path = os.path.join(BASE_CACHE_PATH_TEXTUAL_INVERSION, name) + textual_inversions = config.get("textual_inversions") + if textual_inversions is not None: + for textual_inversion in textual_inversions: + path = os.path.join(BASE_CACHE_PATH_TEXTUAL_INVERSION, textual_inversion["name"]) if os.path.exists(path): print(f"The directory '{path}' exists.") else: print(f"The directory '{path}' does not exist. 
Download it...") - download_files(url, name, BASE_CACHE_PATH_TEXTUAL_INVERSION) + download_file( + textual_inversion["download_url"], + textual_inversion["name"], + BASE_CACHE_PATH_TEXTUAL_INVERSION, + ) self.pipe.load_textual_inversion(path) self.pipe.enable_xformers_memory_efficient_attention() + # TODO: Add support for controlnets. + # controlnet = diffusers.ControlNetModel.from_pretrained( + # "lllyasviel/control_v11f1e_sd15_tile", + # # "lllyasviel/sd-controlnet-canny", + # # self.cache_path, + # # subfolder="controlnet", + # torch_dtype=torch.float16, + # ) + + # self.controlnet_pipe = diffusers.StableDiffusionControlNetPipeline.from_pretrained( + # self.cache_path, + # controlnet=controlnet, + # custom_pipeline="lpw_stable_diffusion", + # # custom_pipeline="stable_diffusion_controlnet_img2img", + # scheduler=self.pipe.scheduler, + # vae=self.pipe.vae, + # torch_dtype=torch.float16, + # ) + + # self.controlnet_pipe.to("cuda") + + # self.controlnet_pipe.enable_xformers_memory_efficient_attention() + @method() def count_token(self, p: str, n: str) -> int: """ @@ -214,6 +280,22 @@ class StableDiffusion(ClsMixin): generator=generator, ).images + # for image in base_images: + # image = self.resize_image(image=image, scale_factor=2) + # with torch.inference_mode(): + # with torch.autocast("cuda"): + # generatedWithControlnet = self.controlnet_pipe( + # prompt=prompt * batch_size, + # negative_prompt=n_prompt * batch_size, + # num_inference_steps=steps, + # strength=0.3, + # guidance_scale=7.5, + # max_embeddings_multiples=max_embeddings_multiples, + # generator=generator, + # image=image, + # ).images + # base_images.extend(generatedWithControlnet) + if upscaler != "": upscaled = self.upscale( base_images=base_images, @@ -224,8 +306,8 @@ class StableDiffusion(ClsMixin): use_hires_fix=use_hires_fix, ) base_images.extend(upscaled) + if use_hires_fix: - torch.cuda.empty_cache() for img in upscaled: with torch.inference_mode(): with torch.autocast("cuda"): @@ -240,7 
+322,6 @@ class StableDiffusion(ClsMixin): image=img, ).images base_images.extend(hires_fixed) - torch.cuda.empty_cache() image_output = [] for image in base_images: @@ -250,6 +331,15 @@ class StableDiffusion(ClsMixin): return image_output + @method() + def resize_image(self, image: Image.Image, scale_factor: int) -> Image.Image: + from PIL import Image + + image = image.convert("RGB") + width, height = image.size + img = image.resize((width * scale_factor, height * scale_factor), resample=Image.LANCZOS) + return img + @method() def upscale( self, @@ -263,7 +353,7 @@ class StableDiffusion(ClsMixin): use_hires_fix: bool = False, ) -> list[Image.Image]: """ - Upscales the given images using the given model. + Upscales the given images using an upscaler. https://github.com/xinntao/Real-ESRGAN """ import numpy @@ -312,7 +402,6 @@ class StableDiffusion(ClsMixin): bg_upsampler=upsampler, ) - torch.cuda.empty_cache() upscaled_imgs = [] with tqdm(total=len(base_images)) as progress_bar: for img in base_images: @@ -330,6 +419,4 @@ class StableDiffusion(ClsMixin): upscaled_imgs.append(Image.fromarray(enhance_result)) progress_bar.update(1) - torch.cuda.empty_cache() - return upscaled_imgs