From 6b522e20eb3acf2201e414a52189bd135ed127bc Mon Sep 17 00:00:00 2001 From: hodanov <1031hoda@gmail.com> Date: Sun, 10 Dec 2023 16:46:03 +0900 Subject: [PATCH 1/2] Implement SDXLTxt2Img. --- Makefile | 19 ++- sdcli/{txt2img.py => sd15_txt2img.py} | 2 +- sdcli/sdxl_txt2img.py | 51 ++++++++ setup_files/__main__.py | 6 +- setup_files/setup.py | 26 +++- setup_files/stable_diffusion_1_5.py | 2 +- setup_files/stable_diffusion_xl.py | 180 ++++++++++++++++++++++++++ 7 files changed, 277 insertions(+), 9 deletions(-) rename sdcli/{txt2img.py => sd15_txt2img.py} (98%) create mode 100644 sdcli/sdxl_txt2img.py create mode 100644 setup_files/stable_diffusion_xl.py diff --git a/Makefile b/Makefile index f822479..e2e07fc 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -deploy: +app: cd ./setup_files && modal deploy __main__.py # `--upscaler` is a name of upscaler you want to use. @@ -7,8 +7,8 @@ deploy: # - `RealESRNet_x4plus` # - `RealESRGAN_x4plus_anime_6B` # - `RealESRGAN_x2plus` -run: - cd ./sdcli && modal run txt2img.py \ +img_by_sd15_txt2img: + cd ./sdcli && modal run sd15_txt2img.py \ --prompt "a photograph of an astronaut riding a horse" \ --n-prompt "" \ --height 512 \ @@ -17,4 +17,15 @@ run: --steps 30 \ --upscaler "RealESRGAN_x2plus" \ --use-face-enhancer "False" \ - --fix-by-controlnet-tile "True" + --fix-by-controlnet-tile "True" \ + --output-format "avif" + + +img_by_sdxl_txt2img: + cd ./sdcli && modal run sdxl_txt2img.py \ + --prompt "A dog is running on the grass" \ + --height 1024 \ + --width 1024 \ + --samples 1 \ + --upscaler "RealESRGAN_x2plus" \ + --output-format "avif" \ No newline at end of file diff --git a/sdcli/txt2img.py b/sdcli/sd15_txt2img.py similarity index 98% rename from sdcli/txt2img.py rename to sdcli/sd15_txt2img.py index 430c403..26d876f 100644 --- a/sdcli/txt2img.py +++ b/sdcli/sd15_txt2img.py @@ -4,7 +4,7 @@ import modal import util stub = modal.Stub("run-stable-diffusion-cli") -stub.run_inference = 
modal.Function.from_name("stable-diffusion-cli", "Txt2Img.run_inference") +stub.run_inference = modal.Function.from_name("stable-diffusion-cli", "SD15Txt2Img.run_inference") @stub.local_entrypoint() diff --git a/sdcli/sdxl_txt2img.py b/sdcli/sdxl_txt2img.py new file mode 100644 index 0000000..0b018a9 --- /dev/null +++ b/sdcli/sdxl_txt2img.py @@ -0,0 +1,51 @@ +import time + +import modal +import util + +stub = modal.Stub("run-stable-diffusion-cli") +stub.run_inference = modal.Function.from_name("stable-diffusion-cli", "SDXLTxt2Img.run_inference") + + +@stub.local_entrypoint() +def main( + prompt: str, + height: int = 1024, + width: int = 1024, + samples: int = 5, + seed: int = -1, + upscaler: str = "", + use_face_enhancer: str = "False", + output_format: str = "png", +): + """ + This function is the entrypoint for the Runway CLI. + The function pass the given prompt to StableDiffusion on Modal, + gets back a list of images and outputs images to local. + """ + directory = util.make_directory() + seed_generated = seed + for i in range(samples): + if seed == -1: + seed_generated = util.generate_seed() + start_time = time.time() + images = stub.run_inference.remote( + prompt=prompt, + height=height, + width=width, + seed=seed_generated, + upscaler=upscaler, + use_face_enhancer=use_face_enhancer == "True", + output_format=output_format, + ) + util.save_images(directory, images, seed_generated, i, output_format) + total_time = time.time() - start_time + print(f"Sample {i} took {total_time:.3f}s ({(total_time)/len(images):.3f}s / image).") + + prompts: dict[str, int | str] = { + "prompt": prompt, + "height": height, + "width": width, + "samples": samples, + } + util.save_prompts(prompts) diff --git a/setup_files/__main__.py b/setup_files/__main__.py index 5fe3ea8..2d88a7b 100644 --- a/setup_files/__main__.py +++ b/setup_files/__main__.py @@ -1,12 +1,14 @@ from __future__ import annotations +import stable_diffusion_1_5 +import stable_diffusion_xl from setup import stub 
-from stable_diffusion_1_5 import Txt2Img @stub.function(gpu="A10G") def main(): - Txt2Img + stable_diffusion_1_5.SD15Txt2Img + stable_diffusion_xl.SDXLTxt2Img if __name__ == "__main__": diff --git a/setup_files/setup.py b/setup_files/setup.py index 726bf01..791933c 100644 --- a/setup_files/setup.py +++ b/setup_files/setup.py @@ -64,6 +64,26 @@ def download_model(name: str, model_url: str, token: str): pipe.save_pretrained(cache_path, safe_serialization=True) +def download_model_sdxl(name: str, model_url: str, token: str): + """ + Download a sdxl model. + """ + cache_path = os.path.join(BASE_CACHE_PATH, name) + pipe = diffusers.StableDiffusionXLPipeline.from_single_file( + pretrained_model_link_or_path=model_url, + use_auth_token=token, + cache_dir=cache_path, + ) + pipe.save_pretrained(cache_path, safe_serialization=True) + + refiner_cache_path = cache_path + "-refiner" + refiner = diffusers.StableDiffusionXLImg2ImgPipeline.from_single_file( + "https://huggingface.co/stabilityai/stable-diffusion-xl-refiner-1.0/blob/main/sd_xl_refiner_1.0.safetensors", + cache_dir=refiner_cache_path, + ) + refiner.save_pretrained(refiner_cache_path, safe_serialization=True) + + def build_image(): """ Build the Docker image. 
@@ -76,8 +96,12 @@ def build_image(): config = yaml.safe_load(file) model = config.get("model") + use_xl = config.get("use_xl") if model is not None: - download_model(name=model["name"], model_url=model["url"], token=token) + if use_xl is not None and use_xl: + download_model_sdxl(name=model["name"], model_url=model["url"], token=token) + else: + download_model(name=model["name"], model_url=model["url"], token=token) vae = config.get("vae") if vae is not None: diff --git a/setup_files/stable_diffusion_1_5.py b/setup_files/stable_diffusion_1_5.py index 645930c..8b55529 100644 --- a/setup_files/stable_diffusion_1_5.py +++ b/setup_files/stable_diffusion_1_5.py @@ -18,7 +18,7 @@ from setup import ( gpu="A10G", secrets=[Secret.from_dotenv(__file__)], ) -class Txt2Img: +class SD15Txt2Img: """ A class that wraps the Stable Diffusion pipeline and scheduler. """ diff --git a/setup_files/stable_diffusion_xl.py b/setup_files/stable_diffusion_xl.py new file mode 100644 index 0000000..c4012fa --- /dev/null +++ b/setup_files/stable_diffusion_xl.py @@ -0,0 +1,180 @@ +from __future__ import annotations + +import io +import os + +import PIL.Image +from modal import Secret, method +from setup import BASE_CACHE_PATH, stub + + +@stub.cls( + gpu="A10G", + secrets=[Secret.from_dotenv(__file__)], +) +class SDXLTxt2Img: + """ + A class that wraps the Stable Diffusion pipeline and scheduler. 
+ """ + + def __enter__(self): + import diffusers + import torch + import yaml + + config = {} + with open("/config.yml", "r") as file: + config = yaml.safe_load(file) + self.cache_path = os.path.join(BASE_CACHE_PATH, config["model"]["name"]) + if os.path.exists(self.cache_path): + print(f"The directory '{self.cache_path}' exists.") + else: + print(f"The directory '{self.cache_path}' does not exist.") + + self.pipe = diffusers.AutoPipelineForText2Image.from_pretrained( + self.cache_path, + torch_dtype=torch.float16, + use_safetensors=True, + variant="fp16", + ) + + self.refiner_cache_path = self.cache_path + "-refiner" + self.refiner = diffusers.StableDiffusionXLImg2ImgPipeline.from_pretrained( + self.refiner_cache_path, + torch_dtype=torch.float16, + use_safetensors=True, + variant="fp16", + ) + + @method() + def run_inference( + self, + prompt: str, + height: int = 1024, + width: int = 1024, + seed: int = 1, + upscaler: str = "", + use_face_enhancer: bool = False, + output_format: str = "png", + ) -> list[bytes]: + """ + Runs the Stable Diffusion pipeline on the given prompt and outputs images. 
+ """ + import pillow_avif # noqa + import torch + + generator = torch.Generator("cuda").manual_seed(seed) + self.pipe.to("cuda") + generated_images = self.pipe( + prompt=prompt, + height=height, + width=width, + generator=generator, + ).images + base_images = generated_images + + for image in base_images: + self.refiner.to("cuda") + refined_images = self.refiner( + prompt=prompt, + image=image, + ).images + generated_images.extend(refined_images) + base_images = refined_images + + if upscaler != "": + upscaled = self._upscale( + base_images=base_images, + half_precision=False, + tile=700, + upscaler=upscaler, + use_face_enhancer=use_face_enhancer, + ) + generated_images.extend(upscaled) + + image_output = [] + for image in generated_images: + with io.BytesIO() as buf: + image.save(buf, format=output_format) + image_output.append(buf.getvalue()) + + return image_output + + def _upscale( + self, + base_images: list[PIL.Image], + half_precision: bool = False, + tile: int = 0, + tile_pad: int = 10, + pre_pad: int = 0, + upscaler: str = "", + use_face_enhancer: bool = False, + ) -> list[PIL.Image]: + """ + Upscale the generated images by the upscaler when `upscaler` is selected. 
+ The upscaler can be selected from the following list: + - `RealESRGAN_x4plus` + - `RealESRNet_x4plus` + - `RealESRGAN_x4plus_anime_6B` + - `RealESRGAN_x2plus` + https://github.com/xinntao/Real-ESRGAN + """ + import numpy + from basicsr.archs.rrdbnet_arch import RRDBNet + from gfpgan import GFPGANer + from realesrgan import RealESRGANer + from tqdm import tqdm + + model_name = upscaler + if model_name == "RealESRGAN_x4plus": + upscale_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4) + netscale = 4 + elif model_name == "RealESRNet_x4plus": + upscale_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4) + netscale = 4 + elif model_name == "RealESRGAN_x4plus_anime_6B": + upscale_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4) + netscale = 4 + elif model_name == "RealESRGAN_x2plus": + upscale_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2) + netscale = 2 + else: + raise NotImplementedError("Model name not supported") + + upsampler = RealESRGANer( + scale=netscale, + model_path=os.path.join(BASE_CACHE_PATH, "esrgan", f"{model_name}.pth"), + dni_weight=None, + model=upscale_model, + tile=tile, + tile_pad=tile_pad, + pre_pad=pre_pad, + half=half_precision, + gpu_id=None, + ) + + if use_face_enhancer: + face_enhancer = GFPGANer( + model_path=os.path.join(BASE_CACHE_PATH, "esrgan", "GFPGANv1.3.pth"), + upscale=netscale, + arch="clean", + channel_multiplier=2, + bg_upsampler=upsampler, + ) + + upscaled_imgs = [] + for img in base_images: + img = numpy.array(img) + if use_face_enhancer: + _, _, enhance_result = face_enhancer.enhance( + img, + has_aligned=False, + only_center_face=False, + paste_back=True, + ) + else: + enhance_result, _ = upsampler.enhance(img) + + upscaled_imgs.append(PIL.Image.fromarray(enhance_result)) + + return upscaled_imgs From c882fa4649e2b124d1318767ec52db8c5764e5a8 Mon 
Sep 17 00:00:00 2001 From: hodanov <1031hoda@gmail.com> Date: Sun, 10 Dec 2023 17:00:05 +0900 Subject: [PATCH 2/2] Update README. --- README.md | 17 ++++++++++------- README_ja.md | 18 +++++++++++------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index febb69b..0ff0bf6 100644 --- a/README.md +++ b/README.md @@ -44,8 +44,8 @@ To use the script, execute the below. 1. git clone the repository. 2. Copy `./setup_files/config.sample.yml` to `./setup_files/config.yml` 3. Open the Makefile and set prompts. -4. Execute `make deploy` command. An application will be deployed to Modal. -5. Execute `make run` command. +4. Execute `make app` command. An application will be deployed to Modal. +5. Execute `make img_by_sd15_txt2img` command. Images are generated and output to the `outputs/` directory. @@ -58,7 +58,8 @@ Images are generated and output to the `outputs/` directory. ├── README.md ├── sdcli/ # A directory with scripts to run inference. │   ├── outputs/ # Images are outputted this directory. -│   ├── txt2img.py # A script to run txt2img inference. +│   ├── sd15_txt2img.py # A script to run sd15_txt2img inference. +│   ├── sdxl_txt2img.py # A script to run sdxl_txt2img inference. │   └── util.py └── setup_files/ # A directory with config files. ├── __main__.py # A main script to run inference. @@ -66,7 +67,8 @@ Images are generated and output to the `outputs/` directory. ├── config.yml # To set a model, vae and some tools. ├── requirements.txt ├── setup.py # Build an application to deploy on Modal. - └── txt2img.py # There is a class to run inference. + ├── stable_diffusion_1_5.py # There is a class to run inference about sd15. + └── stable_diffusion_xl.py # There is a class to run inference about sdxl. ``` ## How to use @@ -131,7 +133,8 @@ run: --seed 12321 | --upscaler "RealESRGAN_x2plus" \ --use-face-enhancer "False" \ - --fix-by-controlnet-tile "True" + --fix-by-controlnet-tile "True" \ + --output-format "avif" ``` ### 5. 
make deploy @@ -139,7 +142,7 @@ run: Execute the below command. An application will be deployed on Modal. ```bash -make deploy +make app ``` ### 6. make run @@ -147,7 +150,7 @@ make deploy The txt2img inference is executed with the following command. ```bash -make run +make img_by_sd15_txt2img ``` Thank you. diff --git a/README_ja.md b/README_ja.md index 89fdb6d..358dc9e 100644 --- a/README_ja.md +++ b/README_ja.md @@ -46,8 +46,8 @@ modal token new 1. リポジトリをgit clone 2. ./setup_files/config.example.yml を ./setup_files/config.ymlにコピー 3. Makefile を開いてプロンプトを設定 -4. make deployをコマンドラインで実行(Modal上にアプリケーションが構築されます) -5. make run(スクリプトが起動します) +4. make appをコマンドラインで実行(Modal上にアプリケーションが構築されます) +5. make img_by_sd15_txt2img(スクリプトが起動します) ## ディレクトリ構成 @@ -58,7 +58,8 @@ modal token new ├── README.md ├── sdcli/ # A directory with scripts to run inference. │   ├── outputs/ # Images are outputted this directory. -│   ├── txt2img.py # A script to run txt2img inference. +│   ├── sd15_txt2img.py # A script to run sd15_txt2img inference. +│   ├── sdxl_txt2img.py # A script to run sdxl_txt2img inference. │   └── util.py └── setup_files/ # A directory with config files. ├── __main__.py # A main script to run inference. @@ -66,7 +67,8 @@ modal token new ├── config.yml # To set a model, vae and some tools. ├── requirements.txt ├── setup.py # Build an application to deploy on Modal. - └── txt2img.py # There is a class to run inference. + ├── stable_diffusion_1_5.py # There is a class to run inference about sd15. + └── stable_diffusion_xl.py # There is a class to run inference about sdxl. 
``` ## 使い方の詳細 @@ -133,7 +135,8 @@ run: --seed 12321 | --upscaler "RealESRGAN_x2plus" \ --use-face-enhancer "False" \ - --fix-by-controlnet-tile "True" + --fix-by-controlnet-tile "True" \ + --output-format "png" ``` - prompt: プロンプトを指定します。 @@ -145,13 +148,14 @@ run: - seed: seedを指定します。 - upscaler: 画像の解像度を上げるためのアップスケーラーを指定します。 - fix-by-controlnet-tile: ControlNet 1.1 Tileの利用有無を指定します。有効にすると、崩れた画像を修復しつつ、高解像度な画像を生成します。 +- output-format: 出力フォーマットを指定します。avifも指定可能です。 ### 5. make deployの実行 下記のコマンドでModal上にアプリケーションが構築されます。 ```bash -make deploy +make app ``` ### 6. make runの実行 @@ -159,5 +163,5 @@ make deploy 下記のコマンドでtxt2img推論が実行されます。 ```bash -make run +make img_by_sd15_txt2img ```