From d531c25bd01fd942326579d3d3d3564b40e3d95f Mon Sep 17 00:00:00 2001 From: hodanov <1031hoda@gmail.com> Date: Sun, 21 Apr 2024 21:34:57 +0900 Subject: [PATCH] Replace realesrgan to the Stable Diffusion latent upscaler. Remove unnecessary libraries. --- Makefile | 13 +--- README.md | 3 +- README_ja.md | 5 +- app/Dockerfile | 8 +-- app/requirements.txt | 26 ++----- app/setup.py | 12 ++++ app/stable_diffusion_1_5.py | 131 +++++++++--------------------------- app/stable_diffusion_xl.py | 99 +++------------------------ cmd/sd15_img2img.py | 6 +- cmd/sd15_txt2img.py | 6 +- cmd/sdxl_txt2img.py | 6 +- 11 files changed, 71 insertions(+), 244 deletions(-) diff --git a/Makefile b/Makefile index 826a109..3e48cc3 100644 --- a/Makefile +++ b/Makefile @@ -3,12 +3,6 @@ app: cd ./app && modal deploy __main__.py -# `--upscaler` is a name of upscaler you want to use. -# You can use upscalers the below: -# - `RealESRGAN_x4plus` -# - `RealESRNet_x4plus` -# - `RealESRGAN_x4plus_anime_6B` -# - `RealESRGAN_x2plus` img_by_sd15_txt2img: cd ./cmd && modal run sd15_txt2img.py \ --prompt "a photograph of an astronaut riding a horse" \ @@ -17,8 +11,7 @@ img_by_sd15_txt2img: --width 768 \ --samples 1 \ --steps 30 \ - --upscaler "RealESRGAN_x2plus" \ - --use-face-enhancer "False" \ + --use-upscaler "True" \ --fix-by-controlnet-tile "True" \ --output-format "avif" @@ -28,8 +21,7 @@ img_by_sd15_img2img: --n-prompt "" \ --samples 1 \ --steps 30 \ - --upscaler "RealESRGAN_x2plus" \ - --use-face-enhancer "False" \ + --use-upscaler "True" \ --fix-by-controlnet-tile "True" \ --output-format "avif" \ --base-image-url "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png" @@ -40,5 +32,4 @@ img_by_sdxl_txt2img: --height 1024 \ --width 1024 \ --samples 1 \ - --upscaler "RealESRGAN_x2plus" \ --output-format "avif" \ No newline at end of file diff --git a/README.md b/README.md index fe41eea..8e6ec7c 100644 --- a/README.md +++ b/README.md @@ -132,8 +132,7 @@ run: --samples 1 \ --steps 30 \ --seed 12321 | - --upscaler "RealESRGAN_x2plus" \ - --use-face-enhancer "False" \ + --use-upscaler "True" \ --fix-by-controlnet-tile "True" \ --output-fomart "avif" ``` diff --git a/README_ja.md b/README_ja.md index 6952a02..a9f9b9c 100644 --- a/README_ja.md +++ b/README_ja.md @@ -134,8 +134,7 @@ run: --samples 1 \ --steps 30 \ --seed 12321 | - --upscaler "RealESRGAN_x2plus" \ - --use-face-enhancer "False" \ + --use-upscaler "True" \ --fix-by-controlnet-tile "True" \ --output-fomart "png" ``` @@ -147,7 +146,7 @@ run: - samples: 生成する画像の数を指定します。 - steps: ステップ数を指定します。 - seed: seedを指定します。 -- upscaler: 画像の解像度を上げるためのアップスケーラーを指定します。 +- use-upscaler: 画像の解像度を上げるためのアップスケーラーを有効にします。 - fix-by-controlnet-tile: ControlNet 1.1 Tileの利用有無を指定します。有効にすると、崩れた画像を修復しつつ、高解像度な画像を生成します。 - output-format: 出力フォーマットを指定します。avifも指定可能です。 diff --git a/app/Dockerfile b/app/Dockerfile index 3b79c54..698cfd7 100644 --- a/app/Dockerfile +++ b/app/Dockerfile @@ -5,10 +5,4 @@ RUN apt-get update \ && apt-get autoremove -y \ && apt-get clean -y \ && rm -rf /var/lib/apt/lists/* \ - && pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu121 --no-cache-dir \ - && mkdir -p /vol/cache/esrgan \ - && wget --progress=dot:giga https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P /vol/cache/esrgan \ - && wget --progress=dot:giga https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth -P /vol/cache/esrgan \ - && wget --progress=dot:giga https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth -P /vol/cache/esrgan \ - && wget --progress=dot:giga https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth -P /vol/cache/esrgan \ - && wget --progress=dot:giga https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth -P /vol/cache/esrgan + && pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu121 --no-cache-dir diff --git a/app/requirements.txt b/app/requirements.txt index 3b377cb..244ea4f 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -1,27 +1,15 @@ -invisible_watermark -accelerate diffusers[torch]==0.27.2 -onnxruntime==1.17.3 -safetensors==0.4.3 +accelerate torch==2.2.2 transformers==4.40.0 xformers==0.0.25.post1 -realesrgan==0.3.0 -basicsr>=1.4.2 -facexlib>=0.3.0 -gfpgan>=1.3.8 -scipy==1.13.0 -opencv-python -Pillow -pillow-avif-plugin -torchvision==0.17.2 -tqdm - +invisible_watermark # To help viewers identify the images as machine-generated. +onnxruntime==1.17.3 # ONNX Runtime uses the following optimizations to speed up Stable Diffusion in CUDA. +safetensors==0.4.3 # To store tensors safely. controlnet_aux -pyyaml - -# Use the below in 'download_from_original_stable_diffusion_ckpt'. -omegaconf==2.3.0 +Pillow +pillow-avif-plugin # To save images in AVIF format. +pyyaml # To read the configuration file by written YAML. peft \ No newline at end of file diff --git a/app/setup.py b/app/setup.py index fd927f5..113278f 100644 --- a/app/setup.py +++ b/app/setup.py @@ -9,6 +9,7 @@ BASE_CACHE_PATH = "/vol/cache" BASE_CACHE_PATH_LORA = "/vol/cache/lora" BASE_CACHE_PATH_TEXTUAL_INVERSION = "/vol/cache/textual_inversion" BASE_CACHE_PATH_CONTROLNET = "/vol/cache/controlnet" +BASE_CACHE_PATH_UPSCALER = "/vol/cache/upscaler" def download_file(url, file_name, file_path): @@ -25,6 +26,15 @@ def download_file(url, file_name, file_path): f.write(downloaded) +def download_upscaler(): + """ + Download the stabilityai/sd-x2-latent-upscaler. + """ + model_id = "stabilityai/sd-x2-latent-upscaler" + upscaler = diffusers.StableDiffusionLatentUpscalePipeline.from_pretrained(model_id) + upscaler.save_pretrained(BASE_CACHE_PATH_UPSCALER, safe_serialization=True) + + def download_controlnet(name: str, repo_id: str, token: str): """ Download a controlnet. @@ -130,6 +140,8 @@ def build_image(): file_path=BASE_CACHE_PATH_TEXTUAL_INVERSION, ) + download_upscaler() + app = App("stable-diffusion-cli") base_stub = Image.from_dockerfile( diff --git a/app/stable_diffusion_1_5.py b/app/stable_diffusion_1_5.py index b306714..5d72755 100644 --- a/app/stable_diffusion_1_5.py +++ b/app/stable_diffusion_1_5.py @@ -10,6 +10,7 @@ from setup import ( BASE_CACHE_PATH_CONTROLNET, BASE_CACHE_PATH_LORA, BASE_CACHE_PATH_TEXTUAL_INVERSION, + BASE_CACHE_PATH_UPSCALER, app, ) @@ -53,6 +54,11 @@ class SD15: ) # self.pipe.scheduler = diffusers.LCMScheduler.from_config(self.pipe.scheduler.config) + self.upscaler = diffusers.StableDiffusionLatentUpscalePipeline.from_pretrained( + BASE_CACHE_PATH_UPSCALER, + torch_dtype=torch.float16, + ) + vae = config.get("vae") if vae is not None: self.pipe.vae = diffusers.AutoencoderKL.from_pretrained( @@ -133,8 +139,7 @@ class SD15: batch_size: int = 1, steps: int = 30, seed: int = 1, - upscaler: str = "", - use_face_enhancer: bool = False, + use_upscaler: bool = False, fix_by_controlnet_tile: bool = False, output_format: str = "png", ) -> list[bytes]: @@ -187,16 +192,18 @@ class SD15: generated_images.extend(fixed_by_controlnet) base_images = fixed_by_controlnet - # TODO: Upscaler stopped working due to update of dependent packages. Replace with diffusers upscaler. - # if upscaler != "": - # upscaled = self._upscale( - # base_images=base_images, - # half_precision=False, - # tile=700, - # upscaler=upscaler, - # use_face_enhancer=use_face_enhancer, - # ) - # generated_images.extend(upscaled) + if use_upscaler: + self.upscaler.to("cuda") + self.upscaler.enable_xformers_memory_efficient_attention() + upscaled = self.upscaler( + prompt=prompt, + negative_prompt=n_prompt, + image=base_images[0], + num_inference_steps=steps, + guidance_scale=0, + generator=generator, + ).images + generated_images.extend(upscaled) image_output = [] for image in generated_images: @@ -214,8 +221,7 @@ class SD15: batch_size: int = 1, steps: int = 30, seed: int = 1, - upscaler: str = "", - use_face_enhancer: bool = False, + use_upscaler: bool = False, fix_by_controlnet_tile: bool = False, output_format: str = "png", base_image_url: str = "", @@ -269,14 +275,17 @@ class SD15: generated_images.extend(fixed_by_controlnet) base_images = fixed_by_controlnet - if upscaler != "": - upscaled = self._upscale( - base_images=base_images, - half_precision=False, - tile=700, - upscaler=upscaler, - use_face_enhancer=use_face_enhancer, - ) + if use_upscaler: + self.upscaler.to("cuda") + self.upscaler.enable_xformers_memory_efficient_attention() + upscaled = self.upscaler( + prompt=prompt, + negative_prompt=n_prompt, + image=base_images[0], + num_inference_steps=steps, + guidance_scale=0, + generator=generator, + ).images generated_images.extend(upscaled) image_output = [] @@ -292,81 +301,3 @@ class SD15: width, height = image.size img = image.resize((width * scale_factor, height * scale_factor), resample=PIL.Image.LANCZOS) return img - - def _upscale( - self, - base_images: list[PIL.Image], - half_precision: bool = False, - tile: int = 0, - tile_pad: int = 10, - pre_pad: int = 0, - upscaler: str = "", - use_face_enhancer: bool = False, - ) -> list[PIL.Image]: - """ - Upscale the generated images by the upscaler when `upscaler` is selected. - The upscaler can be selected from the following list: - - `RealESRGAN_x4plus` - - `RealESRNet_x4plus` - - `RealESRGAN_x4plus_anime_6B` - - `RealESRGAN_x2plus` - https://github.com/xinntao/Real-ESRGAN - """ - import numpy - from basicsr.archs.rrdbnet_arch import RRDBNet - from gfpgan import GFPGANer - from realesrgan import RealESRGANer - - model_name = upscaler - if model_name == "RealESRGAN_x4plus": - upscale_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4) - netscale = 4 - elif model_name == "RealESRNet_x4plus": - upscale_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4) - netscale = 4 - elif model_name == "RealESRGAN_x4plus_anime_6B": - upscale_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4) - netscale = 4 - elif model_name == "RealESRGAN_x2plus": - upscale_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2) - netscale = 2 - else: - raise NotImplementedError("Model name not supported") - - upsampler = RealESRGANer( - scale=netscale, - model_path=os.path.join(BASE_CACHE_PATH, "esrgan", f"{model_name}.pth"), - dni_weight=None, - model=upscale_model, - tile=tile, - tile_pad=tile_pad, - pre_pad=pre_pad, - half=half_precision, - gpu_id=None, - ) - - if use_face_enhancer: - face_enhancer = GFPGANer( - model_path=os.path.join(BASE_CACHE_PATH, "esrgan", "GFPGANv1.3.pth"), - upscale=netscale, - arch="clean", - channel_multiplier=2, - bg_upsampler=upsampler, - ) - - upscaled_imgs = [] - for img in base_images: - img = numpy.array(img) - if use_face_enhancer: - _, _, enhance_result = face_enhancer.enhance( - img, - has_aligned=False, - only_center_face=False, - paste_back=True, - ) - else: - enhance_result, _ = upsampler.enhance(img) - - upscaled_imgs.append(PIL.Image.fromarray(enhance_result)) - - return upscaled_imgs diff --git a/app/stable_diffusion_xl.py b/app/stable_diffusion_xl.py index 23c3b49..85cd6d3 100644 --- a/app/stable_diffusion_xl.py +++ b/app/stable_diffusion_xl.py @@ -96,8 +96,7 @@ class SDXLTxt2Img: width: int = 1024, steps: int = 30, seed: int = 1, - upscaler: str = "", - use_face_enhancer: bool = False, + use_upscaler: bool = False, output_format: str = "png", ) -> list[bytes]: """ @@ -157,15 +156,14 @@ class SDXLTxt2Img: # generated_images.extend(fixed_by_controlnet) # base_images = fixed_by_controlnet - if upscaler != "": - upscaled = self._upscale( - base_images=base_images, - half_precision=False, - tile=700, - upscaler=upscaler, - use_face_enhancer=use_face_enhancer, - ) - generated_images.extend(upscaled) + # if use_upscaler: + # upscaled = self._upscale( + # base_images=base_images, + # half_precision=False, + # tile=700, + # upscaler=upscaler, + # ) + # generated_images.extend(upscaled) image_output = [] for image in generated_images: @@ -180,82 +178,3 @@ class SDXLTxt2Img: width, height = image.size img = image.resize((width * scale_factor, height * scale_factor), resample=PIL.Image.LANCZOS) return img - - def _upscale( - self, - base_images: list[PIL.Image], - half_precision: bool = False, - tile: int = 0, - tile_pad: int = 10, - pre_pad: int = 0, - upscaler: str = "", - use_face_enhancer: bool = False, - ) -> list[PIL.Image]: - """ - Upscale the generated images by the upscaler when `upscaler` is selected. - The upscaler can be selected from the following list: - - `RealESRGAN_x4plus` - - `RealESRNet_x4plus` - - `RealESRGAN_x4plus_anime_6B` - - `RealESRGAN_x2plus` - https://github.com/xinntao/Real-ESRGAN - """ - import numpy - from basicsr.archs.rrdbnet_arch import RRDBNet - from gfpgan import GFPGANer - from realesrgan import RealESRGANer - from tqdm import tqdm - - model_name = upscaler - if model_name == "RealESRGAN_x4plus": - upscale_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4) - netscale = 4 - elif model_name == "RealESRNet_x4plus": - upscale_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4) - netscale = 4 - elif model_name == "RealESRGAN_x4plus_anime_6B": - upscale_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4) - netscale = 4 - elif model_name == "RealESRGAN_x2plus": - upscale_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2) - netscale = 2 - else: - raise NotImplementedError("Model name not supported") - - upsampler = RealESRGANer( - scale=netscale, - model_path=os.path.join(BASE_CACHE_PATH, "esrgan", f"{model_name}.pth"), - dni_weight=None, - model=upscale_model, - tile=tile, - tile_pad=tile_pad, - pre_pad=pre_pad, - half=half_precision, - gpu_id=None, - ) - - if use_face_enhancer: - face_enhancer = GFPGANer( - model_path=os.path.join(BASE_CACHE_PATH, "esrgan", "GFPGANv1.3.pth"), - upscale=netscale, - arch="clean", - channel_multiplier=2, - bg_upsampler=upsampler, - ) - - upscaled_imgs = [] - for img in base_images: - img = numpy.array(img) - if use_face_enhancer: - _, _, enhance_result = face_enhancer.enhance( - img, - has_aligned=False, - only_center_face=False, - paste_back=True, - ) - else: - enhance_result, _ = upsampler.enhance(img) - - upscaled_imgs.append(PIL.Image.fromarray(enhance_result)) - - return upscaled_imgs diff --git a/cmd/sd15_img2img.py b/cmd/sd15_img2img.py index 34771fd..f488549 100644 --- a/cmd/sd15_img2img.py +++ b/cmd/sd15_img2img.py @@ -15,8 +15,7 @@ def main( batch_size: int = 1, steps: int = 20, seed: int = -1, - upscaler: str = "", - use_face_enhancer: str = "False", + use_upscaler: str = "False", fix_by_controlnet_tile: str = "False", output_format: str = "png", base_image_url: str = "", @@ -38,8 +37,7 @@ def main( batch_size=batch_size, steps=steps, seed=seed_generated, - upscaler=upscaler, - use_face_enhancer=use_face_enhancer == "True", + use_upscaler=use_upscaler == "True", fix_by_controlnet_tile=fix_by_controlnet_tile == "True", output_format=output_format, base_image_url=base_image_url, diff --git a/cmd/sd15_txt2img.py b/cmd/sd15_txt2img.py index 821bdea..1b2edcd 100644 --- a/cmd/sd15_txt2img.py +++ b/cmd/sd15_txt2img.py @@ -17,8 +17,7 @@ def main( batch_size: int = 1, steps: int = 20, seed: int = -1, - upscaler: str = "", - use_face_enhancer: str = "False", + use_upscaler: str = "", fix_by_controlnet_tile: str = "False", output_format: str = "png", ): @@ -41,8 +40,7 @@ def main( batch_size=batch_size, steps=steps, seed=seed_generated, - upscaler=upscaler, - use_face_enhancer=use_face_enhancer == "True", + use_upscaler=use_upscaler == "True", fix_by_controlnet_tile=fix_by_controlnet_tile == "True", output_format=output_format, ) diff --git a/cmd/sdxl_txt2img.py b/cmd/sdxl_txt2img.py index 7a9f5f5..bfb1c49 100644 --- a/cmd/sdxl_txt2img.py +++ b/cmd/sdxl_txt2img.py @@ -16,8 +16,7 @@ def main( samples: int = 5, steps: int = 20, seed: int = -1, - upscaler: str = "", - use_face_enhancer: str = "False", + use_upscaler: str = "False", output_format: str = "png", ): """ @@ -38,8 +37,7 @@ def main( width=width, steps=steps, seed=seed_generated, - upscaler=upscaler, - use_face_enhancer=use_face_enhancer == "True", + use_upscaler=use_upscaler == "True", output_format=output_format, ) util.save_images(directory, images, seed_generated, i, output_format)