From b3c348409c90de54f76f9c78095f4138b107c09e Mon Sep 17 00:00:00 2001
From: hodanov <1031hoda@gmail.com>
Date: Wed, 5 Jul 2023 10:14:52 +0900
Subject: [PATCH] Add .

---
 Makefile                      |   2 +-
 sdcli/txt2img.py              |   4 +-
 setup_files/config.sample.yml |   6 +-
 setup_files/setup.py          | 109 +++++++++++++++-------------------
 4 files changed, 53 insertions(+), 68 deletions(-)

diff --git a/Makefile b/Makefile
index 400f818..38750ff 100644
--- a/Makefile
+++ b/Makefile
@@ -17,4 +17,4 @@ run:
 	--steps 50 \
 	--upscaler "" \
 	--use-face-enhancer "False" \
-	--use-hires-fix "False"
+	--fix-by-controlnet-tile "False"
diff --git a/sdcli/txt2img.py b/sdcli/txt2img.py
index 46d37c1..b731758 100644
--- a/sdcli/txt2img.py
+++ b/sdcli/txt2img.py
@@ -18,7 +18,7 @@ def main(
     seed: int = -1,
     upscaler: str = "",
     use_face_enhancer: str = "False",
-    use_hires_fix: str = "False",
+    fix_by_controlnet_tile: str = "False",
 ):
     """
     This function is the entrypoint for the Runway CLI.
@@ -43,7 +43,7 @@ def main(
             seed=seed_generated,
             upscaler=upscaler,
             use_face_enhancer=use_face_enhancer == "True",
-            use_hires_fix=use_hires_fix == "True",
+            fix_by_controlnet_tile=fix_by_controlnet_tile == "True",
         )
         util.save_images(directory, images, seed_generated, i)
         total_time = time.time() - start_time
diff --git a/setup_files/config.sample.yml b/setup_files/config.sample.yml
index 2f86f59..4d2b343 100644
--- a/setup_files/config.sample.yml
+++ b/setup_files/config.sample.yml
@@ -29,10 +29,6 @@ vae:
 #     download_url: https://hogehoge/xxxx
 #   - name: fugafuga
 #     download_url: https://fugafuga/xxxx
-# cotrolnets:
+# controlnets:
 #   - name: control_v11f1e_sd15_tile
 #     repo_id: lllyasviel/control_v11f1e_sd15_tile
-# upscaler:
-#   name: RealESRGAN_x2plus
-#   use_face_enhancer: false
-#   use_hires_fix: false
diff --git a/setup_files/setup.py b/setup_files/setup.py
index 2c1fde2..946604c 100644
--- a/setup_files/setup.py
+++ b/setup_files/setup.py
@@ -196,28 +196,22 @@ class StableDiffusion(ClsMixin):
 
         self.pipe.enable_xformers_memory_efficient_attention()
 
-        # TODO: Add support for controlnets.
-        # controlnet = diffusers.ControlNetModel.from_pretrained(
-        #     "lllyasviel/control_v11f1e_sd15_tile",
-        #     # "lllyasviel/sd-controlnet-canny",
-        #     # self.cache_path,
-        #     # subfolder="controlnet",
-        #     torch_dtype=torch.float16,
-        # )
-
-        # self.controlnet_pipe = diffusers.StableDiffusionControlNetPipeline.from_pretrained(
-        #     self.cache_path,
-        #     controlnet=controlnet,
-        #     custom_pipeline="lpw_stable_diffusion",
-        #     # custom_pipeline="stable_diffusion_controlnet_img2img",
-        #     scheduler=self.pipe.scheduler,
-        #     vae=self.pipe.vae,
-        #     torch_dtype=torch.float16,
-        # )
-
-        # self.controlnet_pipe.to("cuda")
-
-        # self.controlnet_pipe.enable_xformers_memory_efficient_attention()
+        # TODO: Repair the controlnet loading.
+        controlnets = config.get("controlnets")
+        if controlnets is not None:
+            for controlnet in controlnets:
+                path = os.path.join(BASE_CACHE_PATH_CONTROLNET, controlnet["name"])
+                controlnet = diffusers.ControlNetModel.from_pretrained(path, torch_dtype=torch.float16)
+                self.controlnet_pipe = diffusers.StableDiffusionControlNetPipeline.from_pretrained(
+                    self.cache_path,
+                    controlnet=controlnet,
+                    custom_pipeline="lpw_stable_diffusion",
+                    scheduler=self.pipe.scheduler,
+                    vae=self.pipe.vae,
+                    torch_dtype=torch.float16,
+                )
+                self.controlnet_pipe.to("cuda")
+                self.controlnet_pipe.enable_xformers_memory_efficient_attention()
 
     @method()
     def count_token(self, p: str, n: str) -> int:
@@ -258,7 +252,7 @@ class StableDiffusion(ClsMixin):
         seed: int = 1,
         upscaler: str = "",
         use_face_enhancer: bool = False,
-        use_hires_fix: bool = False,
+        fix_by_controlnet_tile: bool = False,
     ) -> list[bytes]:
         """
         Runs the Stable Diffusion pipeline on the given prompt and outputs images.
@@ -269,7 +263,7 @@ class StableDiffusion(ClsMixin):
         generator = torch.Generator("cuda").manual_seed(seed)
         with torch.inference_mode():
             with torch.autocast("cuda"):
-                base_images = self.pipe.text2img(
+                generated_images = self.pipe.text2img(
                     prompt * batch_size,
                     negative_prompt=n_prompt * batch_size,
                     height=height,
@@ -280,21 +274,29 @@ class StableDiffusion(ClsMixin):
                     generator=generator,
                 ).images
 
-        # for image in base_images:
-        #     image = self.resize_image(image=image, scale_factor=2)
-        #     with torch.inference_mode():
-        #         with torch.autocast("cuda"):
-        #             generatedWithControlnet = self.controlnet_pipe(
-        #                 prompt=prompt * batch_size,
-        #                 negative_prompt=n_prompt * batch_size,
-        #                 num_inference_steps=steps,
-        #                 strength=0.3,
-        #                 guidance_scale=7.5,
-        #                 max_embeddings_multiples=max_embeddings_multiples,
-        #                 generator=generator,
-        #                 image=image,
-        #             ).images
-        # base_images.extend(generatedWithControlnet)
+        base_images = generated_images
+
+        """
+        Fix the generated images by the control_v11f1e_sd15_tile when `fix_by_controlnet_tile` is `True`.
+        https://huggingface.co/lllyasviel/control_v11f1e_sd15_tile
+        """
+        if fix_by_controlnet_tile:
+            for image in base_images:
+                image = self.resize_image(image=image, scale_factor=2)
+                with torch.inference_mode():
+                    with torch.autocast("cuda"):
+                        fixed_by_controlnet = self.controlnet_pipe(
+                            prompt=prompt * batch_size,
+                            negative_prompt=n_prompt * batch_size,
+                            num_inference_steps=steps,
+                            strength=0.3,
+                            guidance_scale=7.5,
+                            max_embeddings_multiples=max_embeddings_multiples,
+                            generator=generator,
+                            image=image,
+                        ).images
+            generated_images.extend(fixed_by_controlnet)
+            base_images = fixed_by_controlnet
 
         if upscaler != "":
             upscaled = self.upscale(
@@ -303,28 +305,11 @@ class StableDiffusion(ClsMixin):
                 tile=700,
                 upscaler=upscaler,
                 use_face_enhancer=use_face_enhancer,
-                use_hires_fix=use_hires_fix,
             )
-            base_images.extend(upscaled)
-
-            if use_hires_fix:
-                for img in upscaled:
-                    with torch.inference_mode():
-                        with torch.autocast("cuda"):
-                            hires_fixed = self.pipe.img2img(
-                                prompt=prompt * batch_size,
-                                negative_prompt=n_prompt * batch_size,
-                                num_inference_steps=steps,
-                                strength=0.3,
-                                guidance_scale=7.5,
-                                max_embeddings_multiples=max_embeddings_multiples,
-                                generator=generator,
-                                image=img,
-                            ).images
-                    base_images.extend(hires_fixed)
+            generated_images.extend(upscaled)
 
         image_output = []
-        for image in base_images:
+        for image in generated_images:
             with io.BytesIO() as buf:
                 image.save(buf, format="PNG")
                 image_output.append(buf.getvalue())
@@ -350,10 +335,14 @@ class StableDiffusion(ClsMixin):
         pre_pad: int = 0,
         upscaler: str = "",
         use_face_enhancer: bool = False,
-        use_hires_fix: bool = False,
     ) -> list[Image.Image]:
         """
-        Upscales the given images using a upscaler.
+        Upscale the generated images by the upscaler when `upscaler` is selected.
+        The upscaler can be selected from the following list:
+        - `RealESRGAN_x4plus`
+        - `RealESRNet_x4plus`
+        - `RealESRGAN_x4plus_anime_6B`
+        - `RealESRGAN_x2plus`
         https://github.com/xinntao/Real-ESRGAN
         """
         import numpy