diff --git a/.env.example b/.env.example
deleted file mode 100644
index e14931a..0000000
--- a/.env.example
+++ /dev/null
@@ -1,22 +0,0 @@
-HUGGING_FACE_TOKEN=""
-MODEL_REPO_ID="stabilityai/stable-diffusion-2-1"
-MODEL_NAME="stable-diffusion-2-1"
-
-# Modify `USE_VAE` to `true` if you want to use VAE.
-USE_VAE="false"
-
-# Add LoRA if you want to use one. You can use a download link of civitai.
-# ex)
-#   - `LORA_NAMES="hogehoge.safetensors"`
-#   - `LORA_DOWNLOAD_URLS="https://civitai.com/api/download/models/xxxxxx"`
-#
-# If you have multiple LoRAs you want to use, separate by commas like the below:
-# ex)
-#   - `LORA_NAMES="hogehoge.safetensors,mogumogu.safetensors"`
-#   - `LORA_DOWNLOAD_URLS="https://civitai.com/api/download/models/xxxxxx,https://civitai.com/api/download/models/xxxxxx"`
-LORA_NAMES=""
-LORA_DOWNLOAD_URLS=""
-
-# Add Textual Inversion you wan to use. Usage is the same as `LORA_NAMES` and `LORA_DOWNLOAD_URLS`.
-TEXTUAL_INVERSION_NAMES=""
-TEXTUAL_INVERSION_DOWNLOAD_URLS=""
diff --git a/Makefile b/Makefile
index 67716ae..400f818 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 deploy:
-	modal deploy setup.py
+	modal deploy ./setup_files/setup.py
 
 # `--upscaler` is a name of upscaler you want to use.
 # You can use upscalers the below:
diff --git a/setup_files/.env.example b/setup_files/.env.example
new file mode 100644
index 0000000..79c037d
--- /dev/null
+++ b/setup_files/.env.example
@@ -0,0 +1,3 @@
+# `HUGGING_FACE_TOKEN` is the token for the Hugging Face API.
+# The token can be found at https://huggingface.co/settings/tokens.
+HUGGING_FACE_TOKEN=""
diff --git a/Dockerfile b/setup_files/Dockerfile
similarity index 91%
rename from Dockerfile
rename to setup_files/Dockerfile
index d69b8c1..00227ed 100644
--- a/Dockerfile
+++ b/setup_files/Dockerfile
@@ -1,7 +1,9 @@
 FROM python:3.11.3-slim-bullseye
-COPY requirements.txt /
+COPY ./requirements.txt /
 RUN apt update \
     && apt install -y wget git libgl1-mesa-glx libglib2.0-0 \
+    && apt autoremove -y \
+    && apt clean -y \
     && pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu117 \
     && mkdir -p /vol/cache/esrgan \
     && wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P /vol/cache/esrgan \
diff --git a/setup_files/config.sample.yml b/setup_files/config.sample.yml
new file mode 100644
index 0000000..2f86f59
--- /dev/null
+++ b/setup_files/config.sample.yml
@@ -0,0 +1,38 @@
+##########
+# This is the config file to set a base model, vae and some tools.
+# Rename the file to `config.yml` before running the script.
+# Run `modal deploy ./setup_files/setup.py` every time you modify this file.
+##########
+
+##########
+# You can use a diffusers model and a VAE hosted on Hugging Face.
+model:
+  name: stable-diffusion-2-1
+  repo_id: stabilityai/stable-diffusion-2-1
+vae:
+  name: sd-vae-ft-mse
+  repo_id: stabilityai/sd-vae-ft-mse
+##########
+# Add a LoRA if you want to use one. Specify a download URL as in the example below.
+# ex)
+# loras:
+#   - name: hogehoge.safetensors
+#     download_url: https://hogehoge/xxxx
+#   - name: fugafuga.safetensors
+#     download_url: https://fugafuga/xxxx
+
+##########
+# You can also use Textual Inversion and ControlNet. `textual_inversions` works like `loras`; `controlnets` takes a `repo_id` instead of a `download_url`.
+# ex)
+# textual_inversions:
+#   - name: hogehoge
+#     download_url: https://hogehoge/xxxx
+#   - name: fugafuga
+#     download_url: https://fugafuga/xxxx
+# controlnets:
+#   - name: control_v11f1e_sd15_tile
+#     repo_id: lllyasviel/control_v11f1e_sd15_tile
+# upscaler:
+#   name: RealESRGAN_x2plus
+#   use_face_enhancer: false
+#   use_hires_fix: false
diff --git a/requirements.txt b/setup_files/requirements.txt
similarity index 91%
rename from requirements.txt
rename to setup_files/requirements.txt
index 119f567..6ba20b7 100644
--- a/requirements.txt
+++ b/setup_files/requirements.txt
@@ -15,3 +15,6 @@ opencv-python
 Pillow
 torchvision
 tqdm
+
+controlnet_aux
+pyyaml
diff --git a/setup.py b/setup_files/setup.py
similarity index 60%
rename from setup.py
rename to setup_files/setup.py
index 33f905b..32c9b2d 100644
--- a/setup.py
+++ b/setup_files/setup.py
@@ -4,52 +4,63 @@ import io
 import os
 from urllib.request import Request, urlopen
 
+import diffusers
+import yaml
 from modal import Image, Mount, Secret, Stub, method
 from modal.cls import ClsMixin
 
 BASE_CACHE_PATH = "/vol/cache"
 BASE_CACHE_PATH_LORA = "/vol/cache/lora"
 BASE_CACHE_PATH_TEXTUAL_INVERSION = "/vol/cache/textual_inversion"
+BASE_CACHE_PATH_CONTROLNET = "/vol/cache/controlnet"
 
 
-def download_files(urls, file_names, file_path):
+def download_file(url, file_name, file_path):
     """
     Download files.
     """
-    file_names = file_names.split(",")
-    urls = urls.split(",")
-
-    for file_name, url in zip(file_names, urls):
-        req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
-        downloaded = urlopen(req).read()
-
-        dir_names = os.path.join(file_path, file_name)
-        os.makedirs(os.path.dirname(dir_names), exist_ok=True)
-        with open(dir_names, mode="wb") as f:
-            f.write(downloaded)
+    req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
+    downloaded = urlopen(req).read()
+    dir_names = os.path.join(file_path, file_name)
+    os.makedirs(os.path.dirname(dir_names), exist_ok=True)
+    with open(dir_names, mode="wb") as f:
+        f.write(downloaded)
 
 
-def download_models():
+def download_controlnet(name: str, repo_id: str, token: str):
     """
-    Downloads the model from Hugging Face and saves it to the cache path using
-    diffusers.StableDiffusionPipeline.from_pretrained().
+    Download a controlnet.
     """
-    import diffusers
+    cache_path = os.path.join(BASE_CACHE_PATH_CONTROLNET, name)
+    controlnet = diffusers.ControlNetModel.from_pretrained(
+        repo_id,
+        use_auth_token=token,
+        cache_dir=cache_path,
+    )
+    controlnet.save_pretrained(cache_path, safe_serialization=True)
 
-    hugging_face_token = os.environ["HUGGING_FACE_TOKEN"]
-    model_repo_id = os.environ["MODEL_REPO_ID"]
-    cache_path = os.path.join(BASE_CACHE_PATH, os.environ["MODEL_NAME"])
 
+def download_vae(name: str, repo_id: str, token: str):
+    """
+    Download a vae.
+    """
+    cache_path = os.path.join(BASE_CACHE_PATH, name)
     vae = diffusers.AutoencoderKL.from_pretrained(
-        "stabilityai/sd-vae-ft-mse",
-        use_auth_token=hugging_face_token,
+        repo_id,
+        use_auth_token=token,
         cache_dir=cache_path,
     )
     vae.save_pretrained(cache_path, safe_serialization=True)
 
+
+def download_model(name: str, repo_id: str, token: str):
+    """
+    Download a model.
+    """
+    cache_path = os.path.join(BASE_CACHE_PATH, name)
     pipe = diffusers.StableDiffusionPipeline.from_pretrained(
-        model_repo_id,
-        use_auth_token=hugging_face_token,
+        repo_id,
+        use_auth_token=token,
         cache_dir=cache_path,
     )
     pipe.save_pretrained(cache_path, safe_serialization=True)
@@ -59,52 +70,82 @@ def build_image():
     """
     Build the Docker image.
     """
-    download_models()
+    token = os.environ["HUGGING_FACE_TOKEN"]
+    config = {}
+    with open("/config.yml", "r") as file:
+        config = yaml.safe_load(file)
 
-    if os.environ["LORA_NAMES"] != "":
-        download_files(
-            os.getenv("LORA_DOWNLOAD_URLS"),
-            os.getenv("LORA_NAMES"),
-            BASE_CACHE_PATH_LORA,
-        )
+    model = config.get("model")
+    if model is not None:
+        download_model(name=model["name"], repo_id=model["repo_id"], token=token)
 
-    if os.environ["TEXTUAL_INVERSION_NAMES"] != "":
-        download_files(
-            os.getenv("TEXTUAL_INVERSION_DOWNLOAD_URLS"),
-            os.getenv("TEXTUAL_INVERSION_NAMES"),
-            BASE_CACHE_PATH_TEXTUAL_INVERSION,
-        )
+    vae = config.get("vae")
+    if vae is not None:
+        download_vae(name=model["name"], repo_id=vae["repo_id"], token=token)
+
+    controlnets = config.get("controlnets")
+    if controlnets is not None:
+        for controlnet in controlnets:
+            download_controlnet(name=controlnet["name"], repo_id=controlnet["repo_id"], token=token)
+
+    loras = config.get("loras")
+    if loras is not None:
+        for lora in loras:
+            download_file(
+                url=lora["download_url"],
+                file_name=lora["name"],
+                file_path=BASE_CACHE_PATH_LORA,
+            )
+
+    textual_inversions = config.get("textual_inversions")
+    if textual_inversions is not None:
+        for textual_inversion in textual_inversions:
+            download_file(
+                url=textual_inversion["download_url"],
+                file_name=textual_inversion["name"],
+                file_path=BASE_CACHE_PATH_TEXTUAL_INVERSION,
+            )
 
 
-stub_image = Image.from_dockerfile(
-    path="./Dockerfile",
-    context_mount=Mount.from_local_file("./requirements.txt"),
+stub = Stub("stable-diffusion-cli")
+base_stub = Image.from_dockerfile(
+    path="./setup_files/Dockerfile",
+    context_mount=Mount.from_local_file("./setup_files/requirements.txt"),
+)
+stub.image = base_stub.extend(
+    dockerfile_commands=[
+        "FROM base",
+        "COPY ./config.yml /",
+    ],
+    context_mount=Mount.from_local_file("./setup_files/config.yml"),
 ).run_function(
     build_image,
     secrets=[Secret.from_dotenv(__file__)],
 )
-stub = Stub("stable-diffusion-cli")
-stub.image = stub_image
 
 
-@stub.cls(gpu="A10G", secrets=[Secret.from_dotenv(__file__)])
+@stub.cls(
+    gpu="A10G",
+    secrets=[Secret.from_dotenv(__file__)],
+)
 class StableDiffusion(ClsMixin):
     """
     A class that wraps the Stable Diffusion pipeline and scheduler.
     """
 
     def __enter__(self):
-        import diffusers
         import torch
 
-        self.cache_path = os.path.join(BASE_CACHE_PATH, os.environ["MODEL_NAME"])
+        config = {}
+        with open("/config.yml", "r") as file:
+            config = yaml.safe_load(file)
+        self.cache_path = os.path.join(BASE_CACHE_PATH, config["model"]["name"])
         if os.path.exists(self.cache_path):
             print(f"The directory '{self.cache_path}' exists.")
         else:
-            print(f"The directory '{self.cache_path}' does not exist. Download models...")
-            download_models()
+            print(f"The directory '{self.cache_path}' does not exist.")
 
-        torch.backends.cuda.matmul.allow_tf32 = True
+        torch.cuda.memory._set_allocator_settings("max_split_size_mb:256")
 
         self.pipe = diffusers.StableDiffusionPipeline.from_pretrained(
             self.cache_path,
@@ -119,40 +160,65 @@ class StableDiffusion(ClsMixin):
             subfolder="scheduler",
         )
 
-        if os.environ["USE_VAE"] == "true":
+        vae = config.get("vae")
+        if vae is not None:
             self.pipe.vae = diffusers.AutoencoderKL.from_pretrained(
                 self.cache_path,
                 subfolder="vae",
             )
-
         self.pipe.to("cuda")
 
-        if os.environ["LORA_NAMES"] != "":
-            names = os.environ["LORA_NAMES"].split(",")
-            urls = os.environ["LORA_DOWNLOAD_URLS"].split(",")
-            for name, url in zip(names, urls):
-                path = os.path.join(BASE_CACHE_PATH_LORA, name)
+        loras = config.get("loras")
+        if loras is not None:
+            for lora in loras:
+                path = os.path.join(BASE_CACHE_PATH_LORA, lora["name"])
                 if os.path.exists(path):
                     print(f"The directory '{path}' exists.")
                 else:
                     print(f"The directory '{path}' does not exist. Download it...")
-                    download_files(url, name, BASE_CACHE_PATH_LORA)
+                    download_file(lora["download_url"], lora["name"], BASE_CACHE_PATH_LORA)
                 self.pipe.load_lora_weights(".", weight_name=path)
 
-        if os.environ["TEXTUAL_INVERSION_NAMES"] != "":
-            names = os.environ["TEXTUAL_INVERSION_NAMES"].split(",")
-            urls = os.environ["TEXTUAL_INVERSION_DOWNLOAD_URLS"].split(",")
-            for name, url in zip(names, urls):
-                path = os.path.join(BASE_CACHE_PATH_TEXTUAL_INVERSION, name)
+        textual_inversions = config.get("textual_inversions")
+        if textual_inversions is not None:
+            for textual_inversion in textual_inversions:
+                path = os.path.join(BASE_CACHE_PATH_TEXTUAL_INVERSION, textual_inversion["name"])
                 if os.path.exists(path):
                     print(f"The directory '{path}' exists.")
                 else:
                     print(f"The directory '{path}' does not exist. Download it...")
-                    download_files(url, name, BASE_CACHE_PATH_TEXTUAL_INVERSION)
+                    download_file(
+                        textual_inversion["download_url"],
+                        textual_inversion["name"],
+                        BASE_CACHE_PATH_TEXTUAL_INVERSION,
+                    )
                 self.pipe.load_textual_inversion(path)
 
         self.pipe.enable_xformers_memory_efficient_attention()
 
+        # TODO: Add support for controlnets.
+        # controlnet = diffusers.ControlNetModel.from_pretrained(
+        #     "lllyasviel/control_v11f1e_sd15_tile",
+        #     # "lllyasviel/sd-controlnet-canny",
+        #     # self.cache_path,
+        #     # subfolder="controlnet",
+        #     torch_dtype=torch.float16,
+        # )
+
+        # self.controlnet_pipe = diffusers.StableDiffusionControlNetPipeline.from_pretrained(
+        #     self.cache_path,
+        #     controlnet=controlnet,
+        #     custom_pipeline="lpw_stable_diffusion",
+        #     # custom_pipeline="stable_diffusion_controlnet_img2img",
+        #     scheduler=self.pipe.scheduler,
+        #     vae=self.pipe.vae,
+        #     torch_dtype=torch.float16,
+        # )
+
+        # self.controlnet_pipe.to("cuda")
+
+        # self.controlnet_pipe.enable_xformers_memory_efficient_attention()
+
     @method()
     def count_token(self, p: str, n: str) -> int:
         """
@@ -214,6 +280,22 @@ class StableDiffusion(ClsMixin):
                     generator=generator,
                 ).images
 
+        # for image in base_images:
+        #     image = self.resize_image(image=image, scale_factor=2)
+        #     with torch.inference_mode():
+        #         with torch.autocast("cuda"):
+        #             generatedWithControlnet = self.controlnet_pipe(
+        #                 prompt=prompt * batch_size,
+        #                 negative_prompt=n_prompt * batch_size,
+        #                 num_inference_steps=steps,
+        #                 strength=0.3,
+        #                 guidance_scale=7.5,
+        #                 max_embeddings_multiples=max_embeddings_multiples,
+        #                 generator=generator,
+        #                 image=image,
+        #             ).images
+        # base_images.extend(generatedWithControlnet)
+
         if upscaler != "":
             upscaled = self.upscale(
                 base_images=base_images,
@@ -224,8 +306,8 @@ class StableDiffusion(ClsMixin):
                 use_hires_fix=use_hires_fix,
             )
             base_images.extend(upscaled)
+
             if use_hires_fix:
-                torch.cuda.empty_cache()
                 for img in upscaled:
                     with torch.inference_mode():
                         with torch.autocast("cuda"):
@@ -240,7 +322,6 @@ class StableDiffusion(ClsMixin):
                                 image=img,
                             ).images
                     base_images.extend(hires_fixed)
-                torch.cuda.empty_cache()
 
         image_output = []
         for image in base_images:
@@ -250,6 +331,15 @@ class StableDiffusion(ClsMixin):
 
         return image_output
 
+    @method()
+    def resize_image(self, image: Image.Image, scale_factor: int) -> Image.Image:
+        from PIL import Image
+
+        image = image.convert("RGB")
+        width, height = image.size
+        img = image.resize((width * scale_factor, height * scale_factor), resample=Image.LANCZOS)
+        return img
+
     @method()
     def upscale(
         self,
@@ -263,7 +353,7 @@ class StableDiffusion(ClsMixin):
         use_hires_fix: bool = False,
     ) -> list[Image.Image]:
         """
-        Upscales the given images using the given model.
+        Upscales the given images using an upscaler.
         https://github.com/xinntao/Real-ESRGAN
         """
         import numpy
@@ -312,7 +402,6 @@ class StableDiffusion(ClsMixin):
                 bg_upsampler=upsampler,
             )
 
-        torch.cuda.empty_cache()
         upscaled_imgs = []
         with tqdm(total=len(base_images)) as progress_bar:
             for img in base_images:
@@ -330,6 +419,4 @@ class StableDiffusion(ClsMixin):
                 upscaled_imgs.append(Image.fromarray(enhance_result))
                 progress_bar.update(1)
 
-        torch.cuda.empty_cache()
-
         return upscaled_imgs