Refactoring.

hodanov 2023-07-02 22:44:32 +09:00
parent 11e2a6b790
commit d810577f3b
7 changed files with 202 additions and 91 deletions

.env.example Deleted file

@@ -1,22 +0,0 @@
-HUGGING_FACE_TOKEN=""
-MODEL_REPO_ID="stabilityai/stable-diffusion-2-1"
-MODEL_NAME="stable-diffusion-2-1"
-# Modify `USE_VAE` to `true` if you want to use VAE.
-USE_VAE="false"
-# Add LoRA if you want to use one. You can use a download link of civitai.
-# ex)
-# - `LORA_NAMES="hogehoge.safetensors"`
-# - `LORA_DOWNLOAD_URLS="https://civitai.com/api/download/models/xxxxxx"`
-#
-# If you have multiple LoRAs you want to use, separate by commas like the below:
-# ex)
-# - `LORA_NAMES="hogehoge.safetensors,mogumogu.safetensors"`
-# - `LORA_DOWNLOAD_URLS="https://civitai.com/api/download/models/xxxxxx,https://civitai.com/api/download/models/xxxxxx"`
-LORA_NAMES=""
-LORA_DOWNLOAD_URLS=""
-# Add Textual Inversion you wan to use. Usage is the same as `LORA_NAMES` and `LORA_DOWNLOAD_URLS`.
-TEXTUAL_INVERSION_NAMES=""
-TEXTUAL_INVERSION_DOWNLOAD_URLS=""

Makefile

@ -1,5 +1,5 @@
deploy: deploy:
modal deploy setup.py modal deploy ./setup_files/setup.py
# `--upscaler` is a name of upscaler you want to use. # `--upscaler` is a name of upscaler you want to use.
# You can use upscalers the below: # You can use upscalers the below:

setup_files/.env.example Normal file

@@ -0,0 +1,3 @@
+# `HUGGING_FACE_TOKEN` is the token for the Hugging Face API.
+# The token can be found at https://huggingface.co/settings/token.
+HUGGING_FACE_TOKEN=""
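For reference, a minimal sketch of how this token reaches the code, assuming (as setup.py below does) that Modal's Secret.from_dotenv loads the `.env` next to the script and exposes each entry as an environment variable; the probe function here is hypothetical:

import os

from modal import Secret, Stub

stub = Stub("token-check")

# Hypothetical probe; build_image() in setup.py performs the same lookup.
@stub.function(secrets=[Secret.from_dotenv(__file__)])
def check_token():
    token = os.environ["HUGGING_FACE_TOKEN"]
    print(f"HUGGING_FACE_TOKEN is set: {bool(token)}")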

setup_files/Dockerfile

@@ -1,7 +1,9 @@
 FROM python:3.11.3-slim-bullseye
-COPY requirements.txt /
+COPY ./requirements.txt /
 RUN apt update \
     && apt install -y wget git libgl1-mesa-glx libglib2.0-0 \
+    && apt autoremove -y \
+    && apt clean -y \
     && pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu117 \
     && mkdir -p /vol/cache/esrgan \
     && wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P /vol/cache/esrgan \

setup_files/config.example.yml Normal file

@@ -0,0 +1,38 @@
+##########
+# This is the config file to set a base model, VAE and some tools.
+# Rename the file to `config.yml` before running the script.
+# Execute `modal deploy ./setup_files/setup.py` every time you modify this file.
+##########
+
+##########
+# You can use a diffusers model and VAE hosted on Hugging Face.
+model:
+  name: stable-diffusion-2-1
+  repo_id: stabilityai/stable-diffusion-2-1
+vae:
+  name: sd-vae-ft-mse
+  repo_id: stabilityai/sd-vae-ft-mse
+
+##########
+# Add a LoRA if you want to use one. You can use a download URL such as the ones below.
+# ex)
+# loras:
+#   - name: hogehoge.safetensors
+#     download_url: https://hogehoge/xxxx
+#   - name: fugafuga.safetensors
+#     download_url: https://fugafuga/xxxx
+
+##########
+# You can also use Textual Inversion and ControlNet. Usage is the same as `loras`.
+# ex)
+# textual_inversions:
+#   - name: hogehoge
+#     download_url: https://hogehoge/xxxx
+#   - name: fugafuga
+#     download_url: https://fugafuga/xxxx
+# controlnets:
+#   - name: control_v11f1e_sd15_tile
+#     repo_id: lllyasviel/control_v11f1e_sd15_tile
+# upscaler:
+#   name: RealESRGAN_x2plus
+#   use_face_enhancer: false
+#   use_hires_fix: false
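The keys in this file map one-to-one onto the lookups build_image() performs in setup.py below; a short sketch of that parsing, with a placeholder local path:

import yaml

# Read the config the same way build_image() and __enter__() in setup.py do.
with open("./setup_files/config.yml", "r") as file:  # placeholder local path
    config = yaml.safe_load(file)

model = config.get("model")  # dict with "name" and "repo_id", or None if absent
vae = config.get("vae")

# Optional sections come back as lists of dicts; .get() returns None when missing.
for lora in config.get("loras") or []:
    print(lora["name"], lora["download_url"])
for controlnet in config.get("controlnets") or []:
    print(controlnet["name"], controlnet["repo_id"])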

setup_files/requirements.txt

@@ -15,3 +15,6 @@ opencv-python
 Pillow
 torchvision
 tqdm
+controlnet_aux
+pyyaml

setup_files/setup.py

@@ -4,52 +4,63 @@ import io
 import os
 from urllib.request import Request, urlopen
 
+import diffusers
+import yaml
 from modal import Image, Mount, Secret, Stub, method
 from modal.cls import ClsMixin
 
 BASE_CACHE_PATH = "/vol/cache"
 BASE_CACHE_PATH_LORA = "/vol/cache/lora"
 BASE_CACHE_PATH_TEXTUAL_INVERSION = "/vol/cache/textual_inversion"
+BASE_CACHE_PATH_CONTROLNET = "/vol/cache/controlnet"
 
 
-def download_files(urls, file_names, file_path):
+def download_file(url, file_name, file_path):
     """
     Download files.
     """
-    file_names = file_names.split(",")
-    urls = urls.split(",")
-
-    for file_name, url in zip(file_names, urls):
-        req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
-        downloaded = urlopen(req).read()
-        dir_names = os.path.join(file_path, file_name)
-        os.makedirs(os.path.dirname(dir_names), exist_ok=True)
-        with open(dir_names, mode="wb") as f:
-            f.write(downloaded)
+    req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
+    downloaded = urlopen(req).read()
+    dir_names = os.path.join(file_path, file_name)
+    os.makedirs(os.path.dirname(dir_names), exist_ok=True)
+    with open(dir_names, mode="wb") as f:
+        f.write(downloaded)
 
 
-def download_models():
+def download_controlnet(name: str, repo_id: str, token: str):
     """
-    Downloads the model from Hugging Face and saves it to the cache path using
-    diffusers.StableDiffusionPipeline.from_pretrained().
+    Download a controlnet.
     """
-    import diffusers
-
-    hugging_face_token = os.environ["HUGGING_FACE_TOKEN"]
-    model_repo_id = os.environ["MODEL_REPO_ID"]
-    cache_path = os.path.join(BASE_CACHE_PATH, os.environ["MODEL_NAME"])
+    cache_path = os.path.join(BASE_CACHE_PATH_CONTROLNET, name)
+    controlnet = diffusers.ControlNetModel.from_pretrained(
+        repo_id,
+        use_auth_token=token,
+        cache_dir=cache_path,
+    )
+    controlnet.save_pretrained(cache_path, safe_serialization=True)
 
+
+def download_vae(name: str, repo_id: str, token: str):
+    """
+    Download a vae.
+    """
+    cache_path = os.path.join(BASE_CACHE_PATH, name)
     vae = diffusers.AutoencoderKL.from_pretrained(
-        "stabilityai/sd-vae-ft-mse",
-        use_auth_token=hugging_face_token,
+        repo_id,
+        use_auth_token=token,
         cache_dir=cache_path,
     )
     vae.save_pretrained(cache_path, safe_serialization=True)
 
+
+def download_model(name: str, repo_id: str, token: str):
+    """
+    Download a model.
+    """
+    cache_path = os.path.join(BASE_CACHE_PATH, name)
     pipe = diffusers.StableDiffusionPipeline.from_pretrained(
-        model_repo_id,
-        use_auth_token=hugging_face_token,
+        repo_id,
+        use_auth_token=token,
         cache_dir=cache_path,
     )
     pipe.save_pretrained(cache_path, safe_serialization=True)
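The new download_file() is plain urllib; the spoofed User-Agent matters because some hosts (the old .env.example suggests civitai) reject the default Python agent. A standalone sketch of the same pattern, with placeholder URL and names:

import os
from urllib.request import Request, urlopen

# Same steps as download_file() above; the URL and names are placeholders.
url = "https://civitai.com/api/download/models/xxxxxx"
target = os.path.join("/tmp/lora", "hogehoge.safetensors")

req = Request(url, headers={"User-Agent": "Mozilla/5.0"})  # spoofed agent, as above
data = urlopen(req).read()
os.makedirs(os.path.dirname(target), exist_ok=True)
with open(target, mode="wb") as f:
    f.write(data)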
@@ -59,52 +70,82 @@ def build_image():
     """
     Build the Docker image.
     """
-    download_models()
-
-    if os.environ["LORA_NAMES"] != "":
-        download_files(
-            os.getenv("LORA_DOWNLOAD_URLS"),
-            os.getenv("LORA_NAMES"),
-            BASE_CACHE_PATH_LORA,
-        )
-
-    if os.environ["TEXTUAL_INVERSION_NAMES"] != "":
-        download_files(
-            os.getenv("TEXTUAL_INVERSION_DOWNLOAD_URLS"),
-            os.getenv("TEXTUAL_INVERSION_NAMES"),
-            BASE_CACHE_PATH_TEXTUAL_INVERSION,
-        )
+    token = os.environ["HUGGING_FACE_TOKEN"]
+    config = {}
+    with open("/config.yml", "r") as file:
+        config = yaml.safe_load(file)
+
+    model = config.get("model")
+    if model is not None:
+        download_model(name=model["name"], repo_id=model["repo_id"], token=token)
+
+    vae = config.get("vae")
+    if vae is not None:
+        download_vae(name=model["name"], repo_id=vae["repo_id"], token=token)
+
+    controlnets = config.get("controlnets")
+    if controlnets is not None:
+        for controlnet in controlnets:
+            download_controlnet(name=controlnet["name"], repo_id=controlnet["repo_id"], token=token)
+
+    loras = config.get("loras")
+    if loras is not None:
+        for lora in loras:
+            download_file(
+                url=lora["download_url"],
+                file_name=lora["name"],
+                file_path=BASE_CACHE_PATH_LORA,
+            )
+
+    textual_inversions = config.get("textual_inversions")
+    if textual_inversions is not None:
+        for textual_inversion in textual_inversions:
+            download_file(
+                url=textual_inversion["download_url"],
+                file_name=textual_inversion["name"],
+                file_path=BASE_CACHE_PATH_TEXTUAL_INVERSION,
+            )
 
 
-stub_image = Image.from_dockerfile(
-    path="./Dockerfile",
-    context_mount=Mount.from_local_file("./requirements.txt"),
+stub = Stub("stable-diffusion-cli")
+base_stub = Image.from_dockerfile(
+    path="./setup_files/Dockerfile",
+    context_mount=Mount.from_local_file("./setup_files/requirements.txt"),
+)
+stub.image = base_stub.extend(
+    dockerfile_commands=[
+        "FROM base",
+        "COPY ./config.yml /",
+    ],
+    context_mount=Mount.from_local_file("./setup_files/config.yml"),
 ).run_function(
     build_image,
     secrets=[Secret.from_dotenv(__file__)],
 )
 
-stub = Stub("stable-diffusion-cli")
-stub.image = stub_image
-
 
-@stub.cls(gpu="A10G", secrets=[Secret.from_dotenv(__file__)])
+@stub.cls(
+    gpu="A10G",
+    secrets=[Secret.from_dotenv(__file__)],
+)
 class StableDiffusion(ClsMixin):
     """
     A class that wraps the Stable Diffusion pipeline and scheduler.
     """
 
     def __enter__(self):
-        import diffusers
         import torch
 
-        self.cache_path = os.path.join(BASE_CACHE_PATH, os.environ["MODEL_NAME"])
+        config = {}
+        with open("/config.yml", "r") as file:
+            config = yaml.safe_load(file)
+        self.cache_path = os.path.join(BASE_CACHE_PATH, config["model"]["name"])
         if os.path.exists(self.cache_path):
             print(f"The directory '{self.cache_path}' exists.")
         else:
-            print(f"The directory '{self.cache_path}' does not exist. Download models...")
-            download_models()
+            print(f"The directory '{self.cache_path}' does not exist.")
 
-        torch.backends.cuda.matmul.allow_tf32 = True
+        torch.cuda.memory._set_allocator_settings("max_split_size_mb:256")
 
         self.pipe = diffusers.StableDiffusionPipeline.from_pretrained(
             self.cache_path,
@@ -119,40 +160,65 @@ class StableDiffusion(ClsMixin):
             subfolder="scheduler",
         )
 
-        if os.environ["USE_VAE"] == "true":
+        vae = config.get("vae")
+        if vae is not None:
             self.pipe.vae = diffusers.AutoencoderKL.from_pretrained(
                 self.cache_path,
                 subfolder="vae",
             )
 
         self.pipe.to("cuda")
 
-        if os.environ["LORA_NAMES"] != "":
-            names = os.environ["LORA_NAMES"].split(",")
-            urls = os.environ["LORA_DOWNLOAD_URLS"].split(",")
-            for name, url in zip(names, urls):
-                path = os.path.join(BASE_CACHE_PATH_LORA, name)
+        loras = config.get("loras")
+        if loras is not None:
+            for lora in loras:
+                path = os.path.join(BASE_CACHE_PATH_LORA, lora["name"])
                 if os.path.exists(path):
                     print(f"The directory '{path}' exists.")
                 else:
                     print(f"The directory '{path}' does not exist. Download it...")
-                    download_files(url, name, BASE_CACHE_PATH_LORA)
+                    download_file(lora["download_url"], lora["name"], BASE_CACHE_PATH_LORA)
                 self.pipe.load_lora_weights(".", weight_name=path)
 
-        if os.environ["TEXTUAL_INVERSION_NAMES"] != "":
-            names = os.environ["TEXTUAL_INVERSION_NAMES"].split(",")
-            urls = os.environ["TEXTUAL_INVERSION_DOWNLOAD_URLS"].split(",")
-            for name, url in zip(names, urls):
-                path = os.path.join(BASE_CACHE_PATH_TEXTUAL_INVERSION, name)
+        textual_inversions = config.get("textual_inversions")
+        if textual_inversions is not None:
+            for textual_inversion in textual_inversions:
+                path = os.path.join(BASE_CACHE_PATH_TEXTUAL_INVERSION, textual_inversion["name"])
                 if os.path.exists(path):
                     print(f"The directory '{path}' exists.")
                 else:
                     print(f"The directory '{path}' does not exist. Download it...")
-                    download_files(url, name, BASE_CACHE_PATH_TEXTUAL_INVERSION)
+                    download_file(
+                        textual_inversion["download_url"],
+                        textual_inversion["name"],
+                        BASE_CACHE_PATH_TEXTUAL_INVERSION,
+                    )
                 self.pipe.load_textual_inversion(path)
 
         self.pipe.enable_xformers_memory_efficient_attention()
 
+        # TODO: Add support for controlnets.
+        # controlnet = diffusers.ControlNetModel.from_pretrained(
+        #     "lllyasviel/control_v11f1e_sd15_tile",
+        #     # "lllyasviel/sd-controlnet-canny",
+        #     # self.cache_path,
+        #     # subfolder="controlnet",
+        #     torch_dtype=torch.float16,
+        # )
+        # self.controlnet_pipe = diffusers.StableDiffusionControlNetPipeline.from_pretrained(
+        #     self.cache_path,
+        #     controlnet=controlnet,
+        #     custom_pipeline="lpw_stable_diffusion",
+        #     # custom_pipeline="stable_diffusion_controlnet_img2img",
+        #     scheduler=self.pipe.scheduler,
+        #     vae=self.pipe.vae,
+        #     torch_dtype=torch.float16,
+        # )
+        # self.controlnet_pipe.to("cuda")
+        # self.controlnet_pipe.enable_xformers_memory_efficient_attention()
+
     @method()
     def count_token(self, p: str, n: str) -> int:
         """
@@ -214,6 +280,22 @@ class StableDiffusion(ClsMixin):
                 generator=generator,
             ).images
 
+        # for image in base_images:
+        #     image = self.resize_image(image=image, scale_factor=2)
+        #     with torch.inference_mode():
+        #         with torch.autocast("cuda"):
+        #             generatedWithControlnet = self.controlnet_pipe(
+        #                 prompt=prompt * batch_size,
+        #                 negative_prompt=n_prompt * batch_size,
+        #                 num_inference_steps=steps,
+        #                 strength=0.3,
+        #                 guidance_scale=7.5,
+        #                 max_embeddings_multiples=max_embeddings_multiples,
+        #                 generator=generator,
+        #                 image=image,
+        #             ).images
+        #     base_images.extend(generatedWithControlnet)
+
         if upscaler != "":
             upscaled = self.upscale(
                 base_images=base_images,
@@ -224,8 +306,8 @@ class StableDiffusion(ClsMixin):
                 use_hires_fix=use_hires_fix,
             )
             base_images.extend(upscaled)
 
         if use_hires_fix:
-            torch.cuda.empty_cache()
             for img in upscaled:
                 with torch.inference_mode():
                     with torch.autocast("cuda"):
@@ -240,7 +322,6 @@ class StableDiffusion(ClsMixin):
                             image=img,
                         ).images
                     base_images.extend(hires_fixed)
-            torch.cuda.empty_cache()
 
         image_output = []
         for image in base_images:
@@ -250,6 +331,15 @@ class StableDiffusion(ClsMixin):
 
         return image_output
 
+    @method()
+    def resize_image(self, image: Image.Image, scale_factor: int) -> Image.Image:
+        from PIL import Image
+
+        image = image.convert("RGB")
+        width, height = image.size
+        img = image.resize((width * scale_factor, height * scale_factor), resample=Image.LANCZOS)
+        return img
+
     @method()
     def upscale(
         self,
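The new resize_image method, which the commented-out ControlNet pass above calls before img2img, is plain PIL; a standalone sketch of the same operation, with placeholder file names:

from PIL import Image

# Double an image's dimensions with Lanczos resampling, as resize_image() does.
image = Image.open("input.png").convert("RGB")  # placeholder input file
width, height = image.size
resized = image.resize((width * 2, height * 2), resample=Image.LANCZOS)
resized.save("output.png")  # placeholder output file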
@@ -263,7 +353,7 @@ class StableDiffusion(ClsMixin):
         use_hires_fix: bool = False,
     ) -> list[Image.Image]:
         """
-        Upscales the given images using the given model.
+        Upscales the given images using an upscaler.
         https://github.com/xinntao/Real-ESRGAN
         """
         import numpy
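For context, the upscaler behind this docstring is Real-ESRGAN, whose x4plus weight the Dockerfile above pre-downloads into /vol/cache/esrgan. A minimal sketch of the library's basic (no face-enhancer) path, assuming the realesrgan package's documented API; file names are placeholders:

import numpy
from basicsr.archs.rrdbnet_arch import RRDBNet
from PIL import Image
from realesrgan import RealESRGANer

# RealESRGAN_x4plus is the weight the Dockerfile fetches at build time.
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
upsampler = RealESRGANer(
    scale=4,
    model_path="/vol/cache/esrgan/RealESRGAN_x4plus.pth",
    model=model,
    half=True,  # fp16 inference on CUDA
)
img = numpy.array(Image.open("input.png"))  # placeholder input
output, _ = upsampler.enhance(img, outscale=4)
Image.fromarray(output).save("upscaled.png")  # placeholder output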
@@ -312,7 +402,6 @@ class StableDiffusion(ClsMixin):
             bg_upsampler=upsampler,
         )
 
-        torch.cuda.empty_cache()
         upscaled_imgs = []
         with tqdm(total=len(base_images)) as progress_bar:
             for img in base_images:
@@ -330,6 +419,4 @@ class StableDiffusion(ClsMixin):
                     upscaled_imgs.append(Image.fromarray(enhance_result))
                     progress_bar.update(1)
 
-        torch.cuda.empty_cache()
-
         return upscaled_imgs