Merge pull request #19 from hodanov/feature/refactoring

Refactoring
This commit is contained in:
hodanov 2023-07-02 23:20:15 +09:00 committed by GitHub
commit 0a8060cc85
8 changed files with 231 additions and 98 deletions

setup_files/.env.example

@@ -1,22 +1,3 @@
# `HUGGING_FACE_TOKEN` is the token for the Hugging Face API.
# The token can be found at https://huggingface.co/settings/token.
HUGGING_FACE_TOKEN=""
MODEL_REPO_ID="stabilityai/stable-diffusion-2-1"
MODEL_NAME="stable-diffusion-2-1"
# Modify `USE_VAE` to `true` if you want to use VAE.
USE_VAE="false"
# Add LoRA if you want to use one. You can use a download link from civitai.
# ex)
# - `LORA_NAMES="hogehoge.safetensors"`
# - `LORA_DOWNLOAD_URLS="https://civitai.com/api/download/models/xxxxxx"`
#
# If you have multiple LoRAs you want to use, separate them with commas like below:
# ex)
# - `LORA_NAMES="hogehoge.safetensors,mogumogu.safetensors"`
# - `LORA_DOWNLOAD_URLS="https://civitai.com/api/download/models/xxxxxx,https://civitai.com/api/download/models/xxxxxx"`
LORA_NAMES=""
LORA_DOWNLOAD_URLS=""
# Add a Textual Inversion you want to use. Usage is the same as `LORA_NAMES` and `LORA_DOWNLOAD_URLS`.
TEXTUAL_INVERSION_NAMES=""
TEXTUAL_INVERSION_DOWNLOAD_URLS=""
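
For reference, the names and URLs above are paired positionally. A minimal sketch of how the pre-refactor `setup.py` (removed later in this diff) consumes these comma-separated values, using the placeholder entries from the comments; the `xxxxxx` segments stay elided:

```python
# Placeholder values copied from the examples above.
names = "hogehoge.safetensors,mogumogu.safetensors".split(",")
urls = (
    "https://civitai.com/api/download/models/xxxxxx,"
    "https://civitai.com/api/download/models/xxxxxx"
).split(",")
# The n-th name is saved from the n-th URL.
for name, url in zip(names, urls):
    print(f"{name} <- {url}")
```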

.gitignore

@@ -1,5 +1,8 @@
.DS_Store
.mypy_cache/
__pycache__/
outputs/
.env
.mypy_cache/
.python-version
__pycache__/
config.yml
memo.md
outputs/

Makefile

@@ -1,5 +1,5 @@
deploy:
	modal deploy setup.py
	modal deploy ./setup_files/setup.py
# `--upscaler` is the name of the upscaler you want to use.
# You can use the upscalers below:

README.md

@@ -29,13 +29,33 @@ Please see [the documentation of Modal](https://modal.com/docs/guide) for modals
To use the script, follow the steps below.
1. git clone the repository.
2. Create the `.env` file and set a Hugging Face API token and a model with reference to `.env.example`.
3. Open the Makefile and set prompts.
4. Execute the `make deploy` command. An application will be deployed to Modal by the command.
5. Execute the `make run` command.
2. Create the `./setup_files/.env` file and set a Hugging Face API token with reference to `./setup_files/.env.example`.
3. Copy `./setup_files/config.sample.yml` to `./setup_files/config.yml`.
4. Open the Makefile and set prompts.
5. Execute the `make deploy` command. The application will be deployed to Modal.
6. Execute the `make run` command.
Images are generated and output to the `outputs/` directory.
## Directory structure
```
.
├── .env # Secrets manager
├── Makefile
├── README.md
├── sdcli/ # A directory with scripts to run inference.
│   ├── __init__.py
│   ├── outputs/ # Images are output to this directory.
│   ├── txt2img.py # A script to run txt2img inference.
│   └── util.py
└── setup_files/ # A directory with config files.
    ├── Dockerfile # To build a base image.
    ├── config.yml # To set a model, VAE and some tools.
    ├── requirements.txt
    └── setup.py # Build an application to deploy on Modal.
```
Thank you.
## Author

setup_files/Dockerfile

@@ -1,7 +1,9 @@
FROM python:3.11.3-slim-bullseye
COPY requirements.txt /
COPY ./requirements.txt /
RUN apt update \
    && apt install -y wget git libgl1-mesa-glx libglib2.0-0 \
    && apt autoremove -y \
    && apt clean -y \
    && pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu117 \
    && mkdir -p /vol/cache/esrgan \
    && wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P /vol/cache/esrgan \

setup_files/config.sample.yml

@@ -0,0 +1,38 @@
##########
# This is the config file to set a base model, VAE and some tools.
# Rename the file to `config.yml` before running the script.
# Execute `modal deploy ./setup_files/setup.py` every time you modify this file.
##########
##########
# You can use a diffusers model and VAE on Hugging Face.
model:
  name: stable-diffusion-2-1
  repo_id: stabilityai/stable-diffusion-2-1
vae:
  name: sd-vae-ft-mse
  repo_id: stabilityai/sd-vae-ft-mse
##########
# Add LoRA if you want to use one. You can use a download URL such as the one below.
# ex)
# loras:
#   - name: hogehoge.safetensors
#     download_url: https://hogehoge/xxxx
#   - name: fugafuga.safetensors
#     download_url: https://fugafuga/xxxx
##########
# You can also use Textual Inversion and ControlNet. Usage is the same as `loras`.
# ex)
# textual_inversions:
#   - name: hogehoge
#     download_url: https://hogehoge/xxxx
#   - name: fugafuga
#     download_url: https://fugafuga/xxxx
# controlnets:
#   - name: control_v11f1e_sd15_tile
#     repo_id: lllyasviel/control_v11f1e_sd15_tile
# upscaler:
#   name: RealESRGAN_x2plus
#   use_face_enhancer: false
#   use_hires_fix: false
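
For orientation, a minimal sketch of how `setup.py` (later in this diff) consumes this file once it has been copied to `config.yml`. The local path here is an assumption for running outside the container; the deployed image reads it from `/config.yml`:

```python
import yaml

# Load the config and walk the optional sections, as build_image() does.
with open("./setup_files/config.yml", "r") as file:  # assumed local path
    config = yaml.safe_load(file)

model = config.get("model")
if model is not None:
    print(f"model: {model['name']} ({model['repo_id']})")

for lora in config.get("loras") or []:
    print(f"lora: {lora['name']} <- {lora['download_url']}")
```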

setup_files/requirements.txt

@@ -15,3 +15,6 @@ opencv-python
Pillow
torchvision
tqdm
controlnet_aux
pyyaml

setup_files/setup.py

@@ -4,52 +4,63 @@ import io
import os
from urllib.request import Request, urlopen
import diffusers
import yaml
from modal import Image, Mount, Secret, Stub, method
from modal.cls import ClsMixin
BASE_CACHE_PATH = "/vol/cache"
BASE_CACHE_PATH_LORA = "/vol/cache/lora"
BASE_CACHE_PATH_TEXTUAL_INVERSION = "/vol/cache/textual_inversion"
BASE_CACHE_PATH_CONTROLNET = "/vol/cache/controlnet"
def download_files(urls, file_names, file_path):
def download_file(url, file_name, file_path):
    """
    Download files.
    """
    file_names = file_names.split(",")
    urls = urls.split(",")
    for file_name, url in zip(file_names, urls):
        req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
        downloaded = urlopen(req).read()
        dir_names = os.path.join(file_path, file_name)
        os.makedirs(os.path.dirname(dir_names), exist_ok=True)
        with open(dir_names, mode="wb") as f:
            f.write(downloaded)
    req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
    downloaded = urlopen(req).read()
    dir_names = os.path.join(file_path, file_name)
    os.makedirs(os.path.dirname(dir_names), exist_ok=True)
    with open(dir_names, mode="wb") as f:
        f.write(downloaded)
def download_models():
def download_controlnet(name: str, repo_id: str, token: str):
    """
    Downloads the model from Hugging Face and saves it to the cache path using
    diffusers.StableDiffusionPipeline.from_pretrained().
    Download a controlnet.
    """
    import diffusers
    cache_path = os.path.join(BASE_CACHE_PATH_CONTROLNET, name)
    controlnet = diffusers.ControlNetModel.from_pretrained(
        repo_id,
        use_auth_token=token,
        cache_dir=cache_path,
    )
    controlnet.save_pretrained(cache_path, safe_serialization=True)
    hugging_face_token = os.environ["HUGGING_FACE_TOKEN"]
    model_repo_id = os.environ["MODEL_REPO_ID"]
    cache_path = os.path.join(BASE_CACHE_PATH, os.environ["MODEL_NAME"])
def download_vae(name: str, repo_id: str, token: str):
    """
    Download a vae.
    """
    cache_path = os.path.join(BASE_CACHE_PATH, name)
    vae = diffusers.AutoencoderKL.from_pretrained(
        "stabilityai/sd-vae-ft-mse",
        use_auth_token=hugging_face_token,
        repo_id,
        use_auth_token=token,
        cache_dir=cache_path,
    )
    vae.save_pretrained(cache_path, safe_serialization=True)
def download_model(name: str, repo_id: str, token: str):
    """
    Download a model.
    """
    cache_path = os.path.join(BASE_CACHE_PATH, name)
    pipe = diffusers.StableDiffusionPipeline.from_pretrained(
        model_repo_id,
        use_auth_token=hugging_face_token,
        repo_id,
        use_auth_token=token,
        cache_dir=cache_path,
    )
    pipe.save_pretrained(cache_path, safe_serialization=True)
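
The helpers above replace the env-var-driven `download_models`/`download_files` pair. A hedged usage sketch of `download_file` with placeholder values; `build_image()` in the next hunk makes the equivalent call from `config.yml` entries:

```python
# Fetches one file and writes it under the LoRA cache directory,
# creating parent directories as needed ("xxxxxx" is a placeholder).
download_file(
    url="https://civitai.com/api/download/models/xxxxxx",
    file_name="hogehoge.safetensors",
    file_path=BASE_CACHE_PATH_LORA,  # /vol/cache/lora
)
```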
@@ -59,52 +70,82 @@ def build_image():
    """
    Build the Docker image.
    """
    download_models()
    token = os.environ["HUGGING_FACE_TOKEN"]
    config = {}
    with open("/config.yml", "r") as file:
        config = yaml.safe_load(file)
    if os.environ["LORA_NAMES"] != "":
        download_files(
            os.getenv("LORA_DOWNLOAD_URLS"),
            os.getenv("LORA_NAMES"),
            BASE_CACHE_PATH_LORA,
        )
    model = config.get("model")
    if model is not None:
        download_model(name=model["name"], repo_id=model["repo_id"], token=token)
    if os.environ["TEXTUAL_INVERSION_NAMES"] != "":
        download_files(
            os.getenv("TEXTUAL_INVERSION_DOWNLOAD_URLS"),
            os.getenv("TEXTUAL_INVERSION_NAMES"),
            BASE_CACHE_PATH_TEXTUAL_INVERSION,
        )
    vae = config.get("vae")
    if vae is not None:
        download_vae(name=model["name"], repo_id=vae["repo_id"], token=token)
    controlnets = config.get("controlnets")
    if controlnets is not None:
        for controlnet in controlnets:
            download_controlnet(name=controlnet["name"], repo_id=controlnet["repo_id"], token=token)
    loras = config.get("loras")
    if loras is not None:
        for lora in loras:
            download_file(
                url=lora["download_url"],
                file_name=lora["name"],
                file_path=BASE_CACHE_PATH_LORA,
            )
    textual_inversions = config.get("textual_inversions")
    if textual_inversions is not None:
        for textual_inversion in textual_inversions:
            download_file(
                url=textual_inversion["download_url"],
                file_name=textual_inversion["name"],
                file_path=BASE_CACHE_PATH_TEXTUAL_INVERSION,
            )
stub_image = Image.from_dockerfile(
    path="./Dockerfile",
    context_mount=Mount.from_local_file("./requirements.txt"),
stub = Stub("stable-diffusion-cli")
base_stub = Image.from_dockerfile(
    path="./setup_files/Dockerfile",
    context_mount=Mount.from_local_file("./setup_files/requirements.txt"),
)
stub.image = base_stub.extend(
    dockerfile_commands=[
        "FROM base",
        "COPY ./config.yml /",
    ],
    context_mount=Mount.from_local_file("./setup_files/config.yml"),
).run_function(
    build_image,
    secrets=[Secret.from_dotenv(__file__)],
)
stub = Stub("stable-diffusion-cli")
stub.image = stub_image
@stub.cls(gpu="A10G", secrets=[Secret.from_dotenv(__file__)])
@stub.cls(
    gpu="A10G",
    secrets=[Secret.from_dotenv(__file__)],
)
class StableDiffusion(ClsMixin):
    """
    A class that wraps the Stable Diffusion pipeline and scheduler.
    """
    def __enter__(self):
        import diffusers
        import torch
        self.cache_path = os.path.join(BASE_CACHE_PATH, os.environ["MODEL_NAME"])
        config = {}
        with open("/config.yml", "r") as file:
            config = yaml.safe_load(file)
        self.cache_path = os.path.join(BASE_CACHE_PATH, config["model"]["name"])
        if os.path.exists(self.cache_path):
            print(f"The directory '{self.cache_path}' exists.")
        else:
            print(f"The directory '{self.cache_path}' does not exist. Download models...")
            download_models()
            print(f"The directory '{self.cache_path}' does not exist.")
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.cuda.memory._set_allocator_settings("max_split_size_mb:256")
        self.pipe = diffusers.StableDiffusionPipeline.from_pretrained(
            self.cache_path,
@@ -119,40 +160,65 @@ class StableDiffusion(ClsMixin):
            subfolder="scheduler",
        )
        if os.environ["USE_VAE"] == "true":
        vae = config.get("vae")
        if vae is not None:
            self.pipe.vae = diffusers.AutoencoderKL.from_pretrained(
                self.cache_path,
                subfolder="vae",
            )
        self.pipe.to("cuda")
if os.environ["LORA_NAMES"] != "":
names = os.environ["LORA_NAMES"].split(",")
urls = os.environ["LORA_DOWNLOAD_URLS"].split(",")
for name, url in zip(names, urls):
path = os.path.join(BASE_CACHE_PATH_LORA, name)
loras = config.get("loras")
if loras is not None:
for lora in loras:
path = os.path.join(BASE_CACHE_PATH_LORA, lora["name"])
if os.path.exists(path):
print(f"The directory '{path}' exists.")
else:
print(f"The directory '{path}' does not exist. Download it...")
download_files(url, name, BASE_CACHE_PATH_LORA)
download_file(lora["download_url"], lora["name"], BASE_CACHE_PATH_LORA)
self.pipe.load_lora_weights(".", weight_name=path)
if os.environ["TEXTUAL_INVERSION_NAMES"] != "":
names = os.environ["TEXTUAL_INVERSION_NAMES"].split(",")
urls = os.environ["TEXTUAL_INVERSION_DOWNLOAD_URLS"].split(",")
for name, url in zip(names, urls):
path = os.path.join(BASE_CACHE_PATH_TEXTUAL_INVERSION, name)
textual_inversions = config.get("textual_inversions")
if textual_inversions is not None:
for textual_inversion in textual_inversions:
path = os.path.join(BASE_CACHE_PATH_TEXTUAL_INVERSION, textual_inversion["name"])
if os.path.exists(path):
print(f"The directory '{path}' exists.")
else:
print(f"The directory '{path}' does not exist. Download it...")
download_files(url, name, BASE_CACHE_PATH_TEXTUAL_INVERSION)
download_file(
textual_inversion["download_url"],
textual_inversion["name"],
BASE_CACHE_PATH_TEXTUAL_INVERSION,
)
self.pipe.load_textual_inversion(path)
        self.pipe.enable_xformers_memory_efficient_attention()
        # TODO: Add support for controlnets.
        # controlnet = diffusers.ControlNetModel.from_pretrained(
        #     "lllyasviel/control_v11f1e_sd15_tile",
        #     # "lllyasviel/sd-controlnet-canny",
        #     # self.cache_path,
        #     # subfolder="controlnet",
        #     torch_dtype=torch.float16,
        # )
        # self.controlnet_pipe = diffusers.StableDiffusionControlNetPipeline.from_pretrained(
        #     self.cache_path,
        #     controlnet=controlnet,
        #     custom_pipeline="lpw_stable_diffusion",
        #     # custom_pipeline="stable_diffusion_controlnet_img2img",
        #     scheduler=self.pipe.scheduler,
        #     vae=self.pipe.vae,
        #     torch_dtype=torch.float16,
        # )
        # self.controlnet_pipe.to("cuda")
        # self.controlnet_pipe.enable_xformers_memory_efficient_attention()
    @method()
    def count_token(self, p: str, n: str) -> int:
        """
@@ -214,6 +280,22 @@ class StableDiffusion(ClsMixin):
                generator=generator,
            ).images
        # for image in base_images:
        #     image = self.resize_image(image=image, scale_factor=2)
        #     with torch.inference_mode():
        #         with torch.autocast("cuda"):
        #             generatedWithControlnet = self.controlnet_pipe(
        #                 prompt=prompt * batch_size,
        #                 negative_prompt=n_prompt * batch_size,
        #                 num_inference_steps=steps,
        #                 strength=0.3,
        #                 guidance_scale=7.5,
        #                 max_embeddings_multiples=max_embeddings_multiples,
        #                 generator=generator,
        #                 image=image,
        #             ).images
        #     base_images.extend(generatedWithControlnet)
        if upscaler != "":
            upscaled = self.upscale(
                base_images=base_images,
@@ -224,8 +306,8 @@
                use_hires_fix=use_hires_fix,
            )
            base_images.extend(upscaled)
        if use_hires_fix:
            torch.cuda.empty_cache()
            for img in upscaled:
                with torch.inference_mode():
                    with torch.autocast("cuda"):
@@ -240,7 +322,6 @@
                            image=img,
                        ).images
                base_images.extend(hires_fixed)
        torch.cuda.empty_cache()
        image_output = []
        for image in base_images:
@@ -250,6 +331,15 @@
        return image_output
    @method()
    def resize_image(self, image: Image.Image, scale_factor: int) -> Image.Image:
        from PIL import Image
        image = image.convert("RGB")
        width, height = image.size
        img = image.resize((width * scale_factor, height * scale_factor), resample=Image.LANCZOS)
        return img
    @method()
    def upscale(
        self,
@@ -263,11 +353,10 @@
        use_hires_fix: bool = False,
    ) -> list[Image.Image]:
        """
        Upscales the given images using the given model.
        Upscales the given images using an upscaler.
        https://github.com/xinntao/Real-ESRGAN
        """
        import numpy
        import torch
        from basicsr.archs.rrdbnet_arch import RRDBNet
        from PIL import Image
        from realesrgan import RealESRGANer
@@ -312,7 +401,6 @@
            bg_upsampler=upsampler,
        )
        torch.cuda.empty_cache()
        upscaled_imgs = []
        with tqdm(total=len(base_images)) as progress_bar:
            for img in base_images:
@@ -330,6 +418,4 @@
                upscaled_imgs.append(Image.fromarray(enhance_result))
                progress_bar.update(1)
        torch.cuda.empty_cache()
        return upscaled_imgs