commit 0a8060cc85
.env.example (23 lines changed)
@@ -1,22 +1,3 @@
 # `HUGGING_FACE_TOKEN` is the token for the Hugging Face API.
 # The token can be found at https://huggingface.co/settings/token.
 HUGGING_FACE_TOKEN=""
-MODEL_REPO_ID="stabilityai/stable-diffusion-2-1"
-MODEL_NAME="stable-diffusion-2-1"
-
-# Modify `USE_VAE` to `true` if you want to use VAE.
-USE_VAE="false"
-
-# Add LoRA if you want to use one. You can use a download link of civitai.
-# ex)
-# - `LORA_NAMES="hogehoge.safetensors"`
-# - `LORA_DOWNLOAD_URLS="https://civitai.com/api/download/models/xxxxxx"`
-#
-# If you have multiple LoRAs you want to use, separate by commas like the below:
-# ex)
-# - `LORA_NAMES="hogehoge.safetensors,mogumogu.safetensors"`
-# - `LORA_DOWNLOAD_URLS="https://civitai.com/api/download/models/xxxxxx,https://civitai.com/api/download/models/xxxxxx"`
-LORA_NAMES=""
-LORA_DOWNLOAD_URLS=""
-
-# Add Textual Inversion you wan to use. Usage is the same as `LORA_NAMES` and `LORA_DOWNLOAD_URLS`.
-TEXTUAL_INVERSION_NAMES=""
-TEXTUAL_INVERSION_DOWNLOAD_URLS=""
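Note: the removed variables relied on a positional comma-separated convention, pairing each name with the URL at the same index. For reference, a minimal sketch of that convention with hypothetical values, matching what the old `download_files` helper in `setup.py` did (see the hunks further down this diff):

```python
import os

# Hypothetical values; the real .env listed civitai download URLs.
os.environ["LORA_NAMES"] = "hogehoge.safetensors,mogumogu.safetensors"
os.environ["LORA_DOWNLOAD_URLS"] = "https://example.com/a,https://example.com/b"

names = os.environ["LORA_NAMES"].split(",")
urls = os.environ["LORA_DOWNLOAD_URLS"].split(",")
for name, url in zip(names, urls):
    # Each LoRA name pairs with the URL at the same position.
    print(name, url)
```

The YAML config introduced by this commit replaces this with explicit name/download_url pairs, which removes the risk of the two lists drifting out of sync.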
.gitignore (vendored, 9 lines changed)
@@ -1,5 +1,8 @@
 .DS_Store
-.mypy_cache/
-__pycache__/
-outputs/
 .env
+.mypy_cache/
+.python-version
+__pycache__/
+config.yml
+memo.md
+outputs/
Makefile (2 lines changed)
@@ -1,5 +1,5 @@
 deploy:
-	modal deploy setup.py
+	modal deploy ./setup_files/setup.py
 
 # `--upscaler` is a name of upscaler you want to use.
 # You can use upscalers the below:
README.md (28 lines changed)
@@ -29,13 +29,33 @@ Please see [the documentation of Modal](https://modal.com/docs/guide) for modals
 To use the script, execute the below.
 
 1. git clone the repository.
-2. Create the `.env` file and set a huggingface API token and a model with reference to `.env.example`.
-3. Open the Makefile and set prompts.
-4. Execute `make deploy` command. An application will be deployed to Modal by the command.
-5. Execute `make run` command.
+2. Create the `./setup_files/.env` file and set a huggingface API token with reference to `./setup_files/.env.example`.
+3. Copy `./setup_files/config.sample.yml` to `./setup_files/config.yml`
+4. Open the Makefile and set prompts.
+5. Execute `make deploy` command. An application will be deployed to Modal.
+6. Execute `make run` command.
 
 Images are generated and output to the `outputs/` directory.
 
+## Directory structure
+
+```
+.
+├── .env # Secrets manager
+├── Makefile
+├── README.md
+├── sdcli/ # A directory with scripts to run inference.
+│   ├── __init__.py
+│   ├── outputs/ # Images are outputted this directory.
+│   ├── txt2img.py # A script to run txt2img inference.
+│   └── util.py
+└── setup_files/ # A directory with config files.
+    ├── Dockerfile # To build a base image.
+    ├── config.yml # To set a model, vae and some tools.
+    ├── requirements.txt
+    └── setup.py # Build an application to deploy on Modal.
+```
 
 Thank you.
 
 ## Author
setup_files/Dockerfile
@@ -1,7 +1,9 @@
 FROM python:3.11.3-slim-bullseye
-COPY requirements.txt /
+COPY ./requirements.txt /
 RUN apt update \
     && apt install -y wget git libgl1-mesa-glx libglib2.0-0 \
     && apt autoremove -y \
     && apt clean -y \
     && pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu117 \
+    && mkdir -p /vol/cache/esrgan \
+    && wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P /vol/cache/esrgan \
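The two added `RUN` steps bake the Real-ESRGAN weights into the image at `/vol/cache/esrgan`, so the upscaler can load them without a runtime download. A minimal sketch of resolving that baked-in path from Python; the constant name here is an assumption mirroring the cache constants in `setup.py`, not part of this commit:

```python
import os

# Assumed constant following the /vol/cache layout used in setup.py.
BASE_CACHE_PATH_ESRGAN = "/vol/cache/esrgan"

def esrgan_weight_path(file_name: str = "RealESRGAN_x4plus.pth") -> str:
    """Return the path of a weight file baked in by the Dockerfile."""
    path = os.path.join(BASE_CACHE_PATH_ESRGAN, file_name)
    if not os.path.exists(path):
        raise FileNotFoundError(f"{path} is missing; rebuild the image.")
    return path
```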
setup_files/config.sample.yml (new file, 38 lines)
@@ -0,0 +1,38 @@
+##########
+# This is the config file to set a base model, vae and some tools.
+# Rename the file to `config.yml` before running the script.
+# Execute `modal deploy ./setup_files/setup.py` every time modify this file.
+##########
+
+##########
+# You can use a diffusers model and VAE on hugging face.
+model:
+  name: stable-diffusion-2-1
+  repo_id: stabilityai/stable-diffusion-2-1
+vae:
+  name: sd-vae-ft-mse
+  repo_id: stabilityai/sd-vae-ft-mse
+##########
+# Add LoRA if you want to use one. You can use a download url such as the below.
+# ex)
+# loras:
+# - name: hogehoge.safetensors
+#   download_url: https://hogehoge/xxxx
+# - name: fugafuga.safetensors
+#   download_url: https://fugafuga/xxxx
+
+##########
+# You can use Textual Inversion and ControlNet also. Usage is the same as `loras`.
+# ex)
+# textual_inversions:
+# - name: hogehoge
+#   download_url: https://hogehoge/xxxx
+# - name: fugafuga
+#   download_url: https://fugafuga/xxxx
+# cotrolnets:
+# - name: control_v11f1e_sd15_tile
+#   repo_id: lllyasviel/control_v11f1e_sd15_tile
+# upscaler:
+#   name: RealESRGAN_x2plus
+#   use_face_enhancer: false
+#   use_hires_fix: false
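Every section except `model` is optional, so consumers load the file with `yaml.safe_load` and probe keys with `dict.get`, as the `setup.py` hunks below do. A minimal standalone sketch, assuming the file has been copied to `/config.yml` as the image build does:

```python
import yaml

with open("/config.yml", "r") as file:
    config = yaml.safe_load(file)

model = config["model"]                 # required: name + repo_id
vae = config.get("vae")                 # optional
for lora in config.get("loras") or []:  # optional list of {name, download_url}
    print(lora["name"], lora["download_url"])
```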
setup_files/requirements.txt
@@ -15,3 +15,6 @@ opencv-python
 Pillow
 torchvision
 tqdm
+
+controlnet_aux
+pyyaml
setup_files/setup.py
@@ -4,52 +4,63 @@ import io
 import os
 from urllib.request import Request, urlopen
 
+import diffusers
+import yaml
 from modal import Image, Mount, Secret, Stub, method
 from modal.cls import ClsMixin
 
 BASE_CACHE_PATH = "/vol/cache"
 BASE_CACHE_PATH_LORA = "/vol/cache/lora"
 BASE_CACHE_PATH_TEXTUAL_INVERSION = "/vol/cache/textual_inversion"
+BASE_CACHE_PATH_CONTROLNET = "/vol/cache/controlnet"
 
 
-def download_files(urls, file_names, file_path):
+def download_file(url, file_name, file_path):
     """
     Download files.
     """
-    file_names = file_names.split(",")
-    urls = urls.split(",")
-
-    for file_name, url in zip(file_names, urls):
-        req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
-        downloaded = urlopen(req).read()
-
-        dir_names = os.path.join(file_path, file_name)
-        os.makedirs(os.path.dirname(dir_names), exist_ok=True)
-        with open(dir_names, mode="wb") as f:
-            f.write(downloaded)
+    req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
+    downloaded = urlopen(req).read()
+    dir_names = os.path.join(file_path, file_name)
+    os.makedirs(os.path.dirname(dir_names), exist_ok=True)
+    with open(dir_names, mode="wb") as f:
+        f.write(downloaded)
 
 
-def download_models():
+def download_controlnet(name: str, repo_id: str, token: str):
     """
-    Downloads the model from Hugging Face and saves it to the cache path using
-    diffusers.StableDiffusionPipeline.from_pretrained().
+    Download a controlnet.
     """
-    import diffusers
+    cache_path = os.path.join(BASE_CACHE_PATH_CONTROLNET, name)
+    controlnet = diffusers.ControlNetModel.from_pretrained(
+        repo_id,
+        use_auth_token=token,
+        cache_dir=cache_path,
+    )
+    controlnet.save_pretrained(cache_path, safe_serialization=True)
 
-    hugging_face_token = os.environ["HUGGING_FACE_TOKEN"]
-    model_repo_id = os.environ["MODEL_REPO_ID"]
-    cache_path = os.path.join(BASE_CACHE_PATH, os.environ["MODEL_NAME"])
 
+def download_vae(name: str, repo_id: str, token: str):
+    """
+    Download a vae.
+    """
+    cache_path = os.path.join(BASE_CACHE_PATH, name)
     vae = diffusers.AutoencoderKL.from_pretrained(
-        "stabilityai/sd-vae-ft-mse",
-        use_auth_token=hugging_face_token,
+        repo_id,
+        use_auth_token=token,
         cache_dir=cache_path,
     )
     vae.save_pretrained(cache_path, safe_serialization=True)
 
 
+def download_model(name: str, repo_id: str, token: str):
+    """
+    Download a model.
+    """
+    cache_path = os.path.join(BASE_CACHE_PATH, name)
     pipe = diffusers.StableDiffusionPipeline.from_pretrained(
-        model_repo_id,
-        use_auth_token=hugging_face_token,
+        repo_id,
+        use_auth_token=token,
         cache_dir=cache_path,
     )
     pipe.save_pretrained(cache_path, safe_serialization=True)
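A quick illustration of how the split-out helpers are meant to be called. The values below are hypothetical; in `build_image` they come from the parsed `/config.yml` and the `HUGGING_FACE_TOKEN` secret:

```python
import os

# Hypothetical invocation of the helpers defined above in this file.
download_model(
    name="stable-diffusion-2-1",
    repo_id="stabilityai/stable-diffusion-2-1",
    token=os.environ["HUGGING_FACE_TOKEN"],
)
download_file(
    url="https://example.com/hogehoge.safetensors",  # hypothetical URL
    file_name="hogehoge.safetensors",
    file_path=BASE_CACHE_PATH_LORA,
)
```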
@@ -59,52 +70,82 @@ def build_image():
     """
     Build the Docker image.
     """
-    download_models()
+    token = os.environ["HUGGING_FACE_TOKEN"]
+    config = {}
+    with open("/config.yml", "r") as file:
+        config = yaml.safe_load(file)
 
-    if os.environ["LORA_NAMES"] != "":
-        download_files(
-            os.getenv("LORA_DOWNLOAD_URLS"),
-            os.getenv("LORA_NAMES"),
-            BASE_CACHE_PATH_LORA,
-        )
+    model = config.get("model")
+    if model is not None:
+        download_model(name=model["name"], repo_id=model["repo_id"], token=token)
 
-    if os.environ["TEXTUAL_INVERSION_NAMES"] != "":
-        download_files(
-            os.getenv("TEXTUAL_INVERSION_DOWNLOAD_URLS"),
-            os.getenv("TEXTUAL_INVERSION_NAMES"),
-            BASE_CACHE_PATH_TEXTUAL_INVERSION,
-        )
+    vae = config.get("vae")
+    if vae is not None:
+        download_vae(name=model["name"], repo_id=vae["repo_id"], token=token)
+
+    controlnets = config.get("controlnets")
+    if controlnets is not None:
+        for controlnet in controlnets:
+            download_controlnet(name=controlnet["name"], repo_id=controlnet["repo_id"], token=token)
+
+    loras = config.get("loras")
+    if loras is not None:
+        for lora in loras:
+            download_file(
+                url=lora["download_url"],
+                file_name=lora["name"],
+                file_path=BASE_CACHE_PATH_LORA,
+            )
+
+    textual_inversions = config.get("textual_inversions")
+    if textual_inversions is not None:
+        for textual_inversion in textual_inversions:
+            download_file(
+                url=textual_inversion["download_url"],
+                file_name=textual_inversion["name"],
+                file_path=BASE_CACHE_PATH_TEXTUAL_INVERSION,
+            )
 
 
-stub_image = Image.from_dockerfile(
-    path="./Dockerfile",
-    context_mount=Mount.from_local_file("./requirements.txt"),
+stub = Stub("stable-diffusion-cli")
+base_stub = Image.from_dockerfile(
+    path="./setup_files/Dockerfile",
+    context_mount=Mount.from_local_file("./setup_files/requirements.txt"),
+)
+stub.image = base_stub.extend(
+    dockerfile_commands=[
+        "FROM base",
+        "COPY ./config.yml /",
+    ],
+    context_mount=Mount.from_local_file("./setup_files/config.yml"),
 ).run_function(
     build_image,
     secrets=[Secret.from_dotenv(__file__)],
 )
-stub = Stub("stable-diffusion-cli")
-stub.image = stub_image
 
 
-@stub.cls(gpu="A10G", secrets=[Secret.from_dotenv(__file__)])
+@stub.cls(
+    gpu="A10G",
+    secrets=[Secret.from_dotenv(__file__)],
+)
 class StableDiffusion(ClsMixin):
     """
     A class that wraps the Stable Diffusion pipeline and scheduler.
     """
 
     def __enter__(self):
         import diffusers
         import torch
 
-        self.cache_path = os.path.join(BASE_CACHE_PATH, os.environ["MODEL_NAME"])
+        config = {}
+        with open("/config.yml", "r") as file:
+            config = yaml.safe_load(file)
+        self.cache_path = os.path.join(BASE_CACHE_PATH, config["model"]["name"])
         if os.path.exists(self.cache_path):
             print(f"The directory '{self.cache_path}' exists.")
         else:
-            print(f"The directory '{self.cache_path}' does not exist. Download models...")
-            download_models()
+            print(f"The directory '{self.cache_path}' does not exist.")
 
         torch.backends.cuda.matmul.allow_tf32 = True
         torch.cuda.memory._set_allocator_settings("max_split_size_mb:256")
 
         self.pipe = diffusers.StableDiffusionPipeline.from_pretrained(
             self.cache_path,
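Design note: every optional section in `build_image` follows the same probe-then-download shape. If more asset types are added later, the dispatch could be table-driven; a hedged sketch of that alternative, not part of this commit:

```python
# Hypothetical refactor: map optional config keys to download handlers.
HANDLERS = {
    "loras": lambda item: download_file(
        url=item["download_url"],
        file_name=item["name"],
        file_path=BASE_CACHE_PATH_LORA,
    ),
    "textual_inversions": lambda item: download_file(
        url=item["download_url"],
        file_name=item["name"],
        file_path=BASE_CACHE_PATH_TEXTUAL_INVERSION,
    ),
}

def download_optional_assets(config: dict) -> None:
    # Missing keys simply yield an empty list, so absent sections are skipped.
    for key, handler in HANDLERS.items():
        for item in config.get(key) or []:
            handler(item)
```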
@@ -119,40 +160,65 @@ class StableDiffusion(ClsMixin):
             subfolder="scheduler",
         )
 
-        if os.environ["USE_VAE"] == "true":
+        vae = config.get("vae")
+        if vae is not None:
             self.pipe.vae = diffusers.AutoencoderKL.from_pretrained(
                 self.cache_path,
                 subfolder="vae",
             )
 
         self.pipe.to("cuda")
 
-        if os.environ["LORA_NAMES"] != "":
-            names = os.environ["LORA_NAMES"].split(",")
-            urls = os.environ["LORA_DOWNLOAD_URLS"].split(",")
-            for name, url in zip(names, urls):
-                path = os.path.join(BASE_CACHE_PATH_LORA, name)
+        loras = config.get("loras")
+        if loras is not None:
+            for lora in loras:
+                path = os.path.join(BASE_CACHE_PATH_LORA, lora["name"])
                 if os.path.exists(path):
                     print(f"The directory '{path}' exists.")
                 else:
                     print(f"The directory '{path}' does not exist. Download it...")
-                    download_files(url, name, BASE_CACHE_PATH_LORA)
+                    download_file(lora["download_url"], lora["name"], BASE_CACHE_PATH_LORA)
                 self.pipe.load_lora_weights(".", weight_name=path)
 
-        if os.environ["TEXTUAL_INVERSION_NAMES"] != "":
-            names = os.environ["TEXTUAL_INVERSION_NAMES"].split(",")
-            urls = os.environ["TEXTUAL_INVERSION_DOWNLOAD_URLS"].split(",")
-            for name, url in zip(names, urls):
-                path = os.path.join(BASE_CACHE_PATH_TEXTUAL_INVERSION, name)
+        textual_inversions = config.get("textual_inversions")
+        if textual_inversions is not None:
+            for textual_inversion in textual_inversions:
+                path = os.path.join(BASE_CACHE_PATH_TEXTUAL_INVERSION, textual_inversion["name"])
                 if os.path.exists(path):
                     print(f"The directory '{path}' exists.")
                 else:
                     print(f"The directory '{path}' does not exist. Download it...")
-                    download_files(url, name, BASE_CACHE_PATH_TEXTUAL_INVERSION)
+                    download_file(
+                        textual_inversion["download_url"],
+                        textual_inversion["name"],
+                        BASE_CACHE_PATH_TEXTUAL_INVERSION,
+                    )
                 self.pipe.load_textual_inversion(path)
 
         self.pipe.enable_xformers_memory_efficient_attention()
 
+        # TODO: Add support for controlnets.
+        # controlnet = diffusers.ControlNetModel.from_pretrained(
+        #     "lllyasviel/control_v11f1e_sd15_tile",
+        #     # "lllyasviel/sd-controlnet-canny",
+        #     # self.cache_path,
+        #     # subfolder="controlnet",
+        #     torch_dtype=torch.float16,
+        # )
+
+        # self.controlnet_pipe = diffusers.StableDiffusionControlNetPipeline.from_pretrained(
+        #     self.cache_path,
+        #     controlnet=controlnet,
+        #     custom_pipeline="lpw_stable_diffusion",
+        #     # custom_pipeline="stable_diffusion_controlnet_img2img",
+        #     scheduler=self.pipe.scheduler,
+        #     vae=self.pipe.vae,
+        #     torch_dtype=torch.float16,
+        # )
+
+        # self.controlnet_pipe.to("cuda")
+
+        # self.controlnet_pipe.enable_xformers_memory_efficient_attention()
+
     @method()
     def count_token(self, p: str, n: str) -> int:
         """
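The loading calls themselves are unchanged by this hunk; only the source of names and URLs moved from env vars to the config dict. For reference, a standalone sketch of the same diffusers calls, with hypothetical cache paths following the `/vol/cache` layout above:

```python
import diffusers

# Hypothetical local model directory saved by download_model().
pipe = diffusers.StableDiffusionPipeline.from_pretrained(
    "/vol/cache/stable-diffusion-2-1"
)
# First argument is a directory; weight_name points at the .safetensors file.
pipe.load_lora_weights("/vol/cache/lora", weight_name="hogehoge.safetensors")
pipe.load_textual_inversion("/vol/cache/textual_inversion/hogehoge")
```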
@@ -214,6 +280,22 @@ class StableDiffusion(ClsMixin):
             generator=generator,
         ).images
 
+        # for image in base_images:
+        #     image = self.resize_image(image=image, scale_factor=2)
+        #     with torch.inference_mode():
+        #         with torch.autocast("cuda"):
+        #             generatedWithControlnet = self.controlnet_pipe(
+        #                 prompt=prompt * batch_size,
+        #                 negative_prompt=n_prompt * batch_size,
+        #                 num_inference_steps=steps,
+        #                 strength=0.3,
+        #                 guidance_scale=7.5,
+        #                 max_embeddings_multiples=max_embeddings_multiples,
+        #                 generator=generator,
+        #                 image=image,
+        #             ).images
+        #             base_images.extend(generatedWithControlnet)
+
         if upscaler != "":
             upscaled = self.upscale(
                 base_images=base_images,
@@ -224,8 +306,8 @@ class StableDiffusion(ClsMixin):
                 use_hires_fix=use_hires_fix,
             )
             base_images.extend(upscaled)
 
             if use_hires_fix:
+                torch.cuda.empty_cache()
                 for img in upscaled:
                     with torch.inference_mode():
                         with torch.autocast("cuda"):
@@ -240,7 +322,6 @@ class StableDiffusion(ClsMixin):
                                 image=img,
                             ).images
                     base_images.extend(hires_fixed)
-        torch.cuda.empty_cache()
 
         image_output = []
         for image in base_images:
@@ -250,6 +331,15 @@ class StableDiffusion(ClsMixin):
 
         return image_output
 
+    @method()
+    def resize_image(self, image: Image.Image, scale_factor: int) -> Image.Image:
+        from PIL import Image
+
+        image = image.convert("RGB")
+        width, height = image.size
+        img = image.resize((width * scale_factor, height * scale_factor), resample=Image.LANCZOS)
+        return img
+
     @method()
     def upscale(
         self,
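The added `resize_image` is plain PIL; the same operation as a standalone sketch (file names hypothetical):

```python
from PIL import Image

# Double an image's dimensions with LANCZOS resampling, as resize_image does.
image = Image.open("input.png").convert("RGB")  # hypothetical input file
width, height = image.size
resized = image.resize((width * 2, height * 2), resample=Image.LANCZOS)
resized.save("input_2x.png")
```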
@@ -263,11 +353,10 @@ class StableDiffusion(ClsMixin):
         use_hires_fix: bool = False,
     ) -> list[Image.Image]:
         """
-        Upscales the given images using the given model.
+        Upscales the given images using a upscaler.
         https://github.com/xinntao/Real-ESRGAN
         """
         import numpy
         import torch
         from basicsr.archs.rrdbnet_arch import RRDBNet
         from PIL import Image
         from realesrgan import RealESRGANer
@@ -312,7 +401,6 @@ class StableDiffusion(ClsMixin):
             bg_upsampler=upsampler,
         )
 
-        torch.cuda.empty_cache()
         upscaled_imgs = []
         with tqdm(total=len(base_images)) as progress_bar:
             for img in base_images:
@@ -330,6 +418,4 @@ class StableDiffusion(ClsMixin):
                 upscaled_imgs.append(Image.fromarray(enhance_result))
                 progress_bar.update(1)
 
-        torch.cuda.empty_cache()
-
         return upscaled_imgs
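For orientation, the `upscale` method wraps Real-ESRGAN as documented upstream (https://github.com/xinntao/Real-ESRGAN). A hedged end-to-end sketch, with the weight path taken from the Dockerfile above and the remaining values assumed; channel-order handling is omitted:

```python
import numpy
from basicsr.archs.rrdbnet_arch import RRDBNet
from PIL import Image
from realesrgan import RealESRGANer

# Architecture parameters for RealESRGAN_x4plus, per the upstream README.
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
upsampler = RealESRGANer(
    scale=4,
    model_path="/vol/cache/esrgan/RealESRGAN_x4plus.pth",  # baked in by the Dockerfile
    model=model,
    tile=0,
    half=True,
)
img = numpy.array(Image.open("base.png").convert("RGB"))  # hypothetical input
output, _ = upsampler.enhance(img, outscale=4)
Image.fromarray(output).save("upscaled.png")
```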