video node

This commit is contained in:
nick 2024-07-20 00:15:41 -07:00
parent 6e4532078f
commit 3c9d1865ca

View File

@ -1,10 +1,15 @@
# credit goes to https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite and is meant to work with
# credit goes to https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite
# Intended to work with https://github.com/NicholasKao1029/ComfyUI-VideoHelperSuite/tree/main
import os
import itertools
import numpy as np
import torch
from typing import Union
from torch import Tensor
import cv2
import psutil
from collections.abc import Mapping
import folder_paths
from comfy.utils import common_upscale
@ -90,13 +95,25 @@ if gifski_path is None:
gifski_path = shutil.which("gifski")
def is_safe_path(path):
if "VHS_STRICT_PATHS" not in os.environ:
return True
basedir = os.path.abspath(".")
try:
common_path = os.path.commonpath([basedir, path])
except:
# Different drive on windows
return False
return common_path == basedir
def get_sorted_dir_files_from_directory(
directory: str,
skip_first_images: int = 0,
select_every_nth: int = 1,
extensions: Iterable = None,
):
directory = directory.strip()
directory = strip_path(directory)
dir_files = os.listdir(directory)
dir_files = sorted(dir_files)
dir_files = [os.path.join(directory, x) for x in dir_files]
@ -177,18 +194,59 @@ def requeue_workflow(requeue_required=(-1, True)):
def get_audio(file, start_time=0, duration=0):
args = [ffmpeg_path, "-v", "error", "-i", file]
args = [ffmpeg_path, "-i", file]
if start_time > 0:
args += ["-ss", str(start_time)]
if duration > 0:
args += ["-t", str(duration)]
try:
# TODO: scan for sample rate and maintain
res = subprocess.run(
args + ["-f", "wav", "-"], stdout=subprocess.PIPE, check=True
).stdout
args + ["-f", "f32le", "-"], capture_output=True, check=True
)
audio = torch.frombuffer(bytearray(res.stdout), dtype=torch.float32)
match = re.search(", (\\d+) Hz, (\\w+), ", res.stderr.decode("utf-8"))
except subprocess.CalledProcessError as e:
return False
return res
raise Exception(
f"VHS failed to extract audio from {file}:\n" + e.stderr.decode("utf-8")
)
if match:
ar = int(match.group(1))
# NOTE: Just throwing an error for other channel types right now
# Will deal with issues if they come
ac = {"mono": 1, "stereo": 2}[match.group(2)]
else:
ar = 44100
ac = 2
audio = audio.reshape((-1, ac)).transpose(0, 1).unsqueeze(0)
return {"waveform": audio, "sample_rate": ar}
class LazyAudioMap(Mapping):
def __init__(self, file, start_time, duration):
self.file = file
self.start_time = start_time
self.duration = duration
self._dict = None
def __getitem__(self, key):
if self._dict is None:
self._dict = get_audio(self.file, self.start_time, self.duration)
return self._dict[key]
def __iter__(self):
if self._dict is None:
self._dict = get_audio(self.file, self.start_time, self.duration)
return iter(self._dict)
def __len__(self):
if self._dict is None:
self._dict = get_audio(self.file, self.start_time, self.duration)
return len(self._dict)
def lazy_get_audio(file, start_time=0, duration=0):
return LazyAudioMap(file, start_time, duration)
def lazy_eval(func):
@ -230,6 +288,19 @@ def validate_sequence(path):
return False
def strip_path(path):
# This leaves whitespace inside quotes and only a single "
# thus ' ""test"' -> '"test'
# consider path.strip(string.whitespace+"\"")
# or weightier re.fullmatch("[\\s\"]*(.+?)[\\s\"]*", path).group(1)
path = path.strip()
if path.startswith('"'):
path = path[1:]
if path.endswith('"'):
path = path[:-1]
return path
def hash_path(path):
if path is None:
return "input"
@ -286,6 +357,145 @@ def target_size(
return (width, height)
def validate_index(
index: int,
length: int = 0,
is_range: bool = False,
allow_negative=False,
allow_missing=False,
) -> int:
# if part of range, do nothing
if is_range:
return index
# otherwise, validate index
# validate not out of range - only when latent_count is passed in
if length > 0 and index > length - 1 and not allow_missing:
raise IndexError(f"Index '{index}' out of range for {length} item(s).")
# if negative, validate not out of range
if index < 0:
if not allow_negative:
raise IndexError(f"Negative indeces not allowed, but was '{index}'.")
conv_index = length + index
if conv_index < 0 and not allow_missing:
raise IndexError(
f"Index '{index}', converted to '{conv_index}' out of range for {length} item(s)."
)
index = conv_index
return index
def convert_to_index_int(
raw_index: str,
length: int = 0,
is_range: bool = False,
allow_negative=False,
allow_missing=False,
) -> int:
try:
return validate_index(
int(raw_index),
length=length,
is_range=is_range,
allow_negative=allow_negative,
allow_missing=allow_missing,
)
except ValueError as e:
raise ValueError(f"Index '{raw_index}' must be an integer.", e)
def convert_str_to_indexes(
indexes_str: str, length: int = 0, allow_missing=False
) -> list[int]:
if not indexes_str:
return []
int_indexes = list(range(0, length))
allow_negative = length > 0
chosen_indexes = []
# parse string - allow positive ints, negative ints, and ranges separated by ':'
groups = indexes_str.split(",")
groups = [g.strip() for g in groups]
for g in groups:
# parse range of indeces (e.g. 2:16)
if ":" in g:
index_range = g.split(":", 2)
index_range = [r.strip() for r in index_range]
start_index = index_range[0]
if len(start_index) > 0:
start_index = convert_to_index_int(
start_index,
length=length,
is_range=True,
allow_negative=allow_negative,
allow_missing=allow_missing,
)
else:
start_index = 0
end_index = index_range[1]
if len(end_index) > 0:
end_index = convert_to_index_int(
end_index,
length=length,
is_range=True,
allow_negative=allow_negative,
allow_missing=allow_missing,
)
else:
end_index = length
# support step as well, to allow things like reversing, every-other, etc.
step = 1
if len(index_range) > 2:
step = index_range[2]
if len(step) > 0:
step = convert_to_index_int(
step,
length=length,
is_range=True,
allow_negative=True,
allow_missing=True,
)
else:
step = 1
# if latents were passed in, base indeces on known latent count
if len(int_indexes) > 0:
chosen_indexes.extend(int_indexes[start_index:end_index][::step])
# otherwise, assume indeces are valid
else:
chosen_indexes.extend(list(range(start_index, end_index, step)))
# parse individual indeces
else:
chosen_indexes.append(
convert_to_index_int(
g,
length=length,
allow_negative=allow_negative,
allow_missing=allow_missing,
)
)
return chosen_indexes
def select_indexes(input_obj: Union[Tensor, list], idxs: list):
if type(input_obj) == Tensor:
return input_obj[idxs]
else:
return [input_obj[i] for i in idxs]
def select_indexes_from_str(
input_obj: Union[Tensor, list], indexes: str, err_if_missing=True, err_if_empty=True
):
real_idxs = convert_str_to_indexes(
indexes, len(input_obj), allow_missing=not err_if_missing
)
if err_if_empty and len(real_idxs) == 0:
raise Exception(f"Nothing was selected based on indexes found in '{indexes}'.")
return select_indexes(input_obj, real_idxs)
###
def cv_frame_generator(
video,
force_rate,
@ -295,9 +505,10 @@ def cv_frame_generator(
meta_batch=None,
unique_id=None,
):
video_cap = cv2.VideoCapture(video)
video_cap = cv2.VideoCapture(strip_path(video))
if not video_cap.isOpened():
raise ValueError(f"{video} could not be loaded with cv.")
pbar = None
# extract video metadata
fps = video_cap.get(cv2.CAP_PROP_FPS)
@ -319,6 +530,8 @@ def cv_frame_generator(
target_frame_time = 1 / force_rate
yield (width, height, fps, duration, total_frames, target_frame_time)
if meta_batch is not None:
yield min(frame_load_cap, total_frames)
time_offset = target_frame_time - base_frame_time
while video_cap.isOpened():
@ -349,7 +562,8 @@ def cv_frame_generator(
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# convert frame to comfyui's expected format
# TODO: frame contains no exif information. Check if opencv2 has already applied
frame = np.array(frame, dtype=np.float32) / 255.0
frame = np.array(frame, dtype=np.float32)
torch.from_numpy(frame).div_(255)
if prev_frame is not None:
inp = yield prev_frame
if inp is not None:
@ -357,6 +571,8 @@ def cv_frame_generator(
return
prev_frame = frame
frames_added += 1
if pbar is not None:
pbar.update_absolute(frames_added, frame_load_cap)
# if cap exists and we've reached it, stop processing frames
if frame_load_cap > 0 and frames_added >= frame_load_cap:
break
@ -367,6 +583,17 @@ def cv_frame_generator(
yield prev_frame
def batched(it, n):
while batch := tuple(itertools.islice(it, n)):
yield batch
def batched_vae_encode(images, vae, frames_per_batch):
for batch in batched(images, frames_per_batch):
image_batch = torch.from_numpy(np.array(batch))
yield from vae.encode(image_batch).numpy()
def load_video_cv(
video: str,
force_rate: int,
@ -378,6 +605,8 @@ def load_video_cv(
select_every_nth: int,
meta_batch=None,
unique_id=None,
memory_limit_mb=None,
vae=None,
):
if meta_batch is None or unique_id not in meta_batch.inputs:
gen = cv_frame_generator(
@ -401,30 +630,89 @@ def load_video_cv(
total_frames,
target_frame_time,
)
meta_batch.total_frames = min(meta_batch.total_frames, next(gen))
else:
(gen, width, height, fps, duration, total_frames, target_frame_time) = (
meta_batch.inputs[unique_id]
)
if meta_batch is not None:
gen = itertools.islice(gen, meta_batch.frames_per_batch)
memory_limit = None
if memory_limit_mb is not None:
memory_limit *= 2**20
else:
# TODO: verify if garbage collection should be performed here.
# leaves ~128 MB unreserved for safety
try:
memory_limit = (
psutil.virtual_memory().available + psutil.swap_memory().free
) - 2**27
except:
print(
"Failed to calculate available memory. Memory load limit has been disabled"
)
if memory_limit is not None:
if vae is not None:
# space required to load as f32, exist as latent with wiggle room, decode to f32
max_loadable_frames = int(
memory_limit // (width * height * 3 * (4 + 4 + 1 / 10))
)
else:
# TODO: use better estimate for when vae is not None
# Consider completely ignoring for load_latent case?
max_loadable_frames = int(memory_limit // (width * height * 3 * (0.1)))
if meta_batch is not None:
if meta_batch.frames_per_batch > max_loadable_frames:
raise RuntimeError(
f"Meta Batch set to {meta_batch.frames_per_batch} frames but only {max_loadable_frames} can fit in memory"
)
gen = itertools.islice(gen, meta_batch.frames_per_batch)
else:
original_gen = gen
gen = itertools.islice(gen, max_loadable_frames)
downscale_ratio = getattr(vae, "downscale_ratio", 8)
frames_per_batch = (1920 * 1080 * 16) // (width * height) or 1
if force_size != "Disabled" or vae is not None:
new_size = target_size(
width, height, force_size, custom_width, custom_height, downscale_ratio
)
if new_size[0] != width or new_size[1] != height:
# Some minor wizardry to eliminate a copy and reduce max memory by a factor of ~2
images = torch.from_numpy(
np.fromiter(gen, np.dtype((np.float32, (height, width, 3))))
)
def rescale(frame):
s = torch.from_numpy(
np.fromiter(frame, np.dtype((np.float32, (height, width, 3))))
)
s = s.movedim(-1, 1)
s = common_upscale(s, new_size[0], new_size[1], "lanczos", "center")
return s.movedim(1, -1).numpy()
gen = itertools.chain.from_iterable(
map(rescale, batched(gen, frames_per_batch))
)
else:
new_size = width, height
if vae is not None:
gen = batched_vae_encode(gen, vae, frames_per_batch)
vw, vh = new_size[0] // downscale_ratio, new_size[1] // downscale_ratio
images = torch.from_numpy(np.fromiter(gen, np.dtype((np.float32, (4, vh, vw)))))
else:
# Some minor wizardry to eliminate a copy and reduce max memory by a factor of ~2
images = torch.from_numpy(
np.fromiter(gen, np.dtype((np.float32, (new_size[1], new_size[0], 3))))
)
if meta_batch is None and memory_limit is not None:
try:
next(original_gen)
raise RuntimeError(
f"Memory limit hit after loading {len(images)} frames. Stopping execution."
)
except StopIteration:
pass
if len(images) == 0:
raise RuntimeError("No frames generated")
if force_size != "Disabled":
new_size = target_size(width, height, force_size, custom_width, custom_height)
if new_size[0] != width or new_size[1] != height:
s = images.movedim(-1, 1)
s = common_upscale(s, new_size[0], new_size[1], "lanczos", "center")
images = s.movedim(1, -1)
# Setup lambda for lazy audio capture
audio = lambda: get_audio(
audio = lazy_get_audio(
video,
skip_first_frames * target_frame_time,
frame_load_cap * target_frame_time * select_every_nth,
@ -440,11 +728,13 @@ def load_video_cv(
"loaded_fps": 1 / target_frame_time,
"loaded_frame_count": len(images),
"loaded_duration": len(images) * target_frame_time,
"loaded_width": images.shape[2],
"loaded_height": images.shape[1],
"loaded_width": new_size[0],
"loaded_height": new_size[1],
}
return (images, len(images), lazy_eval(audio), video_info)
if vae is None:
return (images, len(images), audio, video_info, None)
else:
return (None, len(images), audio, video_info, {"samples": images})
class ComfyUIDeployExternalVideo:
@ -457,68 +747,38 @@ class ComfyUIDeployExternalVideo:
file_parts = f.split(".")
if len(file_parts) > 1 and (file_parts[-1] in video_extensions):
files.append(f)
return {
"required": {
"input_id": (
"STRING",
{"multiline": False, "default": "input_video"},
),
"force_rate": ("INT", {"default": 0, "min": 0, "max": 60, "step": 1}),
"force_size": (
[
"Disabled",
"Custom Height",
"Custom Width",
"Custom",
"256x?",
"?x256",
"256x256",
"512x?",
"?x512",
"512x512",
],
),
"custom_width": (
"INT",
{"default": 512, "min": 0, "max": DIMMAX, "step": 8},
),
"custom_height": (
"INT",
{"default": 512, "min": 0, "max": DIMMAX, "step": 8},
),
"frame_load_cap": (
"INT",
{"default": 0, "min": 0, "max": BIGMAX, "step": 1},
),
"skip_first_frames": (
"INT",
{"default": 0, "min": 0, "max": BIGMAX, "step": 1},
),
"select_every_nth": (
"INT",
{"default": 1, "min": 1, "max": BIGMAX, "step": 1},
),
},
"optional": {
"meta_batch": ("VHS_BatchManager",),
"default_value": (sorted(files),),
},
"hidden": {"unique_id": "UNIQUE_ID"},
}
return {"required": {
"input_id": (
"STRING",
{"multiline": False, "default": "input_video"},
),
"force_rate": ("INT", {"default": 0, "min": 0, "max": 60, "step": 1}),
"force_size": (["Disabled", "Custom Height", "Custom Width", "Custom", "256x?", "?x256", "256x256", "512x?", "?x512", "512x512"],),
"custom_width": ("INT", {"default": 512, "min": 0, "max": DIMMAX, "step": 8}),
"custom_height": ("INT", {"default": 512, "min": 0, "max": DIMMAX, "step": 8}),
"frame_load_cap": ("INT", {"default": 0, "min": 0, "max": BIGMAX, "step": 1}),
"skip_first_frames": ("INT", {"default": 0, "min": 0, "max": BIGMAX, "step": 1}),
"select_every_nth": ("INT", {"default": 1, "min": 1, "max": BIGMAX, "step": 1}),
},
"optional": {
"meta_batch": ("VHS_BatchManager",),
"vae": ("VAE",),
"default_value": (sorted(files),),
},
"hidden": {
"unique_id": "UNIQUE_ID"
},
}
CATEGORY = "Video Helper Suite 🎥🅥🅗🅢"
RETURN_TYPES = (
"IMAGE",
"INT",
"AUDIO",
"VHS_VIDEOINFO",
)
RETURN_TYPES = ("IMAGE", "INT", "AUDIO", "VHS_VIDEOINFO", "LATENT")
RETURN_NAMES = (
"IMAGE",
"frame_count",
"audio",
"video_info",
"LATENT",
)
FUNCTION = "load_video"