baka999 committed on
Commit
936fc8b
·
verified ·
1 Parent(s): f7e5510

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +10 -0
  2. app.py +986 -0
  3. packages.txt +1 -0
  4. requirements.txt +18 -0
README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: SeedVR2 ZeroGPU
3
+ emoji: 🎬
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.0.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
app.py ADDED
@@ -0,0 +1,986 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import json
3
+ import math
4
+ import os
5
+ import shutil
6
+ import subprocess
7
+ import sys
8
+ import tempfile
9
+ import threading
10
+ import time
11
+ import urllib.request
12
+ import uuid
13
+ import zipfile
14
+ from pathlib import Path
15
+ from typing import Any, Dict, List, Optional, Tuple
16
+
17
+ import cv2
18
+ import gradio as gr
19
+ from huggingface_hub import HfApi, hf_hub_download
20
+ from PIL import Image, ImageOps
21
+
22
try:
    import spaces
except Exception:
    # Fallback for local / non-ZeroGPU runs: provide a no-op stand-in so the
    # @spaces.GPU decorators below keep working when the `spaces` package is
    # unavailable.
    class _DummySpaces:
        @staticmethod
        def GPU(*args, **kwargs):
            # Bare form: @spaces.GPU applied directly to a function.
            if args and callable(args[0]) and len(args) == 1 and not kwargs:
                return args[0]

            # Parameterized form: @spaces.GPU(duration=...) — return a
            # decorator that leaves the function unchanged.
            def decorator(fn):
                return fn

            return decorator

    spaces = _DummySpaces()
37
+
38
+
39
# --- Paths ------------------------------------------------------------------
APP_ROOT = Path(__file__).resolve().parent
WORK_ROOT = APP_ROOT / "workspace"
# Cloned/extracted SeedVR2 CLI backend lives here.
BACKEND_DIR = WORK_ROOT / "ComfyUI-SeedVR2_VideoUpscaler"
MODEL_DIR = APP_ROOT / "models" / "SEEDVR2"
JOBS_DIR = APP_ROOT / "jobs"
OUTPUTS_DIR = APP_ROOT / "outputs"

# --- Backend sources ---------------------------------------------------------
SEEDVR_BACKEND_GIT = "https://github.com/numz/ComfyUI-SeedVR2_VideoUpscaler.git"
# ZIP fallback used when `git clone` is unavailable or fails.
SEEDVR_BACKEND_ZIP = (
    "https://codeload.github.com/numz/ComfyUI-SeedVR2_VideoUpscaler/zip/refs/heads/main"
)

# Supported model repos mapped to the weight-file extension each one hosts.
MODEL_SOURCES = {
    "numz/SeedVR2_comfyUI": ".safetensors",
    "cmeka/SeedVR2-GGUF": ".gguf",
}
# The VAE is always pulled from the official repo (the GGUF repo ships DiT only).
VAE_REPO = "numz/SeedVR2_comfyUI"
VAE_FILE = "ema_vae_fp16.safetensors"

# Hard-coded model lists used when the Hub listing call fails (offline, rate
# limits, etc.) — keeps the dropdowns usable.
FALLBACK_MODELS = {
    "numz/SeedVR2_comfyUI": [
        "seedvr2_ema_3b_fp16.safetensors",
        "seedvr2_ema_3b_fp8_e4m3fn.safetensors",
        "seedvr2_ema_7b_fp16.safetensors",
        "seedvr2_ema_7b_fp8_e4m3fn.safetensors",
        "seedvr2_ema_7b_sharp_fp16.safetensors",
        "seedvr2_ema_7b_sharp_fp8_e4m3fn.safetensors",
    ],
    "cmeka/SeedVR2-GGUF": [
        "seedvr2_ema_3b-Q3_K_M.gguf",
        "seedvr2_ema_3b-Q4_K_M.gguf",
        "seedvr2_ema_3b-Q5_K_M.gguf",
        "seedvr2_ema_3b-Q6_K.gguf",
        "seedvr2_ema_3b-Q8_0.gguf",
        "seedvr2_ema_7b-Q3_K_M.gguf",
        "seedvr2_ema_7b-Q4_K_M.gguf",
        "seedvr2_ema_7b-Q5_K_M.gguf",
        "seedvr2_ema_7b-Q6_K.gguf",
        "seedvr2_ema_7b-Q8_0.gguf",
        "seedvr2_ema_7b_sharp-Q3_K_M.gguf",
        "seedvr2_ema_7b_sharp-Q4_K_M.gguf",
        "seedvr2_ema_7b_sharp-Q5_K_M.gguf",
        "seedvr2_ema_7b_sharp-Q6_K.gguf",
        "seedvr2_ema_7b_sharp-Q8_0.gguf",
    ],
}

# Preferred default selection per repo (smallest practical model).
DEFAULT_MODELS = {
    "numz/SeedVR2_comfyUI": "seedvr2_ema_3b_fp8_e4m3fn.safetensors",
    "cmeka/SeedVR2-GGUF": "seedvr2_ema_3b-Q4_K_M.gguf",
}

# Internal mode key -> user-facing (Chinese) label.
RESIZE_MODE_LABELS = {
    "pad": "保持比例并补边",
    "crop": "保持比例并裁切",
    "stretch": "强制拉伸到目标尺寸",
}

IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tif", ".tiff"}
VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".m4v"}

# Serialize backend setup and model downloads across concurrent requests.
SETUP_LOCK = threading.Lock()
DOWNLOAD_LOCK = threading.Lock()
# In-process cache of repo -> model-file listings.
MODEL_CACHE: Dict[str, List[str]] = {}
API = HfApi()

for folder in (WORK_ROOT, MODEL_DIR, JOBS_DIR, OUTPUTS_DIR):
    folder.mkdir(parents=True, exist_ok=True)
107
+
108
+
109
def tail_text(text: str, limit: int = 6000) -> str:
    """Return *text* stripped of surrounding whitespace, keeping only its
    last *limit* characters; a leading ``...`` line marks the truncation."""
    stripped = (text or "").strip()
    if len(stripped) > limit:
        return "...\n" + stripped[-limit:]
    return stripped
114
+
115
+
116
def ensure_even(value: float) -> int:
    """Round *value* to the nearest int, clamp it to at least 2, and bump
    any odd result up by one so video encoders get even dimensions."""
    rounded = max(2, int(round(float(value))))
    return rounded + (rounded % 2)
121
+
122
+
123
def optional_positive_int(value: Any) -> Optional[int]:
    """Coerce *value* to an int; None/empty-string/non-positive inputs map to None."""
    if value is None or value == "":
        return None
    coerced = int(float(value))
    return coerced if coerced > 0 else None
130
+
131
+
132
def cleanup_old_jobs(max_age_hours: int = 12, keep_last: int = 30) -> None:
    """Delete stale job directories under JOBS_DIR.

    A directory survives only if it is among the *keep_last* most recent AND
    younger than *max_age_hours*. Deletion errors are ignored (best-effort).
    """
    cutoff = time.time() - max_age_hours * 3600
    recent_first = sorted(
        (entry for entry in JOBS_DIR.iterdir() if entry.is_dir()),
        key=lambda entry: entry.stat().st_mtime,
        reverse=True,
    )
    for position, job_dir in enumerate(recent_first):
        if position < keep_last and job_dir.stat().st_mtime >= cutoff:
            continue
        try:
            shutil.rmtree(job_dir, ignore_errors=True)
        except Exception:
            pass
143
+
144
+
145
def choose_default_model(repo_id: str, choices: List[str]) -> Optional[str]:
    """Pick the preferred default file for *repo_id*, else the first choice,
    else None when *choices* is empty."""
    if not choices:
        return None
    preferred = DEFAULT_MODELS.get(repo_id)
    return preferred if preferred in choices else choices[0]
150
+
151
+
152
def is_image_file(path: str) -> bool:
    """True when *path* has a supported image extension (case-insensitive)."""
    suffix = Path(path).suffix.lower()
    return suffix in IMAGE_EXTS
154
+
155
+
156
def is_video_file(path: str) -> bool:
    """True when *path* has a supported video extension (case-insensitive)."""
    suffix = Path(path).suffix.lower()
    return suffix in VIDEO_EXTS
158
+
159
+
160
def probe_media(path: str) -> Dict[str, Any]:
    """Inspect an input file and return basic metadata.

    Returns a dict with keys: ``kind`` ("image" or "video"), ``width``,
    ``height``, ``frames``, ``fps``, ``duration`` (seconds).

    Raises gr.Error for unsupported extensions or unreadable videos.
    """
    path_obj = Path(path)
    ext = path_obj.suffix.lower()
    if ext in IMAGE_EXTS:
        with Image.open(path) as img:
            return {
                "kind": "image",
                "width": int(img.width),
                "height": int(img.height),
                "frames": 1,
                # Placeholder FPS so images and videos share one schema.
                "fps": 30.0,
                "duration": 0.0,
            }
    if ext in VIDEO_EXTS:
        cap = cv2.VideoCapture(path)
        if not cap.isOpened():
            raise gr.Error(f"无法读取视频:{path_obj.name}")
        # cv2 returns 0/None for unknown properties; `or` supplies defaults.
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) or 0)
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or 0)
        fps = float(cap.get(cv2.CAP_PROP_FPS) or 30.0)
        frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
        cap.release()
        duration = (frames / fps) if fps > 0 else 0.0
        return {
            "kind": "video",
            "width": width,
            "height": height,
            "frames": frames,
            "fps": fps,
            "duration": duration,
        }
    raise gr.Error("仅支持图片或视频文件。")
192
+
193
+
194
def compute_target_size(
    src_w: int,
    src_h: int,
    scale_factor: Any,
    out_w: Any,
    out_h: Any,
) -> Tuple[int, int, str]:
    """Resolve the final output resolution for a job.

    Explicit ``out_w``/``out_h`` win over ``scale_factor``; when only one is
    given, the other is derived from the source aspect ratio. All results are
    clamped to even values >= 2 (see ``ensure_even``).

    Returns ``(target_w, target_h, reason)`` where *reason* is a human-readable
    explanation of which rule applied.

    Raises gr.Error when the scale factor is non-positive, or when the source
    dimensions are invalid — a corrupt video can be probed as 0x0, which would
    otherwise crash the aspect-ratio divisions with ZeroDivisionError.
    """
    width = optional_positive_int(out_w)
    height = optional_positive_int(out_h)
    factor = 2.0 if scale_factor in (None, "") else float(scale_factor)
    if factor <= 0:
        raise gr.Error("超分倍率必须大于 0。")
    if src_w <= 0 or src_h <= 0:
        # Fix: guard divisions below against zero-sized sources (cv2 reports
        # 0x0 for unreadable/corrupt streams).
        raise gr.Error("输入文件的原始尺寸无效,无法计算输出分辨率。")

    if width and height:
        target_w = ensure_even(width)
        target_h = ensure_even(height)
        reason = "使用自定义宽高"
    elif width:
        target_w = ensure_even(width)
        target_h = ensure_even(width * src_h / src_w)
        reason = "仅指定输出宽度,按原始比例推算高度"
    elif height:
        target_h = ensure_even(height)
        target_w = ensure_even(height * src_w / src_h)
        reason = "仅指定输出高度,按原始比例推算宽度"
    else:
        target_w = ensure_even(src_w * factor)
        target_h = ensure_even(src_h * factor)
        reason = f"按 {factor:.3f}x 倍率计算输出尺寸"

    return target_w, target_h, reason
225
+
226
+
227
def ensure_seedvr_backend() -> Path:
    """Make sure the SeedVR2 CLI backend is present locally and return its dir.

    Strategy: if ``inference_cli.py`` already exists, reuse the checkout.
    Otherwise try a shallow ``git clone`` into a temp dir; on any failure fall
    back to downloading and extracting the repo ZIP. The temp dir is only
    renamed into place after ``inference_cli.py`` is confirmed, so a broken
    download never replaces a working backend. Serialized by SETUP_LOCK.

    Raises gr.Error when neither method yields a usable backend.
    """
    with SETUP_LOCK:
        cli_file = BACKEND_DIR / "inference_cli.py"
        if cli_file.exists():
            return BACKEND_DIR

        # Stage into a sibling temp dir; swap in atomically at the end.
        tmp_dir = BACKEND_DIR.with_name(BACKEND_DIR.name + "_tmp")
        shutil.rmtree(tmp_dir, ignore_errors=True)

        try:
            subprocess.run(
                ["git", "clone", "--depth", "1", SEEDVR_BACKEND_GIT, str(tmp_dir)],
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )
        except Exception:
            # git unavailable or clone failed — fetch the ZIP snapshot instead.
            zip_path = WORK_ROOT / "seedvr2_backend.zip"
            extract_root = WORK_ROOT / ("extract_" + uuid.uuid4().hex[:8])
            extract_root.mkdir(parents=True, exist_ok=True)
            try:
                urllib.request.urlretrieve(SEEDVR_BACKEND_ZIP, zip_path)
                with zipfile.ZipFile(zip_path, "r") as zf:
                    zf.extractall(extract_root)
                # GitHub ZIPs wrap the repo in a "<name>-<branch>" folder.
                extracted = None
                for item in extract_root.iterdir():
                    if item.is_dir() and item.name.startswith("ComfyUI-SeedVR2_VideoUpscaler"):
                        extracted = item
                        break
                if extracted is None:
                    raise RuntimeError("下载后的 SeedVR2 后端目录结构不符合预期。")
                shutil.move(str(extracted), str(tmp_dir))
            finally:
                # Best-effort cleanup of download artifacts.
                try:
                    if zip_path.exists():
                        zip_path.unlink()
                except Exception:
                    pass
                shutil.rmtree(extract_root, ignore_errors=True)

        if not (tmp_dir / "inference_cli.py").exists():
            shutil.rmtree(tmp_dir, ignore_errors=True)
            raise gr.Error("SeedVR2 后端拉取失败,缺少 inference_cli.py。")

        if BACKEND_DIR.exists():
            shutil.rmtree(BACKEND_DIR, ignore_errors=True)
        tmp_dir.rename(BACKEND_DIR)
        return BACKEND_DIR
276
+
277
+
278
def fetch_models_from_repo(repo_id: str, force: bool = False) -> List[str]:
    """List model weight files available in *repo_id*.

    Returns cached results unless *force* is set. On a Hub listing failure (or
    an empty listing) falls back to the hard-coded FALLBACK_MODELS so the UI
    dropdowns stay populated offline.
    """
    if not force and repo_id in MODEL_CACHE:
        return MODEL_CACHE[repo_id]

    ext = MODEL_SOURCES[repo_id]
    try:
        files = API.list_repo_files(repo_id, repo_type="model")
        # Keep only top-level files that follow the seedvr2_* naming and
        # carry the repo's expected weight extension.
        models = sorted(
            file_name
            for file_name in files
            if "/" not in file_name
            and file_name.startswith("seedvr2_")
            and file_name.endswith(ext)
        )
        if models:
            MODEL_CACHE[repo_id] = models
            return models
    except Exception:
        # Deliberate best-effort: network/auth failures fall through to the
        # static fallback list below.
        pass

    fallback = FALLBACK_MODELS[repo_id][:]
    MODEL_CACHE[repo_id] = fallback
    return fallback
301
+
302
+
303
def update_model_dropdown(repo_id: str, force: bool = False):
    """Build a gr.update refreshing the model-file dropdown for *repo_id*,
    selecting the repo's preferred default."""
    available = fetch_models_from_repo(repo_id, force=force)
    default = choose_default_model(repo_id, available)
    return gr.update(choices=available, value=default)
306
+
307
+
308
def ensure_model_files(model_repo: str, model_file: str) -> Tuple[Path, Path]:
    """Download (or reuse cached) DiT and VAE weights into MODEL_DIR.

    The VAE always comes from the official VAE_REPO, even for GGUF DiT models.
    Serialized by DOWNLOAD_LOCK so concurrent jobs don't race on the same
    files. Returns ``(dit_path, vae_path)``.
    """
    with DOWNLOAD_LOCK:
        dit_path = Path(
            hf_hub_download(
                repo_id=model_repo,
                filename=model_file,
                repo_type="model",
                local_dir=str(MODEL_DIR),
            )
        )
        vae_path = Path(
            hf_hub_download(
                repo_id=VAE_REPO,
                filename=VAE_FILE,
                repo_type="model",
                local_dir=str(MODEL_DIR),
            )
        )
    return dit_path, vae_path
327
+
328
+
329
def build_job(
    input_path: str,
    model_repo: str,
    model_file: str,
    scale_factor: Any,
    out_w: Any,
    out_h: Any,
    resize_mode: str,
    color_correction: str,
    expected_kind: str,
    batch_size: Optional[int] = None,
    temporal_overlap: Optional[int] = None,
    chunk_size: Optional[int] = None,
) -> Tuple[Dict[str, Any], str]:
    """CPU-side job preparation: validate inputs, fetch backend/weights,
    stage the input file, and compute the output resolution.

    Runs entirely off-GPU so the ZeroGPU quota is only spent on inference.
    Returns ``(job_dict, summary_text)``; the job dict is later consumed by
    ``build_cli_command`` / ``run_seedvr_job_core``.

    Raises gr.Error for any invalid user input.
    """
    cleanup_old_jobs()

    if not input_path:
        raise gr.Error("请先上传输入文件。")
    if not model_repo or model_repo not in MODEL_SOURCES:
        raise gr.Error("请选择模型仓库。")
    if not model_file:
        raise gr.Error("请选择模型文件。")
    if resize_mode not in RESIZE_MODE_LABELS:
        raise gr.Error("输出尺寸策略不合法。")

    # Heavy-but-CPU-only setup: backend checkout and weight downloads.
    ensure_seedvr_backend()
    dit_path, vae_path = ensure_model_files(model_repo, model_file)

    source_meta = probe_media(input_path)
    if source_meta["kind"] != expected_kind:
        raise gr.Error(f"当前标签页只接受{ '图片' if expected_kind == 'image' else '视频' }文件。")

    if expected_kind == "video":
        if batch_size is None:
            batch_size = 5
        batch_size = int(batch_size)
        # SeedVR2 temporal batching requires 4n+1 frames per batch.
        if batch_size != 1 and (batch_size - 1) % 4 != 0:
            raise gr.Error("视频 batch_size 必须满足 4n+1,例如 1/5/9/13/17/21。")
        temporal_overlap = int(temporal_overlap or 0)
        chunk_size = int(chunk_size or 0)
    else:
        # Images always run as single-frame jobs.
        batch_size = 1
        temporal_overlap = 0
        chunk_size = 0

    target_w, target_h, size_reason = compute_target_size(
        source_meta["width"],
        source_meta["height"],
        scale_factor,
        out_w,
        out_h,
    )

    # Per-job working directory; timestamp + random suffix keeps ids unique.
    job_id = f"{time.strftime('%Y%m%d-%H%M%S')}-{uuid.uuid4().hex[:8]}"
    job_dir = JOBS_DIR / job_id
    job_dir.mkdir(parents=True, exist_ok=True)

    # Copy the upload into the job dir so Gradio temp-file cleanup can't
    # delete it mid-run.
    staged_input = job_dir / f"input{Path(input_path).suffix.lower()}"
    shutil.copy2(input_path, staged_input)

    # raw_output = direct SeedVR2 result; final_output = after exact-size fit.
    raw_output = job_dir / ("seedvr2_raw.png" if expected_kind == "image" else "seedvr2_raw.mp4")
    final_output = job_dir / ("seedvr2_out.png" if expected_kind == "image" else "seedvr2_out.mp4")

    job = {
        "job_id": job_id,
        "kind": expected_kind,
        "input_path": str(staged_input),
        "raw_output": str(raw_output),
        "final_output": str(final_output),
        "source_width": source_meta["width"],
        "source_height": source_meta["height"],
        "frames": source_meta["frames"],
        "fps": source_meta["fps"],
        "duration": source_meta["duration"],
        "target_width": target_w,
        "target_height": target_h,
        # The CLI takes short-side / long-side limits rather than exact W×H.
        "cli_resolution": min(target_w, target_h),
        "cli_max_resolution": max(target_w, target_h),
        "model_repo": model_repo,
        "model_file": model_file,
        "dit_path": str(dit_path),
        "vae_path": str(vae_path),
        "batch_size": batch_size,
        "temporal_overlap": temporal_overlap,
        "chunk_size": chunk_size,
        "resize_mode": resize_mode,
        "color_correction": color_correction,
        "size_reason": size_reason,
    }

    summary_lines = [
        f"任务已准备:{job_id}",
        f"输入类型:{'图片' if expected_kind == 'image' else '视频'}",
        f"输入尺寸:{source_meta['width']}x{source_meta['height']}",
        f"目标尺寸:{target_w}x{target_h}",
        f"尺寸来源:{size_reason}",
        f"尺寸策略:{RESIZE_MODE_LABELS[resize_mode]}",
        f"模型仓库:{model_repo}",
        f"模型文件:{model_file}",
        f"本地模型:{dit_path.name} / {vae_path.name}",
    ]
    if expected_kind == "video":
        summary_lines.extend(
            [
                f"视频信息:{source_meta['frames']} 帧,{source_meta['fps']:.2f} FPS,{source_meta['duration']:.2f} 秒",
                f"batch_size={batch_size},temporal_overlap={temporal_overlap},chunk_size={chunk_size}",
            ]
        )
    return job, "\n".join(summary_lines)
438
+
439
+
440
def estimate_job_duration(job: Optional[Dict[str, Any]]) -> int:
    """Heuristic GPU-time budget in seconds for a prepared job.

    Passed as the ``duration`` callable to ``spaces.GPU``. Scales with output
    megapixels, frame count, model size (7B vs 3B) and GGUF overhead; results
    are clamped to sane bounds (120–600 s for images, 180–1200 s for videos).
    """
    if not job:
        return 180

    model_name = str(job.get("model_file", "")).lower()
    heavy_model = "7b" in model_name
    quantized = model_name.endswith(".gguf")
    megapixels = job.get("target_width", 1280) * job.get("target_height", 720) / 1_000_000

    if job.get("kind") == "image":
        seconds = 120 + megapixels * 35
        if heavy_model:
            seconds += 80
        if quantized:
            seconds += 20
        return int(max(120, min(600, seconds)))

    frame_count = max(1, int(job.get("frames", 1)))
    per_frame = 0.40 if heavy_model else 0.25
    if quantized:
        per_frame *= 1.15
    seconds = 120 + frame_count * per_frame * max(1.0, megapixels / 0.9)
    if int(job.get("chunk_size", 0)) > 0:
        seconds += 45
    return int(max(180, min(1200, seconds)))
465
+
466
+
467
def build_cli_command(job: Dict[str, Any]) -> List[str]:
    """Assemble the argv list for the SeedVR2 ``inference_cli.py`` backend.

    Runs under the current interpreter (sys.executable) so the Space's
    environment is inherited. Video jobs additionally get the OpenCV backend
    and optional temporal/chunking flags.
    """
    backend_dir = ensure_seedvr_backend()
    cmd = [
        sys.executable,
        str(backend_dir / "inference_cli.py"),
        job["input_path"],
        "--output",
        job["raw_output"],
        "--output_format",
        "png" if job["kind"] == "image" else "mp4",
        "--model_dir",
        str(MODEL_DIR),
        "--dit_model",
        job["model_file"],
        "--resolution",
        str(job["cli_resolution"]),
        "--max_resolution",
        str(job["cli_max_resolution"]),
        "--batch_size",
        str(job["batch_size"]),
        "--color_correction",
        str(job["color_correction"]),
    ]

    if job["kind"] == "video":
        cmd.extend(["--video_backend", "opencv"])
        # Zero-valued options are omitted so the CLI's own defaults apply.
        if int(job.get("temporal_overlap", 0)) > 0:
            cmd.extend(["--temporal_overlap", str(job["temporal_overlap"])])
        if int(job.get("chunk_size", 0)) > 0:
            cmd.extend(["--chunk_size", str(job["chunk_size"])])
        if int(job.get("batch_size", 1)) > 1:
            cmd.append("--uniform_batch_size")
    return cmd
500
+
501
+
502
def resize_image(
    input_path: str,
    output_path: str,
    width: int,
    height: int,
    mode: str,
) -> None:
    """Resize an image file to exactly *width*×*height* and save it.

    *mode*: "stretch" ignores aspect ratio; "crop" center-crops after scaling;
    anything else pads (letterboxes) with black — or transparent pixels when
    the source has an alpha channel, which is preserved throughout.
    """
    with Image.open(input_path) as img:
        # "transparency" covers palette images with a transparent index.
        has_alpha = img.mode in ("RGBA", "LA") or "transparency" in img.info
        if has_alpha:
            img = img.convert("RGBA")
        else:
            img = img.convert("RGB")

        if mode == "stretch":
            out = img.resize((width, height), resample=Image.LANCZOS)
        elif mode == "crop":
            out = ImageOps.fit(img, (width, height), method=Image.LANCZOS, centering=(0.5, 0.5))
        else:
            # Pad: scale to fit inside the target, then center on a canvas.
            contained = ImageOps.contain(img, (width, height), method=Image.LANCZOS)
            if has_alpha:
                bg_color = (0, 0, 0, 0)
                out = Image.new("RGBA", (width, height), bg_color)
                out.alpha_composite(contained, ((width - contained.width) // 2, (height - contained.height) // 2))
            else:
                out = Image.new("RGB", (width, height), (0, 0, 0))
                out.paste(contained, ((width - contained.width) // 2, (height - contained.height) // 2))

        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        out.save(output_path)
532
+
533
+
534
def resize_frame(frame, width: int, height: int, mode: str):
    """Resize one video frame (H×W×C ndarray) to *width*×*height*.

    "stretch" resizes directly; "crop" scales up to cover then center-crops;
    any other mode scales down to fit then pads symmetrically with black
    (transparent black for 4-channel frames).
    """
    src_h, src_w = frame.shape[:2]
    if mode == "stretch":
        return cv2.resize(frame, (width, height), interpolation=cv2.INTER_LANCZOS4)

    # crop → cover the target (scale by the larger ratio); pad → fit inside it.
    scale = max(width / src_w, height / src_h) if mode == "crop" else min(width / src_w, height / src_h)
    scaled_w = max(1, int(round(src_w * scale)))
    scaled_h = max(1, int(round(src_h * scale)))
    resized = cv2.resize(frame, (scaled_w, scaled_h), interpolation=cv2.INTER_LANCZOS4)

    if mode == "crop":
        x0 = max(0, (scaled_w - width) // 2)
        y0 = max(0, (scaled_h - height) // 2)
        return resized[y0:y0 + height, x0:x0 + width]

    channels = resized.shape[2] if len(resized.shape) == 3 else 1
    border_value = (0, 0, 0, 0) if channels == 4 else (0, 0, 0)
    # Split any odd padding so the extra pixel goes to the bottom/right edge.
    return cv2.copyMakeBorder(
        resized,
        (height - scaled_h) // 2,
        height - scaled_h - (height - scaled_h) // 2,
        (width - scaled_w) // 2,
        width - scaled_w - (width - scaled_w) // 2,
        cv2.BORDER_CONSTANT,
        value=border_value,
    )
560
+
561
+
562
def resize_video_cv2(
    input_path: str,
    output_path: str,
    width: int,
    height: int,
    mode: str,
    fallback_fps: float = 30.0,
) -> None:
    """Re-encode a video at exactly *width*×*height* using OpenCV (mp4v).

    Fallback path used when ffmpeg is unavailable. Frames are adjusted via
    ``resize_frame`` with the given *mode*; *fallback_fps* is used when the
    source FPS cannot be read.

    Raises RuntimeError when the input cannot be opened or the writer cannot
    be created.
    """
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        raise RuntimeError(f"无法读取中间视频:{input_path}")
    fps = float(cap.get(cv2.CAP_PROP_FPS) or fallback_fps or 30.0)
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    if not writer.isOpened():
        cap.release()
        raise RuntimeError("无法创建输出视频,请检查编码器。")

    try:
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            frame = resize_frame(frame, width, height, mode)
            # Safety net: rounding inside resize_frame can leave a 1px
            # mismatch in crop mode; force the exact target size.
            if frame.shape[1] != width or frame.shape[0] != height:
                frame = cv2.resize(frame, (width, height), interpolation=cv2.INTER_LANCZOS4)
            # Fix: check ndim before indexing shape[2] — a 2-D (grayscale)
            # frame would otherwise raise IndexError. VideoWriter expects BGR,
            # so drop an alpha channel when present.
            if frame.ndim == 3 and frame.shape[2] == 4:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR)
            writer.write(frame)
    finally:
        cap.release()
        writer.release()
595
+
596
+
597
def resize_video_ffmpeg(
    input_path: str,
    output_path: str,
    width: int,
    height: int,
    mode: str,
) -> None:
    """Re-encode a video at exactly *width*×*height* via ffmpeg (libx264).

    Preferred over the OpenCV fallback for quality/compatibility. Audio is
    dropped (``-an``); ``yuv420p`` + ``+faststart`` maximize player support.
    Raises subprocess.CalledProcessError when ffmpeg fails.
    """
    if mode == "stretch":
        vf = f"scale={width}:{height}:flags=lanczos"
    elif mode == "crop":
        # Scale up to cover the target, then center-crop the overflow.
        vf = (
            f"scale={width}:{height}:flags=lanczos:force_original_aspect_ratio=increase,"
            f"crop={width}:{height}"
        )
    else:
        # Pad: scale down to fit, then letterbox with black.
        vf = (
            f"scale={width}:{height}:flags=lanczos:force_original_aspect_ratio=decrease,"
            f"pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:color=black"
        )

    cmd = [
        "ffmpeg",
        "-y",
        "-i",
        input_path,
        "-vf",
        vf,
        "-an",
        "-c:v",
        "libx264",
        "-pix_fmt",
        "yuv420p",
        "-movflags",
        "+faststart",
        output_path,
    ]
    subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
634
+
635
+
636
def finalize_output(job: Dict[str, Any]) -> Dict[str, Any]:
    """Fit the raw SeedVR2 output to the job's exact target size.

    The CLI only guarantees short/long-side limits, so the raw result may not
    match target_width×target_height exactly; when it doesn't, the image/video
    is resized per the job's resize_mode (ffmpeg preferred for video, OpenCV
    as fallback). Returns a dict with raw/final dimensions and the final path.

    Raises RuntimeError when the raw output is missing or re-encoding fails.
    """
    raw_output = Path(job["raw_output"])
    final_output = Path(job["final_output"])
    target_w = int(job["target_width"])
    target_h = int(job["target_height"])
    resize_mode = str(job["resize_mode"])

    if not raw_output.exists():
        raise RuntimeError("SeedVR2 已运行,但没有找到输出文件。")

    raw_meta = probe_media(str(raw_output))
    already_exact = (
        raw_meta["width"] == target_w
        and raw_meta["height"] == target_h
    )

    if already_exact:
        # No re-encode needed; just move into place.
        shutil.move(str(raw_output), str(final_output))
    elif job["kind"] == "image":
        resize_image(str(raw_output), str(final_output), target_w, target_h, resize_mode)
    else:
        try:
            if shutil.which("ffmpeg"):
                resize_video_ffmpeg(str(raw_output), str(final_output), target_w, target_h, resize_mode)
            else:
                resize_video_cv2(
                    str(raw_output),
                    str(final_output),
                    target_w,
                    target_h,
                    resize_mode,
                    fallback_fps=float(job.get("fps", 30.0) or 30.0),
                )
        except subprocess.CalledProcessError as exc:
            # Surface ffmpeg's stderr tail instead of a bare exit code.
            raise RuntimeError(tail_text(exc.stderr or str(exc), 2500)) from exc

    final_meta = probe_media(str(final_output))
    return {
        "raw_width": raw_meta["width"],
        "raw_height": raw_meta["height"],
        "final_width": final_meta["width"],
        "final_height": final_meta["height"],
        "path": str(final_output),
    }
680
+
681
+
682
def run_seedvr_job_core(job: Dict[str, Any]) -> Tuple[str, str]:
    """Run the SeedVR2 CLI for a prepared job and finalize its output.

    Intended to execute inside the GPU-allocated wrappers below. Returns
    ``(final_output_path, summary_text)``; raises gr.Error with the log tail
    when the subprocess fails.
    """
    if not job:
        raise gr.Error("任务状态为空,请重新点击运行。")

    ensure_seedvr_backend()
    cmd = build_cli_command(job)
    env = os.environ.copy()
    # Unbuffered child output so logs are complete even on crashes.
    env.setdefault("PYTHONUNBUFFERED", "1")

    proc = subprocess.run(
        cmd,
        cwd=str(BACKEND_DIR),
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    logs = ((proc.stdout or "") + "\n" + (proc.stderr or "")).strip()
    if proc.returncode != 0:
        raise gr.Error("SeedVR2 运行失败:\n\n" + tail_text(logs, 5000))

    result_meta = finalize_output(job)
    summary_lines = [
        f"任务完成:{job['job_id']}",
        f"模型:{job['model_repo']} / {job['model_file']}",
        f"原始输入:{job['source_width']}x{job['source_height']}",
        f"SeedVR2 直接输出:{result_meta['raw_width']}x{result_meta['raw_height']}",
        f"最终输出:{result_meta['final_width']}x{result_meta['final_height']}",
        f"尺寸策略:{RESIZE_MODE_LABELS[job['resize_mode']]}",
        f"输出文件:{result_meta['path']}",
    ]
    if job["kind"] == "video":
        summary_lines.append(
            f"视频参数:batch_size={job['batch_size']} / temporal_overlap={job['temporal_overlap']} / chunk_size={job['chunk_size']}"
        )
    summary_lines.append("")
    summary_lines.append("执行日志(末尾截断):")
    summary_lines.append(tail_text(logs, 5000) or "<无日志>")
    return result_meta["path"], "\n".join(summary_lines)
721
+
722
+
723
@spaces.GPU(duration=estimate_job_duration)
def run_image_job(job: Dict[str, Any]):
    """GPU entry point for image jobs: run SeedVR2 and return
    (preview path, download path, log text) for the UI."""
    result_path, report = run_seedvr_job_core(job)
    return result_path, result_path, report
727
+
728
+
729
@spaces.GPU(duration=estimate_job_duration)
def run_video_job(job: Dict[str, Any]):
    """GPU entry point for video jobs: run SeedVR2 and return
    (preview path, download path, log text) for the UI."""
    result_path, report = run_seedvr_job_core(job)
    return result_path, result_path, report
733
+
734
+
735
def prepare_image_job(
    image_path: str,
    model_repo: str,
    model_file: str,
    scale_factor: Any,
    out_w: Any,
    out_h: Any,
    resize_mode: str,
    color_correction: str,
):
    """CPU-side stage for the image tab: validate inputs and build a
    single-frame SeedVR2 job."""
    return build_job(
        image_path,
        model_repo,
        model_file,
        scale_factor,
        out_w,
        out_h,
        resize_mode,
        color_correction,
        "image",
    )
756
+
757
+
758
def prepare_video_job(
    video_path: str,
    model_repo: str,
    model_file: str,
    scale_factor: Any,
    out_w: Any,
    out_h: Any,
    resize_mode: str,
    color_correction: str,
    batch_size: int,
    temporal_overlap: int,
    chunk_size: Any,
):
    """CPU-side stage for the video tab: validate inputs and build a
    SeedVR2 video job with temporal batching parameters."""
    return build_job(
        video_path,
        model_repo,
        model_file,
        scale_factor,
        out_w,
        out_h,
        resize_mode,
        color_correction,
        "video",
        batch_size,
        temporal_overlap,
        chunk_size,
    )
785
+
786
+
787
def on_repo_change(repo_id: str):
    """Dropdown callback: repopulate the model list (cached) for a new repo."""
    return update_model_dropdown(repo_id)
789
+
790
+
791
def on_repo_refresh(repo_id: str):
    """Button callback: re-query the Hub for the repo's model list."""
    return update_model_dropdown(repo_id, force=True)
793
+
794
+
795
# Initial dropdown state resolved at import time (falls back to static lists
# when the Hub is unreachable).
INITIAL_REPO = "numz/SeedVR2_comfyUI"
INITIAL_MODELS = fetch_models_from_repo(INITIAL_REPO)
INITIAL_MODEL = choose_default_model(INITIAL_REPO, INITIAL_MODELS)


# --- UI definition -----------------------------------------------------------
# Two symmetric tabs (image/video); each run is a two-step chain:
# prepare_*_job on CPU, then run_*_job inside a spaces.GPU allocation.
with gr.Blocks(title="SeedVR2 ZeroGPU Space", fill_width=True) as demo:
    gr.Markdown(
        "# SeedVR2 ZeroGPU 超分\n"
        "使用官方 `ComfyUI-SeedVR2_VideoUpscaler` CLI 作为后端,支持图片/视频、"
        "`numz/SeedVR2_comfyUI` 的 `.safetensors` 与 `cmeka/SeedVR2-GGUF` 的 `.gguf`。\n\n"
        "- **超分倍率**:当输出宽高为空时生效\n"
        "- **自定义输出分辨率**:宽高可都填,也可只填一个\n"
        "- **输出尺寸策略**:支持补边 / 裁切 / 拉伸\n"
        "- **ZeroGPU**:模型下载和文件准备走 CPU,真正推理阶段才申请 GPU"
    )

    with gr.Tab("图片"):
        # Holds the prepared job dict between the two chained callbacks.
        image_job_state = gr.State()
        with gr.Row():
            image_input = gr.File(
                label="上传图片",
                file_count="single",
                type="filepath",
                file_types=sorted(IMAGE_EXTS),
            )
            image_preview = gr.Image(label="输出预览", type="filepath")

        with gr.Row():
            image_repo = gr.Dropdown(
                label="模型仓库",
                choices=list(MODEL_SOURCES.keys()),
                value=INITIAL_REPO,
            )
            image_model = gr.Dropdown(
                label="模型文件",
                choices=INITIAL_MODELS,
                value=INITIAL_MODEL,
                allow_custom_value=False,
            )
            image_refresh = gr.Button("刷新模型列表")

        with gr.Row():
            image_scale = gr.Number(label="超分倍率", value=2.0, precision=3)
            image_out_w = gr.Number(label="输出宽度(可选)", value=None, precision=0)
            image_out_h = gr.Number(label="输出高度(可选)", value=None, precision=0)

        with gr.Row():
            # (label, value) pairs map UI text to internal resize-mode keys.
            image_resize_mode = gr.Dropdown(
                label="输出尺寸策略",
                choices=[
                    ("保持比例并补边", "pad"),
                    ("保持比例并裁切", "crop"),
                    ("强制拉伸到目标尺寸", "stretch"),
                ],
                value="pad",
            )
            image_color = gr.Dropdown(
                label="颜色校正",
                choices=["lab", "wavelet", "wavelet_adaptive", "hsv", "adain", "none"],
                value="lab",
            )

        image_run = gr.Button("开始图片超分", variant="primary")
        image_file_out = gr.File(label="下载结果")
        image_status = gr.Textbox(label="运行日志", lines=18)

        image_repo.change(on_repo_change, inputs=image_repo, outputs=image_model)
        image_refresh.click(on_repo_refresh, inputs=image_repo, outputs=image_model)
        # CPU prepare, then GPU run (chained so GPU time isn't spent on setup).
        image_run.click(
            prepare_image_job,
            inputs=[
                image_input,
                image_repo,
                image_model,
                image_scale,
                image_out_w,
                image_out_h,
                image_resize_mode,
                image_color,
            ],
            outputs=[image_job_state, image_status],
        ).then(
            run_image_job,
            inputs=image_job_state,
            outputs=[image_preview, image_file_out, image_status],
        )

    with gr.Tab("视频"):
        video_job_state = gr.State()
        with gr.Row():
            video_input = gr.File(
                label="上传视频",
                file_count="single",
                type="filepath",
                file_types=sorted(VIDEO_EXTS),
            )
            video_preview = gr.Video(label="输出预览")

        with gr.Row():
            video_repo = gr.Dropdown(
                label="模型仓库",
                choices=list(MODEL_SOURCES.keys()),
                value=INITIAL_REPO,
            )
            video_model = gr.Dropdown(
                label="模型文件",
                choices=INITIAL_MODELS,
                value=INITIAL_MODEL,
                allow_custom_value=False,
            )
            video_refresh = gr.Button("刷新模型列表")

        with gr.Row():
            video_scale = gr.Number(label="超分倍率", value=2.0, precision=3)
            video_out_w = gr.Number(label="输出宽度(可选)", value=None, precision=0)
            video_out_h = gr.Number(label="输出高度(可选)", value=None, precision=0)

        with gr.Row():
            video_resize_mode = gr.Dropdown(
                label="输出尺寸策略",
                choices=[
                    ("保持比例并补边", "pad"),
                    ("保持比例并裁切", "crop"),
                    ("强制拉伸到目标尺寸", "stretch"),
                ],
                value="pad",
            )
            video_color = gr.Dropdown(
                label="颜色校正",
                choices=["lab", "wavelet", "wavelet_adaptive", "hsv", "adain", "none"],
                value="lab",
            )

        with gr.Row():
            # Choices restricted to 4n+1 as required by the backend's
            # temporal batching (validated again in build_job).
            video_batch = gr.Dropdown(
                label="batch_size(必须是 4n+1)",
                choices=[1, 5, 9, 13, 17, 21, 25, 33],
                value=5,
            )
            video_overlap = gr.Slider(
                label="temporal_overlap",
                minimum=0,
                maximum=16,
                step=1,
                value=3,
            )
            video_chunk = gr.Number(
                label="chunk_size(0=整段加载)",
                value=0,
                precision=0,
            )

        video_run = gr.Button("开始视频超分", variant="primary")
        video_file_out = gr.File(label="下载结果")
        video_status = gr.Textbox(label="运行日志", lines=20)

        video_repo.change(on_repo_change, inputs=video_repo, outputs=video_model)
        video_refresh.click(on_repo_refresh, inputs=video_repo, outputs=video_model)
        video_run.click(
            prepare_video_job,
            inputs=[
                video_input,
                video_repo,
                video_model,
                video_scale,
                video_out_w,
                video_out_h,
                video_resize_mode,
                video_color,
                video_batch,
                video_overlap,
                video_chunk,
            ],
            outputs=[video_job_state, video_status],
        ).then(
            run_video_job,
            inputs=video_job_state,
            outputs=[video_preview, video_file_out, video_status],
        )

    gr.Markdown(
        "### 说明\n"
        "1. `numz/SeedVR2_comfyUI` 的 VAE 会自动下载到 `models/SEEDVR2`。\n"
        "2. `cmeka/SeedVR2-GGUF` 只提供 GGUF DiT,因此仍会同时下载官方 VAE。\n"
        "3. 若仓库后续新增模型,点 **刷新模型列表** 就能拉到最新文件名。\n"
        "4. 没有填写输出宽高时,使用倍率计算目标分辨率;填写宽/高后会优先按宽高输出。"
    )

# Single-worker queue: only one GPU job runs at a time on ZeroGPU.
demo.queue(default_concurrency_limit=1, max_size=16)

if __name__ == "__main__":
    demo.launch()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio>=5.0.0
2
+ spaces
3
+ huggingface_hub>=0.30.0
4
+ Pillow>=10.0.0
5
+ opencv-python-headless
6
+ torch
7
+ torchvision
8
+ safetensors
9
+ numpy
10
+ tqdm
11
+ psutil
12
+ einops
13
+ omegaconf>=2.3.0
14
+ diffusers>=0.33.1
15
+ peft>=0.17.0
16
+ rotary_embedding_torch>=0.5.3
17
+ gguf
18
+ matplotlib