Spaces:
Running on Zero
Running on Zero
fix: simplify to single @spaces.GPU function, remove inference_mode decorator, fix type hint
Browse files
README.md
CHANGED
|
@@ -6,7 +6,7 @@ colorTo: indigo
|
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.23.0
|
| 8 |
app_file: app.py
|
| 9 |
-
hardware:
|
| 10 |
pinned: true
|
| 11 |
license: apache-2.0
|
| 12 |
models:
|
|
|
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.23.0
|
| 8 |
app_file: app.py
|
| 9 |
+
hardware: zero-a10g
|
| 10 |
pinned: true
|
| 11 |
license: apache-2.0
|
| 12 |
models:
|
app.py
CHANGED
|
@@ -4,6 +4,7 @@ AD-Copilot Demo: Comparison-Aware Anomaly Detection with Vision-Language Model
|
|
| 4 |
|
| 5 |
import os
|
| 6 |
import traceback
|
|
|
|
| 7 |
import gradio as gr
|
| 8 |
import torch
|
| 9 |
from transformers import AutoModelForImageTextToText, AutoProcessor
|
|
@@ -11,8 +12,8 @@ from qwen_vl_utils import process_vision_info
|
|
| 11 |
from PIL import Image
|
| 12 |
|
| 13 |
# ---------------------------------------------------------------------------
|
| 14 |
-
# Model loading
|
| 15 |
-
#
|
| 16 |
# ---------------------------------------------------------------------------
|
| 17 |
MODEL_ID = "jiang-cc/AD-Copilot"
|
| 18 |
|
|
@@ -26,7 +27,6 @@ processor = AutoProcessor.from_pretrained(
|
|
| 26 |
model = AutoModelForImageTextToText.from_pretrained(
|
| 27 |
MODEL_ID,
|
| 28 |
torch_dtype=torch.bfloat16,
|
| 29 |
-
device_map="auto",
|
| 30 |
trust_remote_code=True,
|
| 31 |
).eval()
|
| 32 |
|
|
@@ -34,11 +34,12 @@ model = AutoModelForImageTextToText.from_pretrained(
|
|
| 34 |
# ---------------------------------------------------------------------------
|
| 35 |
# Inference
|
| 36 |
# ---------------------------------------------------------------------------
|
|
|
|
| 37 |
def predict(
|
| 38 |
reference_image: Image.Image,
|
| 39 |
test_image: Image.Image,
|
| 40 |
prompt: str,
|
| 41 |
-
max_new_tokens:
|
| 42 |
):
|
| 43 |
if reference_image is None or test_image is None:
|
| 44 |
return "Please upload both a reference (good) image and a test image."
|
|
@@ -68,26 +69,25 @@ def predict(
|
|
| 68 |
)
|
| 69 |
image_inputs, video_inputs = process_vision_info(messages)
|
| 70 |
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
)[0]
|
| 91 |
return output
|
| 92 |
except Exception as e:
|
| 93 |
tb = traceback.format_exc()
|
|
|
|
| 4 |
|
| 5 |
import os
|
| 6 |
import traceback
|
| 7 |
+
import spaces
|
| 8 |
import gradio as gr
|
| 9 |
import torch
|
| 10 |
from transformers import AutoModelForImageTextToText, AutoProcessor
|
|
|
|
| 12 |
from PIL import Image
|
| 13 |
|
| 14 |
# ---------------------------------------------------------------------------
|
| 15 |
+
# Model loading (happens once at Space startup). NOTE(review): @spaces.GPU
|
| 16 |
+
# only attaches a GPU per call; confirm the weights are moved to CUDA explicitly.
|
| 17 |
# ---------------------------------------------------------------------------
|
| 18 |
MODEL_ID = "jiang-cc/AD-Copilot"
|
| 19 |
|
|
|
|
| 27 |
model = AutoModelForImageTextToText.from_pretrained(
|
| 28 |
MODEL_ID,
|
| 29 |
torch_dtype=torch.bfloat16,
|
|
|
|
| 30 |
trust_remote_code=True,
|
| 31 |
).eval()
|
| 32 |
|
|
|
|
| 34 |
# ---------------------------------------------------------------------------
|
| 35 |
# Inference
|
| 36 |
# ---------------------------------------------------------------------------
|
| 37 |
+
@spaces.GPU(duration=120)
|
| 38 |
def predict(
|
| 39 |
reference_image: Image.Image,
|
| 40 |
test_image: Image.Image,
|
| 41 |
prompt: str,
|
| 42 |
+
max_new_tokens: float,
|
| 43 |
):
|
| 44 |
if reference_image is None or test_image is None:
|
| 45 |
return "Please upload both a reference (good) image and a test image."
|
|
|
|
| 69 |
)
|
| 70 |
image_inputs, video_inputs = process_vision_info(messages)
|
| 71 |
|
| 72 |
+
inputs = processor(
|
| 73 |
+
text=[text],
|
| 74 |
+
images=image_inputs,
|
| 75 |
+
videos=video_inputs,
|
| 76 |
+
padding=True,
|
| 77 |
+
return_tensors="pt",
|
| 78 |
+
).to(model.device)
|
| 79 |
+
|
| 80 |
+
generated_ids = model.generate(
|
| 81 |
+
**inputs, max_new_tokens=max_new_tokens, do_sample=False
|
| 82 |
+
)
|
| 83 |
+
generated_ids_trimmed = [
|
| 84 |
+
out[len(inp) :] for inp, out in zip(inputs.input_ids, generated_ids)
|
| 85 |
+
]
|
| 86 |
+
output = processor.batch_decode(
|
| 87 |
+
generated_ids_trimmed,
|
| 88 |
+
skip_special_tokens=True,
|
| 89 |
+
clean_up_tokenization_spaces=False,
|
| 90 |
+
)[0]
|
|
|
|
| 91 |
return output
|
| 92 |
except Exception as e:
|
| 93 |
tb = traceback.format_exc()
|