Spaces:
Running on A10G
Running on A10G
Update app.py
Browse files
app.py
CHANGED
|
@@ -38,7 +38,7 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
| 38 |
# 1. MODEL CONFIG
|
| 39 |
# ──────────────────────────────────────────────────────────────────────────────
|
| 40 |
REPO_ID = "FINAL-Bench/Darwin-35B-A3B-Opus-Q8-GGUF"
|
| 41 |
-
GGUF_FILE = "
|
| 42 |
MODEL_NAME = "Darwin-35B-A3B-Opus-Q8"
|
| 43 |
MODEL_CAP = {
|
| 44 |
"arch": "MoE", "active": "3B / 35B total",
|
|
@@ -63,7 +63,8 @@ def detect_gpu_layers() -> int:
|
|
| 63 |
try:
|
| 64 |
import torch
|
| 65 |
if torch.cuda.is_available():
|
| 66 |
-
|
|
|
|
| 67 |
print(f"[GPU] {torch.cuda.get_device_name(0)} → {vram_gb:.1f} GB VRAM", flush=True)
|
| 68 |
if vram_gb >= 40:  # A100 40GB → 전체 레이어 GPU (all layers on GPU)
|
| 69 |
return -1 # -1 = all layers
|
|
|
|
| 38 |
# 1. MODEL CONFIG
|
| 39 |
# ──────────────────────────────────────────────────────────────────────────────
|
| 40 |
REPO_ID = "FINAL-Bench/Darwin-35B-A3B-Opus-Q8-GGUF"
|
| 41 |
+
GGUF_FILE = "merged_109838c2-q8_0-00001-of-00003.gguf"
|
| 42 |
MODEL_NAME = "Darwin-35B-A3B-Opus-Q8"
|
| 43 |
MODEL_CAP = {
|
| 44 |
"arch": "MoE", "active": "3B / 35B total",
|
|
|
|
| 63 |
try:
|
| 64 |
import torch
|
| 65 |
if torch.cuda.is_available():
|
| 66 |
+
props = torch.cuda.get_device_properties(0)
|
| 67 |
+
vram_gb = (getattr(props, 'total_memory', 0) or getattr(props, 'total_mem', 0)) / (1024**3)
|
| 68 |
print(f"[GPU] {torch.cuda.get_device_name(0)} → {vram_gb:.1f} GB VRAM", flush=True)
|
| 69 |
if vram_gb >= 40:  # A100 40GB → 전체 레이어 GPU (all layers on GPU)
|
| 70 |
return -1 # -1 = all layers
|