Spaces:
Running on A10G
Running on A10G
Update app.py
Browse files
app.py
CHANGED
|
@@ -38,7 +38,7 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
| 38 |
# 1. MODEL CONFIG
|
| 39 |
# ──────────────────────────────────────────────────────────────────────────────
|
| 40 |
REPO_ID = "FINAL-Bench/Darwin-35B-A3B-Opus-Q8-GGUF"
|
| 41 |
-
GGUF_FILE = "
|
| 42 |
MODEL_NAME = "Darwin-35B-A3B-Opus-Q8"
|
| 43 |
MODEL_CAP = {
|
| 44 |
"arch": "MoE", "active": "3B / 35B total",
|
|
@@ -63,7 +63,8 @@ def detect_gpu_layers() -> int:
|
|
| 63 |
try:
|
| 64 |
import torch
|
| 65 |
if torch.cuda.is_available():
|
| 66 |
-
|
|
|
|
| 67 |
print(f"[GPU] {torch.cuda.get_device_name(0)} → {vram_gb:.1f} GB VRAM", flush=True)
|
| 68 |
if vram_gb >= 40:  # A100 40GB → 전체 레이어 GPU (all layers on GPU)
|
| 69 |
return -1 # -1 = all layers
|
|
|
|
| 38 |
# 1. MODEL CONFIG
|
| 39 |
# ──────────────────────────────────────────────────────────────────────────────
|
| 40 |
REPO_ID = "FINAL-Bench/Darwin-35B-A3B-Opus-Q8-GGUF"
|
| 41 |
+
GGUF_FILE = "merged_109838c2-q8_0-00001-of-00003.gguf"
|
| 42 |
MODEL_NAME = "Darwin-35B-A3B-Opus-Q8"
|
| 43 |
MODEL_CAP = {
|
| 44 |
"arch": "MoE", "active": "3B / 35B total",
|
|
|
|
| 63 |
try:
|
| 64 |
import torch
|
| 65 |
if torch.cuda.is_available():
|
| 66 |
+
props = torch.cuda.get_device_properties(0)
|
| 67 |
+
vram_gb = (getattr(props, 'total_memory', 0) or getattr(props, 'total_mem', 0)) / (1024**3)
|
| 68 |
print(f"[GPU] {torch.cuda.get_device_name(0)} → {vram_gb:.1f} GB VRAM", flush=True)
|
| 69 |
if vram_gb >= 40:  # A100 40GB → 전체 레이어 GPU (all layers on GPU)
|
| 70 |
return -1 # -1 = all layers
|