Spaces:
Running on A10G
Running on A10G
Update app.py
Browse files
app.py
CHANGED
|
@@ -87,9 +87,25 @@ N_CTX = int(os.getenv("N_CTX", "32768"))
|
|
| 87 |
print(f"[MODEL] Loading {REPO_ID} ...", flush=True)
|
| 88 |
print(f"[MODEL] n_gpu_layers={N_GPU_LAYERS}, n_ctx={N_CTX}", flush=True)
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
n_gpu_layers=N_GPU_LAYERS,
|
| 94 |
n_ctx=N_CTX,
|
| 95 |
verbose=True,
|
|
|
|
| 87 |
print(f"[MODEL] Loading {REPO_ID} ...", flush=True)
|
| 88 |
print(f"[MODEL] n_gpu_layers={N_GPU_LAYERS}, n_ctx={N_CTX}", flush=True)
|
| 89 |
|
| 90 |
+
# ── Split GGUF: all 3 shards must be downloaded ──
|
| 91 |
+
from huggingface_hub import hf_hub_download
|
| 92 |
+
|
| 93 |
+
GGUF_SHARDS = [
|
| 94 |
+
"merged_109838c2-q8_0-00001-of-00003.gguf",
|
| 95 |
+
"merged_109838c2-q8_0-00002-of-00003.gguf",
|
| 96 |
+
"merged_109838c2-q8_0-00003-of-00003.gguf",
|
| 97 |
+
]
|
| 98 |
+
|
| 99 |
+
shard_paths = []
|
| 100 |
+
for shard in GGUF_SHARDS:
|
| 101 |
+
print(f"[MODEL] Downloading {shard} ...", flush=True)
|
| 102 |
+
p = hf_hub_download(repo_id=REPO_ID, filename=shard)
|
| 103 |
+
shard_paths.append(p)
|
| 104 |
+
print(f"[MODEL] ✓ {p}", flush=True)
|
| 105 |
+
|
| 106 |
+
# Load from the first shard path (llama.cpp auto-detects the remaining shards in the same folder)
|
| 107 |
+
llm = Llama(
|
| 108 |
+
model_path=shard_paths[0],
|
| 109 |
n_gpu_layers=N_GPU_LAYERS,
|
| 110 |
n_ctx=N_CTX,
|
| 111 |
verbose=True,
|