Spaces:
Running on A10G
Running on A10G
Update app.py
Browse files
app.py
CHANGED
|
@@ -87,9 +87,25 @@ N_CTX = int(os.getenv("N_CTX", "32768"))
|
|
| 87 |
print(f"[MODEL] Loading {REPO_ID} ...", flush=True)
|
| 88 |
print(f"[MODEL] n_gpu_layers={N_GPU_LAYERS}, n_ctx={N_CTX}", flush=True)
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
n_gpu_layers=N_GPU_LAYERS,
|
| 94 |
n_ctx=N_CTX,
|
| 95 |
verbose=True,
|
|
|
|
| 87 |
print(f"[MODEL] Loading {REPO_ID} ...", flush=True)
|
| 88 |
print(f"[MODEL] n_gpu_layers={N_GPU_LAYERS}, n_ctx={N_CTX}", flush=True)
|
| 89 |
|
| 90 |
+
# ── Split GGUF: all 3 shards must be downloaded ──
|
| 91 |
+
from huggingface_hub import hf_hub_download
|
| 92 |
+
|
| 93 |
+
GGUF_SHARDS = [
|
| 94 |
+
"merged_109838c2-q8_0-00001-of-00003.gguf",
|
| 95 |
+
"merged_109838c2-q8_0-00002-of-00003.gguf",
|
| 96 |
+
"merged_109838c2-q8_0-00003-of-00003.gguf",
|
| 97 |
+
]
|
| 98 |
+
|
| 99 |
+
shard_paths = []
|
| 100 |
+
for shard in GGUF_SHARDS:
|
| 101 |
+
print(f"[MODEL] Downloading {shard} ...", flush=True)
|
| 102 |
+
p = hf_hub_download(repo_id=REPO_ID, filename=shard)
|
| 103 |
+
shard_paths.append(p)
|
| 104 |
+
print(f"[MODEL] ✓ {p}", flush=True)
|
| 105 |
+
|
| 106 |
+
# Load from the first shard path (llama.cpp auto-detects the remaining shards in the same folder)
|
| 107 |
+
llm = Llama(
|
| 108 |
+
model_path=shard_paths[0],
|
| 109 |
n_gpu_layers=N_GPU_LAYERS,
|
| 110 |
n_ctx=N_CTX,
|
| 111 |
verbose=True,
|