SeaWolf-AI committed on
Commit
d75309f
·
verified ·
1 Parent(s): cfad124

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -3
app.py CHANGED
@@ -87,9 +87,25 @@ N_CTX = int(os.getenv("N_CTX", "32768"))
87
  print(f"[MODEL] Loading {REPO_ID} ...", flush=True)
88
  print(f"[MODEL] n_gpu_layers={N_GPU_LAYERS}, n_ctx={N_CTX}", flush=True)
89
 
90
- llm = Llama.from_pretrained(
91
- repo_id=REPO_ID,
92
- filename=GGUF_FILE,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  n_gpu_layers=N_GPU_LAYERS,
94
  n_ctx=N_CTX,
95
  verbose=True,
 
87
  print(f"[MODEL] Loading {REPO_ID} ...", flush=True)
88
  print(f"[MODEL] n_gpu_layers={N_GPU_LAYERS}, n_ctx={N_CTX}", flush=True)
89
 
90
+ # ── Split GGUF: 3개 샤드 전부 다운로드 필수 ──
91
+ from huggingface_hub import hf_hub_download
92
+
93
+ GGUF_SHARDS = [
94
+ "merged_109838c2-q8_0-00001-of-00003.gguf",
95
+ "merged_109838c2-q8_0-00002-of-00003.gguf",
96
+ "merged_109838c2-q8_0-00003-of-00003.gguf",
97
+ ]
98
+
99
+ shard_paths = []
100
+ for shard in GGUF_SHARDS:
101
+ print(f"[MODEL] Downloading {shard} ...", flush=True)
102
+ p = hf_hub_download(repo_id=REPO_ID, filename=shard)
103
+ shard_paths.append(p)
104
+ print(f"[MODEL] → {p}", flush=True)
105
+
106
+ # 첫 번째 샤드 경로로 로드 (llama.cpp가 같은 폴더의 나머지 자동 감지)
107
+ llm = Llama(
108
+ model_path=shard_paths[0],
109
  n_gpu_layers=N_GPU_LAYERS,
110
  n_ctx=N_CTX,
111
  verbose=True,