viraman committed on
Commit
e4e8594
·
verified ·
1 Parent(s): a9a15de

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +2 -2
README.md CHANGED
@@ -294,7 +294,7 @@ print(response.choices[0].message.content)
294
  Launch the model using TRT-LLM
295
 
296
  ```shell
297
- docker run -v /home/root/.cache/huggingface/:/root/.cache/huggingface/ --rm --ulimit memlock=-1 --ulimit stack=67108864 --gpus=all --ipc=host --network host -d -e MODEL=NVIDIA-Nemotron-3-Nano-4B-BF16 -e HF_TOKEN=$HF_TOKEN nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc6 bash -c '
298
  cat > /tmp/extra-llm-api-config.yml <<EOF
299
  kv_cache_config:
300
  dtype: "auto"
@@ -308,7 +308,7 @@ moe_config:
308
  EOF
309
 
310
  trtllm-serve \
311
- NVIDIA-Nemotron-3-Nano-4B-BF16 \
312
  --host 0.0.0.0 \
313
  --port 8123 \
314
  --max_batch_size 32 \
 
294
  Launch the model using TRT-LLM
295
 
296
  ```shell
297
+ docker run -v /home/root/.cache/huggingface/:/root/.cache/huggingface/ --rm --ulimit memlock=-1 --ulimit stack=67108864 --gpus=all --ipc=host --network host -d -e MODEL=nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16 -e HF_TOKEN=$HF_TOKEN nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc6 bash -c '
298
  cat > /tmp/extra-llm-api-config.yml <<EOF
299
  kv_cache_config:
300
  dtype: "auto"
 
308
  EOF
309
 
310
  trtllm-serve \
311
+ nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16 \
312
  --host 0.0.0.0 \
313
  --port 8123 \
314
  --max_batch_size 32 \