Update README.md
Browse files
README.md
CHANGED
|
@@ -43,7 +43,7 @@ torchrun --standalone --nproc_per_node=8 run_vllm_fp8.py
|
|
| 43 |
from vllm import LLM, SamplingParams
|
| 44 |
prompt = "Write me an essay about bear and knight"
|
| 45 |
|
| 46 |
-
model_name="
|
| 47 |
tp=8 # 8 GPUs
|
| 48 |
|
| 49 |
model = LLM(model=model_name, tensor_parallel_size=tp, max_model_len=8192, trust_remote_code=True, dtype="float16", quantization="fp8", quantized_weights_path="/llama.safetensors")
|
|
@@ -59,7 +59,7 @@ print(result)
|
|
| 59 |
|
| 60 |
```sh
|
| 61 |
# 8 GPUs
|
| 62 |
-
torchrun --standalone --nproc_per_node=8 run_vllm_fp8.py
|
| 63 |
```
|
| 64 |
|
| 65 |
```python
|
|
@@ -67,7 +67,7 @@ torchrun --standalone --nproc_per_node=8 run_vllm_fp8.py
|
|
| 67 |
from vllm import LLM, SamplingParams
|
| 68 |
prompt = "Write me an essay about bear and knight"
|
| 69 |
|
| 70 |
-
model_name="
|
| 71 |
tp=8 # 8 GPUs
|
| 72 |
model = LLM(model=model_name, tensor_parallel_size=tp, max_model_len=8192, trust_remote_code=True, dtype="bfloat16")
|
| 73 |
sampling_params = SamplingParams(
|
|
|
|
| 43 |
from vllm import LLM, SamplingParams
|
| 44 |
prompt = "Write me an essay about bear and knight"
|
| 45 |
|
| 46 |
+
model_name="models--meta-llama--Meta-Llama-3.1-405B-Instruct/snapshots/069992c75aed59df00ec06c17177e76c63296a26/"
|
| 47 |
tp=8 # 8 GPUs
|
| 48 |
|
| 49 |
model = LLM(model=model_name, tensor_parallel_size=tp, max_model_len=8192, trust_remote_code=True, dtype="float16", quantization="fp8", quantized_weights_path="/llama.safetensors")
|
|
|
|
| 59 |
|
| 60 |
```sh
|
| 61 |
# 8 GPUs
|
| 62 |
+
torchrun --standalone --nproc_per_node=8 run_vllm_fp16.py
|
| 63 |
```
|
| 64 |
|
| 65 |
```python
|
|
|
|
| 67 |
from vllm import LLM, SamplingParams
|
| 68 |
prompt = "Write me an essay about bear and knight"
|
| 69 |
|
| 70 |
+
model_name="models--meta-llama--Meta-Llama-3.1-405B-Instruct/snapshots/069992c75aed59df00ec06c17177e76c63296a26/"
|
| 71 |
tp=8 # 8 GPUs
|
| 72 |
model = LLM(model=model_name, tensor_parallel_size=tp, max_model_len=8192, trust_remote_code=True, dtype="bfloat16")
|
| 73 |
sampling_params = SamplingParams(
|