Commit
·
004f3a4
1
Parent(s):
3aaff40
update readme
Browse files
README.md
CHANGED
|
@@ -71,13 +71,9 @@ To deploy the quantized FP4 checkpoint with [TensorRT-LLM](https://github.com/NV
|
|
| 71 |
```
|
| 72 |
from tensorrt_llm import SamplingParams
|
| 73 |
from tensorrt_llm._torch import LLM
|
| 74 |
-
from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig
|
| 75 |
-
|
| 76 |
|
| 77 |
def main():
|
| 78 |
|
| 79 |
-
pytorch_config = PyTorchConfig()
|
| 80 |
-
|
| 81 |
prompts = [
|
| 82 |
"Hello, my name is",
|
| 83 |
"The president of the United States is",
|
|
@@ -86,7 +82,7 @@ def main():
|
|
| 86 |
]
|
| 87 |
sampling_params = SamplingParams(max_tokens=32)
|
| 88 |
|
| 89 |
-
llm = LLM(model="nvidia/DeepSeek-R1-FP4", tensor_parallel_size=8,
|
| 90 |
|
| 91 |
outputs = llm.generate(prompts, sampling_params)
|
| 92 |
|
|
|
|
| 71 |
```
|
| 72 |
from tensorrt_llm import SamplingParams
|
| 73 |
from tensorrt_llm._torch import LLM
|
|
|
|
|
|
|
| 74 |
|
| 75 |
def main():
|
| 76 |
|
|
|
|
|
|
|
| 77 |
prompts = [
|
| 78 |
"Hello, my name is",
|
| 79 |
"The president of the United States is",
|
|
|
|
| 82 |
]
|
| 83 |
sampling_params = SamplingParams(max_tokens=32)
|
| 84 |
|
| 85 |
+
llm = LLM(model="nvidia/DeepSeek-R1-FP4", tensor_parallel_size=8, enable_attention_dp=True)
|
| 86 |
|
| 87 |
outputs = llm.generate(prompts, sampling_params)
|
| 88 |
|