nvidia
/

DeepSeek-R1-NVFP4

Text Generation

8-bit precision

Model card Files and versions

zhiyucheng committed on Feb 24

Commit

004f3a4

·

1 Parent(s): 3aaff40

update readme

Files changed (1) hide show

README.md +1 -5

README.md CHANGED Viewed

@@ -71,13 +71,9 @@ To deploy the quantized FP4 checkpoint with [TensorRT-LLM](https://github.com/NV
 ```
 from tensorrt_llm import SamplingParams
 from tensorrt_llm._torch import LLM
-from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig
 def main():
-    pytorch_config = PyTorchConfig()
     prompts = [
         "Hello, my name is",
         "The president of the United States is",
@@ -86,7 +82,7 @@ def main():
     ]
     sampling_params = SamplingParams(max_tokens=32)
-    llm = LLM(model="nvidia/DeepSeek-R1-FP4", tensor_parallel_size=8, pytorch_backend_config=pytorch_config, enable_attention_dp=True)
     outputs = llm.generate(prompts, sampling_params)

 ```
 from tensorrt_llm import SamplingParams
 from tensorrt_llm._torch import LLM
 def main():
     prompts = [
         "Hello, my name is",
         "The president of the United States is",
     ]
     sampling_params = SamplingParams(max_tokens=32)
+    llm = LLM(model="nvidia/DeepSeek-R1-FP4", tensor_parallel_size=8, enable_attention_dp=True)
     outputs = llm.generate(prompts, sampling_params)