| # vLLM and dependencies | |
| vllm==0.10.1 | |
| torch==2.7.1 | |
| transformers==4.56.2 | |
| tokenizers==0.22.1 | |
| xformers==0.0.31 | |
| # FastAPI and server | |
| fastapi==0.117.1 | |
| uvicorn==0.37.0 | |
| pydantic==2.11.9 | |
| starlette==0.48.0 | |
| # Core dependencies | |
| numpy==2.2.6 | |
| requests==2.32.5 | |
| aiohttp==3.12.15 | |
| Jinja2==3.1.6 | |
| packaging==25.0 | |
| filelock==3.19.1 | |
| huggingface-hub==0.35.1 | |
| tqdm==4.67.1 | |
| PyYAML==6.0.2 | |
| # vLLM-specific dependencies | |
| ray==2.49.2 | |
| triton==3.3.1 | |
| xgrammar==0.1.21 | |
| outlines_core==0.2.10 | |
| compressed-tensors==0.10.2 | |
| safetensors==0.6.2 | |
| sentencepiece==0.2.1 | |
| # NVIDIA CUDA packages | |
| nvidia-cublas-cu12==12.6.4.1 | |
| nvidia-cuda-cupti-cu12==12.6.80 | |
| nvidia-cuda-nvrtc-cu12==12.6.77 | |
| nvidia-cuda-runtime-cu12==12.6.77 | |
| nvidia-cudnn-cu12==9.5.1.17 | |
| nvidia-cufft-cu12==11.3.0.4 | |
| nvidia-curand-cu12==10.3.7.77 | |
| nvidia-cusolver-cu12==11.7.1.2 | |
| nvidia-cusparse-cu12==12.5.4.2 | |
| nvidia-nccl-cu12==2.26.2 | |
| nvidia-nvjitlink-cu12==12.6.85 | |
| # OpenAI compatibility and general utilities | |
| openai==1.109.0 | |
| tiktoken==0.11.0 | |
| typing_extensions==4.14.1 | |
| einops==0.8.1 | |
| fsspec==2025.9.0 | |
| psutil==7.1.0 | |