# vLLM and dependencies
vllm==0.10.1
torch==2.7.1
transformers==4.56.2
tokenizers==0.22.1
xformers==0.0.31

# FastAPI and server
fastapi==0.117.1
uvicorn==0.37.0
pydantic==2.11.9
starlette==0.48.0

# Core dependencies
numpy==2.2.6
requests==2.32.5
aiohttp==3.12.15
Jinja2==3.1.6
packaging==25.0
filelock==3.19.1
huggingface-hub==0.35.1
tqdm==4.67.1
PyYAML==6.0.2

# vLLM specific
ray==2.49.2
triton==3.3.1
xgrammar==0.1.21
outlines_core==0.2.10
compressed-tensors==0.10.2
safetensors==0.6.2
sentencepiece==0.2.1

# NVIDIA CUDA packages
nvidia-cublas-cu12==12.6.4.1
nvidia-cuda-cupti-cu12==12.6.80
nvidia-cuda-nvrtc-cu12==12.6.77
nvidia-cuda-runtime-cu12==12.6.77
nvidia-cudnn-cu12==9.5.1.17
nvidia-cufft-cu12==11.3.0.4
nvidia-curand-cu12==10.3.7.77
nvidia-cusolver-cu12==11.7.1.2
nvidia-cusparse-cu12==12.5.4.2
nvidia-nccl-cu12==2.26.2
nvidia-nvjitlink-cu12==12.6.85

# OpenAI compatibility and utilities
openai==1.109.0
tiktoken==0.11.0
typing_extensions==4.14.1
einops==0.8.1
fsspec==2025.9.0
psutil==7.1.0