Files
localgenai/pyinfra/framework/compose/vllm.yml

37 lines
910 B
YAML
Raw Normal View History

# vLLM, ROCm backend.
#
# NOTE: vLLM's official ROCm support targets datacenter cards (MI300X /
# gfx942). Strix Halo is gfx1151 — support varies by image tag and
# release. If `rocm/vllm:latest` doesn't run on this iGPU, try
# `rocm/vllm-dev:nightly` or build from source against ROCm 7.x.
services:
  # Single vLLM inference server, exposing its HTTP API on port 8000.
  vllm:
    image: rocm/vllm:latest
    container_name: vllm
    restart: unless-stopped
    # GPU device nodes passed through from the host: the ROCm compute
    # interface (/dev/kfd) and the DRM render nodes (/dev/dri).
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    # Relaxed sandboxing (ptrace capability, no seccomp filter).
    # NOTE(review): presumably required by ROCm tooling inside the
    # container — confirm before tightening or removing.
    cap_add:
      - SYS_PTRACE
    security_opt:
      - seccomp=unconfined
    # Numeric GIDs of host's video (44) and render (991) groups — names
    # don't exist inside the container. These values are host-specific;
    # verify with `getent group video render` on the target machine.
    group_add:
      - "44"
      - "991"
    # NOTE(review): with `ipc: host` the container shares the host's IPC
    # namespace (including /dev/shm), so `shm_size` is likely redundant —
    # confirm and keep only the one that is intended.
    shm_size: 16g
    ipc: host
    # Model weights are mounted read-only from the host.
    volumes:
      - /models:/models:ro
    # Quoted so the mapping is always read as a string, never a number.
    ports:
      - "8000:8000"
    # Arguments handed to the image's entrypoint.
    # NOTE(review): this assumes the image's entrypoint launches the vLLM
    # API server; confirm for the image tag in use.
    command:
      - --model
      # Placeholder: replace with the model directory under /models.
      - /models/REPLACE/ME
      - --host
      - 0.0.0.0
      - --port
      - "8000"