Containerized local LLM stack for the Framework Desktop / Strix Halo,
plus the OpenCode harness on the Mac side.
- pyinfra/framework/: pyinfra deploy targeting the box
- llama.cpp (Vulkan), vLLM (ROCm), Ollama (ROCm with HSA override
for gfx1151), OpenWebUI
- Beszel (host + container + AMD GPU dashboard via sysfs)
- OpenLIT (LLM fleet metrics)
- Phoenix (per-trace agent waterfall)
- OpenHands (autonomous agent in a Docker sandbox)
- opencode/: OpenCode config + Phoenix bridge plugin (OTel exporter)
- install.sh deploys to ~/.config/opencode/
- StrixHaloSetup.md / StrixHaloMemory.md / Roadmap.md / TODO.md:
documentation and planning
- testing/qwen3-coder-30b/: small evaluation harness
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
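
The Ollama HSA override mentioned above comes down to one environment variable. A minimal compose sketch, assuming the official `ollama/ollama:rocm` image and the same `/dev/kfd` + `/dev/dri` device mappings as the vLLM service in this repo (the actual service definition may differ); `HSA_OVERRIDE_GFX_VERSION=11.0.0` makes the gfx1151 iGPU report as gfx1100 so the bundled ROCm kernels load:

services:
  ollama:
    image: ollama/ollama:rocm
    restart: unless-stopped
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    environment:
      # gfx1151 is not in Ollama's supported-arch list; spoof gfx1100.
      - HSA_OVERRIDE_GFX_VERSION=11.0.0
    volumes:
      - ollama:/root/.ollama
    ports:
      - "11434:11434"
volumes:
  ollama: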
# vLLM, ROCm backend.
#
# NOTE: vLLM's official ROCm support targets datacenter cards (MI300X /
# gfx942). Strix Halo is gfx1151; support varies by image tag and
# release. If `rocm/vllm:latest` doesn't run on this iGPU, try
# `rocm/vllm-dev:nightly` or build from source against ROCm 7.x.
services:
  vllm:
    image: rocm/vllm:latest
    container_name: vllm
    restart: unless-stopped
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    cap_add:
      - SYS_PTRACE
    security_opt:
      - seccomp=unconfined
    # Numeric GIDs of the host's video (44) and render (991) groups; the
    # names don't exist inside the container.
    group_add:
      - "44"
      - "991"
    shm_size: 16g
    ipc: host
    volumes:
      - /models:/models:ro
    ports:
      - "8000:8000"
    command:
      - --model
      - /models/REPLACE/ME
      - --host
      - 0.0.0.0
      - --port
      - "8000"
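
Once the container is up, vLLM serves the OpenAI-compatible API on the published port. A small smoke-test sketch (the helper name is hypothetical, not part of this repo) that builds a chat-completions request against that endpoint using only the standard library:

```python
import json
import urllib.request

def chat_request(prompt: str,
                 model: str = "/models/REPLACE/ME",
                 base: str = "http://localhost:8000") -> urllib.request.Request:
    """Build an OpenAI-style chat-completions request for the vLLM server."""
    body = json.dumps({
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 64,
    }).encode()
    return urllib.request.Request(
        f"{base}/v1/chat/completions",
        data=body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )

req = chat_request("Say hello in one word.")
print(req.full_url)  # http://localhost:8000/v1/chat/completions
# To actually send it once the container is running:
#   with urllib.request.urlopen(req) as r:
#       print(json.load(r)["choices"][0]["message"]["content"])
```

The `model` value must match the `--model` path passed to the server, which is why the placeholder path from the compose file is reused here.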