# vLLM, ROCm backend.
#
# NOTE: vLLM's official ROCm support targets datacenter cards (MI300X /
# gfx942). Strix Halo is gfx1151 — support varies by image tag and
# release. If `rocm/vllm:latest` doesn't run on this iGPU, try
# `rocm/vllm-dev:nightly` or build from source against ROCm 7.x.
services:
  vllm:
    image: rocm/vllm:latest
    container_name: vllm
    restart: unless-stopped
    # Pass the host's GPU device nodes (/dev/kfd and /dev/dri) through to
    # the container.
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    # NOTE(review): SYS_PTRACE plus an unconfined seccomp profile relaxes
    # container isolation; presumably needed by the ROCm tooling — confirm
    # before exposing this host to untrusted workloads.
    cap_add:
      - SYS_PTRACE
    security_opt:
      - seccomp=unconfined
    # Numeric GIDs of host's video (44) and render (991) groups — names
    # don't exist inside the container. These values are host-specific;
    # verify with `getent group video render` on the target machine.
    group_add:
      - "44"
      - "991"
    # NOTE(review): with `ipc: host` the container shares the host's IPC
    # namespace, so `shm_size` likely has no effect — confirm and drop one
    # of the two if so.
    shm_size: 16g
    ipc: host
    # Model weights are mounted read-only from the host.
    volumes:
      - /models:/models:ro
    # Quoted so the mapping is always read as a string.
    ports:
      - "8000:8000"
    # Arguments passed to the image's entrypoint. Replace the placeholder
    # model path with a real directory under /models before starting.
    command:
      - --model
      - /models/REPLACE/ME
      - --host
      - 0.0.0.0
      - --port
      - "8000"