# llama.cpp server, Vulkan backend (RADV on Strix Halo).
# Edit the --model path before `docker compose up -d`.

services:
  llama:
    image: ghcr.io/ggml-org/llama.cpp:server-vulkan
    container_name: llama
    restart: unless-stopped

    # Pass the host's /dev/dri device nodes through to the container.
    # NOTE(review): presumably what gives the Vulkan backend access to the
    # GPU -- confirm the container user has permission on these nodes.
    devices:
      - /dev/dri:/dev/dri

    # Host /models is mounted read-only (:ro) at /models in the container.
    volumes:
      - /models:/models:ro

    # Host port 8080 -> container port 8080 (must match --port below).
    # Kept quoted so the mapping is always read as a string.
    ports:
      - "8080:8080"

    # Arguments passed to the image's server entrypoint, one token per item.
    # Numeric values are quoted so they stay strings.
    command:
      # Placeholder path under the /models mount; replace before starting.
      - --model
      - /models/REPLACE/ME/model.gguf
      # Listen on all interfaces inside the container so the published
      # port is reachable from the host.
      - --host
      - 0.0.0.0
      - --port
      - "8080"
      # NOTE(review): 999 is presumably just "more layers than any model
      # has", i.e. offload everything to the GPU -- confirm.
      - --n-gpu-layers
      - "999"
      - --ctx-size
      - "32768"