folder-per-station

2026-05-07 07:37:40 -04:00
parent f27b96a68e
commit 7b594c71b1
8 changed files with 496 additions and 65 deletions
--- a/framework/compose/llama.yml
+++ b/framework/compose/llama.yml
@@ -0,0 +1,24 @@
+# llama.cpp HTTP server on the Vulkan backend (RADV on Strix Halo).
+# Point --model at a real GGUF under /models before `docker compose up -d`.
+services:
+  llama:
+    container_name: llama
+    image: ghcr.io/ggml-org/llama.cpp:server-vulkan
+    restart: unless-stopped
+    ports:
+      - '8080:8080'
+    volumes:
+      - '/models:/models:ro'  # host model store, mounted read-only
+    devices:
+      - '/dev/dri:/dev/dri'  # GPU device nodes for the Vulkan backend
+    command:
+      - '--model'
+      - '/models/REPLACE/ME/model.gguf'  # placeholder: set the real path
+      - '--host'
+      - '0.0.0.0'
+      - '--port'
+      - '8080'
+      - '--n-gpu-layers'
+      - '999'
+      - '--ctx-size'
+      - '32768'
--- a/framework/compose/ollama.yml
+++ b/framework/compose/ollama.yml
@@ -0,0 +1,27 @@
+# Ollama on the ROCm backend. It serves models on demand, so it is safe
+# to start this service before anything has been placed in /models.
+#
+# Storage: the container's /root/.ollama is bind-mounted from
+# /models/ollama, so all model data on the host lives under /models.
+# Note: Ollama names its blobs by SHA256 digest rather than as curated
+# .gguf paths, so don't point llama.cpp/vLLM at that store. Keep the
+# GGUFs for those engines at /models/<vendor>/... instead.
+services:
+  ollama:
+    container_name: ollama
+    image: ollama/ollama:rocm
+    restart: unless-stopped
+    ports:
+      - '11434:11434'
+    volumes:
+      - '/models/ollama:/root/.ollama'
+      - '/models:/models:ro'
+    devices:
+      - '/dev/kfd:/dev/kfd'
+      - '/dev/dri:/dev/dri'
+    # Supplementary groups, given as the host's numeric GIDs for video
+    # (44) and render (991): the names don't exist inside the container,
+    # and the IDs must match the host so /dev/kfd + /dev/dri are usable.
+    group_add:
+      - '44'
+      - '991'
--- a/framework/compose/vllm.yml
+++ b/framework/compose/vllm.yml
@@ -0,0 +1,36 @@
+# vLLM, ROCm backend.
+# NOTE: vLLM's official ROCm support targets datacenter cards (MI300X /
+# gfx942). Strix Halo is gfx1151 — support varies by image tag and
+# release. If `rocm/vllm:latest` doesn't run on this iGPU, try
+# `rocm/vllm-dev:nightly` or build from source against ROCm 7.x.
+services:
+  vllm:
+    image: rocm/vllm:latest
+    container_name: vllm
+    restart: unless-stopped
+    devices:
+      - /dev/kfd:/dev/kfd
+      - /dev/dri:/dev/dri
+    cap_add:
+      - SYS_PTRACE
+    security_opt:
+      - seccomp=unconfined
+    # Host video (44) / render (991) GIDs — names don't exist in-container.
+    group_add:
+      - "44"
+      - "991"
+    shm_size: 16g  # NOTE(review): probably moot with ipc: host — confirm
+    ipc: host
+    volumes:
+      - /models:/models:ro
+    ports:
+      - "8000:8000"
+    # command holds only flags, so name the API server program explicitly.
+    entrypoint: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
+    command:
+      - --model
+      - /models/REPLACE/ME  # placeholder: replace with the real model path
+      - --host
+      - 0.0.0.0
+      - --port
+      - "8000"