Initial commit: localgenai stack

Containerized local LLM stack for the Framework Desktop / Strix Halo, plus the OpenCode harness on the Mac side. - pyinfra/framework/: pyinfra deploy targeting the box - llama.cpp (Vulkan), vLLM (ROCm), Ollama (ROCm with HSA override for gfx1151), OpenWebUI - Beszel (host + container + AMD GPU dashboard via sysfs) - OpenLIT (LLM fleet metrics) - Phoenix (per-trace agent waterfall) - OpenHands (autonomous agent in a Docker sandbox) - opencode/: OpenCode config + Phoenix bridge plugin (OTel exporter) - install.sh deploys to ~/.config/opencode/ - StrixHaloSetup.md / StrixHaloMemory.md / Roadmap.md / TODO.md: documentation and planning - testing/qwen3-coder-30b/: small evaluation harness Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-08 11:35:10 -04:00
commit 2c4bfefa95
36 changed files with 5265 additions and 0 deletions
--- a/pyinfra/framework/compose/vllm.yml
+++ b/pyinfra/framework/compose/vllm.yml
@@ -0,0 +1,36 @@
+# vLLM, ROCm backend.
+#
+# NOTE: vLLM's official ROCm support targets datacenter cards (MI300X /
+# gfx942). Strix Halo is gfx1151 — support varies by image tag and
+# release. If `rocm/vllm:latest` doesn't run on this iGPU, try
+# `rocm/vllm-dev:nightly` or build from source against ROCm 7.x.
+services:
+  vllm:
+    image: rocm/vllm:latest  # floating tag; fallbacks are listed in the NOTE above
+    container_name: vllm
+    restart: unless-stopped
+    devices:  # host GPU device nodes exposed to the container
+      - /dev/kfd:/dev/kfd
+      - /dev/dri:/dev/dri
+    cap_add:  # NOTE(review): relaxed sandboxing; confirm ROCm still needs it
+      - SYS_PTRACE
+    security_opt:
+      - seccomp=unconfined
+    # Host GIDs for the video (44) and render (991) groups, given as numbers
+    # because the names don't exist in the container; verify: getent group.
+    group_add:
+      - "44"
+      - "991"
+    shm_size: 16g  # NOTE(review): likely ignored while ipc: host is set
+    ipc: host  # share the host IPC namespace
+    volumes:
+      - /models:/models:ro  # host /models mounted read-only
+    ports:
+      - "8000:8000"  # host:container; no host IP given
+    command:  # args only; assumes image ENTRYPOINT runs vLLM — TODO confirm
+      - --model
+      - /models/REPLACE/ME  # placeholder: replace with a path under /models
+      - --host
+      - 0.0.0.0
+      - --port
+      - "8000"  # keep in sync with the container side of `ports`