Initial commit: localgenai stack
Containerized local LLM stack for the Framework Desktop / Strix Halo,
plus the OpenCode harness on the Mac side.
- pyinfra/framework/: pyinfra deploy targeting the box
- llama.cpp (Vulkan), vLLM (ROCm), Ollama (ROCm with HSA override
for gfx1151), OpenWebUI
- Beszel (host + container + AMD GPU dashboard via sysfs)
- OpenLIT (LLM fleet metrics)
- Phoenix (per-trace agent waterfall)
- OpenHands (autonomous agent in a Docker sandbox)
- opencode/: OpenCode config + Phoenix bridge plugin (OTel exporter)
- install.sh deploys to ~/.config/opencode/
- StrixHaloSetup.md / StrixHaloMemory.md / Roadmap.md / TODO.md:
documentation and planning
- testing/qwen3-coder-30b/: small evaluation harness
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
36
pyinfra/framework/compose/vllm.yml
Normal file
36
pyinfra/framework/compose/vllm.yml
Normal file
@@ -0,0 +1,36 @@
|
||||
---
# vLLM, ROCm backend.
#
# Runs the vLLM ROCm image with the host's GPU device nodes passed through,
# mounts the host's /models directory read-only, and publishes port 8000.
#
# NOTE: vLLM's official ROCm support targets datacenter cards (MI300X /
# gfx942). Strix Halo is gfx1151 — support varies by image tag and
# release. If `rocm/vllm:latest` doesn't run on this iGPU, try
# `rocm/vllm-dev:nightly` or build from source against ROCm 7.x.
#
# Overridable via the environment or an adjacent .env file:
#   VLLM_IMAGE  image reference to run      (default: rocm/vllm:latest)
#   VLLM_MODEL  model path in the container (default: /models/REPLACE/ME)
services:
  vllm:
    # Swap the image without editing this file, e.g.
    #   VLLM_IMAGE=rocm/vllm-dev:nightly docker compose -f vllm.yml up -d
    image: "${VLLM_IMAGE:-rocm/vllm:latest}"
    container_name: vllm
    restart: unless-stopped

    # GPU device nodes passed through from the host.
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri

    # Relaxed sandboxing for the GPU runtime.
    # NOTE(review): confirm both are still required for the image tag in use;
    # drop them if the workload runs without.
    cap_add:
      - SYS_PTRACE
    security_opt:
      - seccomp=unconfined

    # Numeric GIDs of host's video (44) and render (991) groups — names
    # don't exist inside the container. These values are host-specific;
    # re-check them with `getent group video render` when deploying to a
    # different machine.
    group_add:
      - "44"
      - "991"

    # NOTE(review): with `ipc: host` the container shares the host IPC
    # namespace (including the host's /dev/shm), so `shm_size` probably has
    # no effect here. Both are kept as in the original — confirm and drop one.
    shm_size: 16g
    ipc: host

    volumes:
      - /models:/models:ro

    ports:
      - "8000:8000"

    # Only flags are given, so this relies on the image defining an
    # entrypoint that accepts them — NOTE(review): confirm for the chosen tag.
    # The default model path is a placeholder and must be replaced (or
    # VLLM_MODEL set) before the service can load a model.
    command:
      - "--model"
      - "${VLLM_MODEL:-/models/REPLACE/ME}"
      - "--host"
      - "0.0.0.0"
      - "--port"
      - "8000"
|
||||
Reference in New Issue
Block a user