Document current coding-workflow stack state

Snapshot of where opencode + Qwen3-Coder + MCPs + Kimi-Linear + voice
  + Phoenix tracing land today, plus in-flight (oc-tree, kimi-linear
  context ramp) and next (ComfyUI) items with pointers to per-project
  NEXT_STEPS.md guides.
This commit is contained in:
2026-05-10 21:14:43 -04:00
parent 228fe8d1ac
commit a29793032d
35 changed files with 2067 additions and 37 deletions

View File

@@ -343,18 +343,23 @@ server.user(
_sudo=True,
)
# Kernel cmdline tuning per Gygeek/Framework-strix-halo-llm-setup:
# - amd_iommu=off — ~6 % memory-read improvement on Strix Halo
# - amdgpu.gttsize=117760 — ~115 GB GTT ceiling so the GPU can borrow
# most of system RAM dynamically. Acts as a
# ceiling, not an allocation. See ../../StrixHaloMemory.md
# for the UMA-vs-GTT trade-off discussion.
# Kernel cmdline tuning. The Strix Halo unified-memory recipe (kyuz0
# vllm-toolboxes "Kernel Parameters and Unified Memory" + Framework's
# "Linux + ROCm: January 2026 Stable Configurations" thread):
# - amd_iommu=off — ~6 % memory-read improvement
# - amdgpu.gttsize=131072 — 128 GiB GTT ceiling (deprecated knob
# but still honored on kernel 6.16+)
# - ttm.pages_limit=33554432 — 128 GiB in 4 KiB pages; forward-
# compatible TTM page cap
# Combined with BIOS UMA at 0.5 GB and HSA_FORCE_FINE_GRAIN_PCIE=1 in the
# container, PyTorch's HIP allocator merges the two rocminfo pools into a
# single ~110 GB arena. See ../../StrixHaloMemory.md for context.
# Requires a reboot to take effect; pyinfra leaves that to you.
# Point the GRUB_CMDLINE_LINUX_DEFAULT entry in /etc/default/grub at the
# kernel parameters described above: `line` is the regex selecting the
# existing entry and `replace` is the text supplied in its place.
# NOTE(review): name= and replace= each appear twice below. This looks like
# a rendered diff with its +/- markers stripped, so the first of each pair
# is presumably the pre-change line and the second the post-change line —
# confirm against the real file; Python rejects repeated keyword arguments.
files.line(
name="GRUB cmdline (amd_iommu, gttsize)",
name="GRUB cmdline (amd_iommu, gttsize, ttm)",
path="/etc/default/grub",
line=r"^GRUB_CMDLINE_LINUX_DEFAULT=.*",
replace='GRUB_CMDLINE_LINUX_DEFAULT="quiet splash amd_iommu=off amdgpu.gttsize=117760"',
replace='GRUB_CMDLINE_LINUX_DEFAULT="quiet splash amd_iommu=off amdgpu.gttsize=131072 ttm.pages_limit=33554432"',
# Runs with sudo because /etc/default/grub is a system file.
_sudo=True,
)
server.shell(
@@ -418,6 +423,7 @@ for svc in (
"llama",
"vllm",
"ollama",
"kimi-linear",
"openwebui",
"beszel",
"openlit",
@@ -559,6 +565,27 @@ for cfg in (
_sudo=True,
)
# Kimi-Linear build context: Dockerfile, build script, smoke test,
# tokenizer patch script, and operator doc. The compose file itself is
# copied by the for-loop above; these remaining assets live under
# compose/kimi-linear/ on the source side and land in
# /srv/docker/kimi-linear/ on the box. The source tree is the source of
# truth — pyinfra overwrites any drift on the target.
#
# Maps asset filename -> file mode; scripts get the executable bit.
# Dict insertion order keeps the uploads in the listed sequence.
for asset, mode in {
    "Dockerfile": "0664",
    "build.sh": "0775",
    "smoke.sh": "0775",
    "patch-tokenizer.sh": "0775",
    "README.md": "0664",
}.items():
    files.put(
        name=f"kimi-linear: {asset}",
        src=f"compose/kimi-linear/{asset}",
        dest=f"{COMPOSE_DIR}/kimi-linear/{asset}",
        mode=mode,
        group="docker",
        _sudo=True,
    )
# Voice stack — Wyoming-protocol Whisper (STT) and Piper (TTS). Models
# are downloaded on first start; bind-mounting these dirs survives
# container recreation.