From a29793032dea88bb952358ecb8281a46a6b29be7 Mon Sep 17 00:00:00 2001 From: noisedestroyers Date: Sun, 10 May 2026 21:14:43 -0400 Subject: [PATCH] Document current coding-workflow stack state Snapshot of where opencode + Qwen3-Coder + MCPs + Kimi-Linear + voice + Phoenix tracing land today, plus in-flight (oc-tree, kimi-linear context ramp) and next (ComfyUI) items with pointers to per-project NEXT_STEPS.md guides. --- README.md | 17 ++ Roadmap.md | 39 ++++ .../.dmux-hooks/README.md | 38 ++++ gitea-opencode-agent-project/README.md | 64 ++++++ gitea-opencode-agent-project/agents/README.md | 57 +++++ .../agents/deployment-agent.sh | 26 +++ .../agents/documentation-agent.sh | 26 +++ .../agents/monitoring-agent.sh | 26 +++ .../agents/scripting-agent.sh | 26 +++ kimi-linear/NEXT_STEPS.md | 146 ++++++++++++ oc-tree/.python-version | 1 + oc-tree/NEXT_STEPS.md | 120 ++++++++++ oc-tree/README.md | 72 ++++++ oc-tree/pyproject.toml | 21 ++ oc-tree/src/oc_tree/__init__.py | 0 oc-tree/src/oc_tree/__main__.py | 20 ++ oc-tree/src/oc_tree/client.py | 93 ++++++++ oc-tree/src/oc_tree/probe.py | 100 +++++++++ oc-tree/src/oc_tree/widgets/__init__.py | 0 oc-tree/uv.lock | 212 ++++++++++++++++++ opencode/README.md | 121 +++++++++- opencode/install.sh | 141 ++++++++++-- opencode/opencode.json | 58 ++++- opencode/serena-ide-trim.yml | 92 ++++++++ pyinfra/framework/compose/kimi-linear.yml | 112 +++++++++ .../framework/compose/kimi-linear/Dockerfile | 35 +++ .../framework/compose/kimi-linear/README.md | 124 ++++++++++ .../framework/compose/kimi-linear/build.sh | 51 +++++ .../compose/kimi-linear/patch-tokenizer.sh | 66 ++++++ .../framework/compose/kimi-linear/smoke.sh | 24 ++ pyinfra/framework/compose/openwebui.yml | 5 + pyinfra/framework/deploy.py | 43 +++- pyinfra/framework/run.sh | 2 +- qwen-large-codebase-roadmap.md | 77 +++++++ vscode-continue-config.yml | 49 ++++ 35 files changed, 2067 insertions(+), 37 deletions(-) create mode 100644 gitea-opencode-agent-project/.dmux-hooks/README.md create mode 100644 gitea-opencode-agent-project/README.md create mode 100644 gitea-opencode-agent-project/agents/README.md create mode 100644 gitea-opencode-agent-project/agents/deployment-agent.sh create mode 100644 gitea-opencode-agent-project/agents/documentation-agent.sh create mode 100644 gitea-opencode-agent-project/agents/monitoring-agent.sh create mode 100644 gitea-opencode-agent-project/agents/scripting-agent.sh create mode 100644 kimi-linear/NEXT_STEPS.md create mode 100644 oc-tree/.python-version create mode 100644 oc-tree/NEXT_STEPS.md create mode 100644 oc-tree/README.md create mode 100644 oc-tree/pyproject.toml create mode 100644 oc-tree/src/oc_tree/__init__.py create mode 100644 oc-tree/src/oc_tree/__main__.py create mode 100644 oc-tree/src/oc_tree/client.py create mode 100644 oc-tree/src/oc_tree/probe.py create mode 100644 oc-tree/src/oc_tree/widgets/__init__.py create mode 100644 oc-tree/uv.lock create mode 100644 opencode/serena-ide-trim.yml create mode 100644 pyinfra/framework/compose/kimi-linear.yml create mode 100644 pyinfra/framework/compose/kimi-linear/Dockerfile create mode 100644 pyinfra/framework/compose/kimi-linear/README.md create mode 100755 pyinfra/framework/compose/kimi-linear/build.sh create mode 100755 pyinfra/framework/compose/kimi-linear/patch-tokenizer.sh create mode 100755 pyinfra/framework/compose/kimi-linear/smoke.sh create mode 100644 qwen-large-codebase-roadmap.md create mode 100644 vscode-continue-config.yml diff --git a/README.md b/README.md index 18771b4..5cf53ea 100644 --- a/README.md 
+++ b/README.md @@ -68,6 +68,23 @@ opencode Send a prompt; watch it land in Phoenix at . +## Coding workflows — state of the stack (2026-05-10) + +| Component | Status | Notes | +|---|---|---| +| **Primary**: opencode → `framework/qwen3-coder:30b` (Ollama) | Working daily-driver | Full MCP toolbox (playwright, searxng, serena, basic-memory, sequential-thinking, task-master). Phoenix bridge emits OTel traces for every call. | +| **Secondary**: opencode → `framework-vllm/kimi-linear` (vLLM) | Configured, experimental | `tool_call: false` — Kimi-Linear is a research model and isn't strongly tool-trained. Long-context chat only. Switch via `/model framework-vllm/kimi-linear`. | +| **Chat front-end for Kimi-Linear**: OpenWebUI | Working | vLLM endpoint wired via `OPENAI_API_BASE_URLS`. Pick `kimi-linear` from the model selector in OpenWebUI. | +| **Voice loop**: faster-whisper + Kokoro | Deployed | OpenAI-compatible endpoints; not yet wired into a hands-free chat loop. | +| **Observability**: Phoenix per-trace + OpenLIT fleet | Working | All opencode prompts traced via `.opencode/plugin/phoenix-bridge.js`. | +| **In flight**: oc-tree TUI sidecar | M0 skeleton done, awaiting probe | Live tree view of opencode's SSE event stream; tmux-side companion to Phoenix's post-hoc traces. See [`oc-tree/NEXT_STEPS.md`](oc-tree/NEXT_STEPS.md). | +| **In flight**: Kimi-Linear context ramp | P0 done at 32K, BIOS+GTT recipe applied | Next: ramp `--max-model-len` toward 1M and benchmark. See [`kimi-linear/NEXT_STEPS.md`](kimi-linear/NEXT_STEPS.md). | +| **Configured but not deployed**: VS Code Continue | YAML ready | Drop [`vscode-continue-config.yml`](vscode-continue-config.yml) into `~/.continue/config.yaml`; `ollama pull nomic-embed-text` on the box for `@codebase` indexing. | +| **Planned next**: ComfyUI for image generation | Phase plan written | Flux.1-Dev via kyuz0 toolbox, same pyinfra pattern as kimi-linear. See task list (CF-P0..P4). | +| **Roadmap-only** (not started): Aider, Cline, OpenHands, LiteLLM, Multica | — | See `Roadmap.md`. | + +**Single-line summary**: opencode + Qwen3-Coder + the MCP toolbox is the working coding harness; Kimi-Linear is alongside for long-context chat; oc-tree and ComfyUI are the next two infra additions. + ## Why this stack - **Bandwidth, not VRAM, is the ceiling on Strix Halo.** 256 GB/s memory diff --git a/Roadmap.md b/Roadmap.md index 27d3bdd..5ad82ea 100644 --- a/Roadmap.md +++ b/Roadmap.md @@ -169,6 +169,38 @@ Already wired in `localgenai/opencode/`: - **[mcp-server-fetch](https://github.com/modelcontextprotocol/servers/tree/main/src/fetch)** — not yet wired; useful if Playwright feels heavy for plain page reads. +**Next: cited-search variant.** Fork `mcp-searxng` (or wrap it) to expose +a `search_cited` tool that auto-fetches the top 3-5 results, extracts +readable text via mozilla/readability or trafilatura, and returns +numbered `[n] URL\n` blocks with a tool description telling the +model to cite as `[n]`. Closes the citation-quality gap with +[opencode-websearch-cited](https://github.com/ghoulr/opencode-websearch-cited) +(which delegates to Gemini/OpenAI/OpenRouter) while staying fully local. +Tradeoff: 3-5 HTTP fetches per query → slower; needs timeout + graceful +skip for sites that block scrapers. 
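+
+A minimal sketch of the fetch-and-cite core (illustrative Python, not the
+actual mcp-searxng fork — `results` is assumed to be the SearXNG hit list,
+and trafilatura stands in for whichever extractor gets picked):
+
+```python
+import httpx        # assumed available; any HTTP client with timeouts works
+import trafilatura  # readable-text extraction, one of the options above
+
+def build_cited_context(results: list[dict], top_n: int = 4,
+                        timeout: float = 8.0) -> str:
+    """Fetch the top-N hits and return numbered [n] URL blocks for citing."""
+    blocks = []
+    for n, hit in enumerate(results[:top_n], start=1):
+        try:
+            resp = httpx.get(hit["url"], timeout=timeout, follow_redirects=True)
+            resp.raise_for_status()
+            text = trafilatura.extract(resp.text) or ""
+        except Exception:
+            continue  # graceful skip: blocked scrapers, timeouts, dead links
+        blocks.append(f"[{n}] {hit['url']}\n{text[:4000]}")
+    return "\n\n".join(blocks)
+```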
+ +### Observability — partial + +- **[Arize Phoenix](https://github.com/Arize-ai/phoenix)** wired via + `opencode/.opencode/plugin/phoenix-bridge.js`: OpenCode's experimental + OTel spans go to Phoenix at `framework:6006` with the OpenInference + span processor for LLM/tool-aware attributes. Subagents nest under the + parent session as a unified trace tree. **External** trace viewing is + solved. +- **Gap: in-harness visibility.** OpenCode's TUI has no sidebar plugin + API ([sst/opencode#5971](https://github.com/sst/opencode/issues/5971), + open) and no built-in tree/timeline view. Plugins can only show + toasts. While driving the agent, there's no live "what's it doing + right now" pane — you have to alt-tab to Phoenix. +- **Next: SSE sidecar.** `opencode serve` exposes documented SSE streams + at `/global/event` and `/session/{id}/event` carrying every tool call, + message part, and child-session creation. A small TUI sidecar + (Bubbletea/textual, ~200 lines) running in a tmux pane next to + opencode can subscribe and render a live tree: + `session → tool call → child session → tool call`. Subagents *are* + child sessions in the data model, so the hierarchy comes for free. + Phoenix stays the deep-trace store; sidecar is the live pane. + ### Persistent memory OpenCode's memory is per-session unless you wire MCP for it. @@ -242,6 +274,13 @@ In rough order of impact-per-hour-spent: OpenCode's per-task UX feels too hands-on. 11. **OpenWebUI for household-shared chat access.** Adds value for non-coding LLM use, fills a small gap in the multi-user story. +12. **SSE sidecar for in-harness visibility.** Live tree pane subscribed + to opencode's `/session/{id}/event` SSE; closes the "what's it + doing right now" gap that Phoenix doesn't fill (Phoenix is for + after-the-fact deep traces, not glanceable status). +13. **Cited-search MCP.** Fork `mcp-searxng` to add `search_cited` with + auto-fetch + readability extraction. Smaller-impact than the + sidecar but cheap and keeps everything local. ## Reference catalogs to monitor diff --git a/gitea-opencode-agent-project/.dmux-hooks/README.md b/gitea-opencode-agent-project/.dmux-hooks/README.md new file mode 100644 index 0000000..12fdd84 --- /dev/null +++ b/gitea-opencode-agent-project/.dmux-hooks/README.md @@ -0,0 +1,38 @@ +# Agent Orchestration Hooks + +This directory contains dmux hooks that define how agents work in the OpenCode environment. + +## Available Hooks + +- `worktree_created` - Triggered when a new worktree is created +- `run_test` - For running tests +- `run_dev` - For development tasks +- `post_merge` - After merging changes + +## Hook Usage + +Each hook should be executable and contain logic for the specific agent role: + +```bash +#!/bin/bash +# Example hook for documentation agent +# .dmux-hooks/worktree_created + +# Set environment for qwen3-coder +export DMUX_AGENT="opencode" +export QWEN_MODEL="qwen3-coder:30b" +export AGENT_CONTEXT="homelab-documentation" + +# Run qwen3-coder documentation tasks +echo "Initializing homelab documentation agent..." 
+qwen3-coder --generate-docs \ + --model "qwen3-coder:30b" \ + --output "$DMUX_WORKTREE_PATH/docs/homelab.md" +``` + +## Agent Roles + +- `documentation-agent.sh` - Generates documentation using qwen3-coder +- `scripting-agent.sh` - Creates automation scripts +- `deployment-agent.sh` - Manages deployments +- `monitoring-agent.sh` - Tracks system performance \ No newline at end of file diff --git a/gitea-opencode-agent-project/README.md b/gitea-opencode-agent-project/README.md new file mode 100644 index 0000000..6a7598d --- /dev/null +++ b/gitea-opencode-agent-project/README.md @@ -0,0 +1,64 @@ +# Gitea OpenCode Agent Project + +This is a default project structure for an agentic OpenCode development environment using the qwen3-coder model (30b). + +## Project Structure + +``` +gitea-opencode-agent-project/ +├── docs/ # Documentation +├── agents/ # Agent definition files +├── scripts/ # Automation scripts +├── config/ # Configuration files +├── src/ # Source code +├── README.md # Project documentation +├── AGENTS.md # OpenCode agent definitions +└── .gitignore # Git ignore patterns +``` + +## Agent Definitions + +Agent files are stored in the `agents/` directory with the following naming convention: +- `agent-.sh` for bash agents +- `agent-.py` for Python agents +- `agent-.json` for configuration files + +## Documentation + +The `docs/` directory contains the project documentation, including: +- Agent documentation +- Implementation guides +- Usage examples +- API documentation + +## Setup Instructions + +1. Initialize the project with `init` +2. Configure the qwen3-coder model +3. Deploy agents using the standard OpenCode Task Master system +4. Run the environment with `docker compose up -d` + +## Configuration + +Configuration files are stored in `config/` and include: +- Agent parameters +- Environment settings +- Model specifications +- Deployment configurations + +## OpenCode Integration + +This project is designed to work with OpenCode's agent system directly without requiring dmux. The agents can be invoked as specialized tools using OpenCode's task master functionality: + +```bash +# Example OpenCode task master commands +/opencode agent documentation-agent +/opencode agent scripting-agent +/opencode agent deployment-agent +``` + +The qwen3-coder (30b) model integration is handled through OpenCode's provider configuration, allowing each agent to leverage the advanced reasoning capabilities of the model for: +- Complex problem identification in infrastructure documentation +- Multi-step configuration automation +- Intelligent deployment strategy recommendations +- Analysis of complex dependency chains in network diagrams \ No newline at end of file diff --git a/gitea-opencode-agent-project/agents/README.md b/gitea-opencode-agent-project/agents/README.md new file mode 100644 index 0000000..b71cb46 --- /dev/null +++ b/gitea-opencode-agent-project/agents/README.md @@ -0,0 +1,57 @@ +# Agent Orchestration + +This project uses OpenCode's Task Master system for agent orchestration instead of dmux. 
+ +## Available Agents + +- `documentation-agent.sh` - Generates documentation using qwen3-coder +- `scripting-agent.sh` - Creates automation scripts and configurations +- `deployment-agent.sh` - Manages deployments and optimizations +- `monitoring-agent.sh` - Tracks system health and performance + +## Usage with OpenCode + +Agents can be invoked using OpenCode's Task Master system: + +```bash +# Initialize the project +/init + +# Run specific agents +/opencode agent documentation-agent +/opencode agent scripting-agent +/opencode agent deployment-agent + +# Run all agents +/opencode agents all +``` + +## Integration with qwen3-coder (30b) + +Each agent leverages the qwen3-coder model's advanced reasoning capabilities: +- Complex problem identification in infrastructure documentation +- Multi-step configuration automation +- Intelligent deployment strategy recommendations +- Analysis of complex dependency chains in network diagrams + +## Agent Responsibilities + +### Documentation Agent +- Generates documentation from code analysis +- Creates standardized documentation templates +- Updates documentation based on infrastructure changes + +### Scripting Agent +- Creates Docker Compose configurations +- Generates setup scripts +- Writes automation routines + +### Deployment Agent +- Analyzes deployment strategies +- Optimizes container configurations +- Manages multi-service deployments + +### Monitoring Agent +- Monitors system performance +- Analyzes infrastructure health +- Detects potential issues \ No newline at end of file diff --git a/gitea-opencode-agent-project/agents/deployment-agent.sh b/gitea-opencode-agent-project/agents/deployment-agent.sh new file mode 100644 index 0000000..bbfc8bf --- /dev/null +++ b/gitea-opencode-agent-project/agents/deployment-agent.sh @@ -0,0 +1,26 @@ +# Deployment Agent + +## Overview +This agent specializes in managing deployments and configurations using the qwen3-coder model (30b) for the homelab infrastructure project. + +## Capabilities +- Analyzes deployment strategies +- Optimizes container configurations +- Manages multi-service deployments +- Provides intelligent deployment recommendations + +## Usage +```bash +# Optimize deployment with qwen3-coder +qwen3-coder --optimize-deployment \ + --model "qwen3-coder:30b" \ + --config "docker-compose.yml" \ + --target "homelab-system" +``` + +## Integration +This agent integrates with: +- Docker Compose configurations +- CI/CD pipelines +- Network documentation +- Security configurations \ No newline at end of file diff --git a/gitea-opencode-agent-project/agents/documentation-agent.sh b/gitea-opencode-agent-project/agents/documentation-agent.sh new file mode 100644 index 0000000..ae857f2 --- /dev/null +++ b/gitea-opencode-agent-project/agents/documentation-agent.sh @@ -0,0 +1,26 @@ +# Documentation Agent + +## Overview +This agent specializes in generating and maintaining documentation using the qwen3-coder model (30b) for the homelab infrastructure project. 
+ +## Capabilities +- Generates documentation from code analysis +- Creates standardized documentation templates +- Updates documentation based on infrastructure changes +- Analyzes documentation gaps and suggests improvements + +## Usage +```bash +# Generate documentation with qwen3-coder +qwen3-coder --generate-docs \ + --model "qwen3-coder:30b" \ + --project "homelab-infrastructure" \ + --output "docs/homelab.md" +``` + +## Integration +This agent integrates with: +- Code repositories +- Network diagrams +- Configuration files +- Deployment scripts \ No newline at end of file diff --git a/gitea-opencode-agent-project/agents/monitoring-agent.sh b/gitea-opencode-agent-project/agents/monitoring-agent.sh new file mode 100644 index 0000000..99f1f66 --- /dev/null +++ b/gitea-opencode-agent-project/agents/monitoring-agent.sh @@ -0,0 +1,26 @@ +# Monitoring Agent + +## Overview +This agent specializes in monitoring system health and performance using the qwen3-coder model (30b) for the homelab infrastructure project. + +## Capabilities +- Monitors system performance +- Analyzes infrastructure health +- Detects potential issues +- Provides optimization suggestions + +## Usage +```bash +# Monitor system with qwen3-coder +qwen3-coder --monitor-system \ + --model "qwen3-coder:30b" \ + --target "homelab-network" \ + --output "reports/health.md" +``` + +## Integration +This agent integrates with: +- System logs +- Performance metrics +- Configuration files +- Documentation updates \ No newline at end of file diff --git a/gitea-opencode-agent-project/agents/scripting-agent.sh b/gitea-opencode-agent-project/agents/scripting-agent.sh new file mode 100644 index 0000000..be6bb88 --- /dev/null +++ b/gitea-opencode-agent-project/agents/scripting-agent.sh @@ -0,0 +1,26 @@ +# Scripting Agent + +## Overview +This agent specializes in generating automation scripts and configurations using the qwen3-coder model (30b) for the homelab infrastructure project. + +## Capabilities +- Creates Docker Compose configurations +- Generates setup scripts +- Writes automation routines +- Creates deployment manifests + +## Usage +```bash +# Generate automation script with qwen3-coder +qwen3-coder --generate-script \ + --model "qwen3-coder:30b" \ + --task "docker-compose generation" \ + --output "docker-compose.yml" +``` + +## Integration +This agent integrates with: +- Docker Compose templates +- Configuration files +- Deployment workflows +- Network documentation \ No newline at end of file diff --git a/kimi-linear/NEXT_STEPS.md b/kimi-linear/NEXT_STEPS.md new file mode 100644 index 0000000..66389fa --- /dev/null +++ b/kimi-linear/NEXT_STEPS.md @@ -0,0 +1,146 @@ +# kimi-linear — resumption guide + +Open this first when picking the work back up. + +## What this project is + +Kimi-Linear-48B-A3B-Instruct on the Strix Halo box via vLLM, ROCm/TheRock 7.x, +gfx1151. Sits beside Ollama+Qwen3-Coder. Goal: long-context (256K-1M) +local inference using the model architecturally best-suited to the box's +unified-memory shape. + +Roadmap entry: `localgenai/Roadmap.md` → "Layer 0: Inference + tools" +(to be added after BIOS unblock). + +Container artifacts: `pyinfra/framework/compose/kimi-linear.yml` + +`pyinfra/framework/compose/kimi-linear/` (Dockerfile, build.sh, +patch-tokenizer.sh, smoke.sh, README.md). Deploy push: +`cd pyinfra/framework && ./run.sh`. + +## Where we are (2026-05-10) + +**P0 — DONE, constrained.** First-ever locally-served Kimi-Linear +generation on a Strix Halo iGPU. 
Smoke test passes: +`/v1/models` returns `kimi-linear`; tiny generation returns "ok". + +Current runtime cap: `--max-model-len 4096`, `--max-num-seqs 1`, +`--num-gpu-blocks-override 32`. The long-context point of the model is +locked behind a **VRAM ceiling** — see "What blocks progress" below. + +**P1-P4 — pending.** + +## The gauntlet of fixes that got us here (don't re-derive) + +All of these are baked into the repo. Reproducing P0 from a clean box is +push the repo + run the steps in `compose/kimi-linear/README.md`. + +1. **Image entrypoint missing.** kyuz0 toolboxes drop into a shell; + compose's `command:` gets exec'd as the program. Fix: explicit + `entrypoint: ["vllm", "serve"]` in the compose file, model path as + positional first arg. + +2. **Tokenizer ImportError.** `tokenization_kimi.py` imports + `bytes_to_unicode` from a transformers internal that's been removed. + Fix: `patch-tokenizer.sh` inlines the function. Idempotent. + +3. **Missing AITER gfx1151 GEMM configs.** kyuz0 image is built for + gfx1151 but doesn't ship the AITER autotuning JSONs for every op + Kimi's MLA layers hit (validated against Qwen/MiniMax, not + MLA-heavy models). Fix: derived `Dockerfile` copies gfx1100 (RDNA3) + configs into gfx1151-named slots — kernels compile + run, tile + sizes not optimal but functional. + +4. **MLA AITER FP8 BMM tries to materialize a 30 GB intermediate.** + On top of resident weights, that's ~58 GB needed and we have 31 GB. + Fix: `VLLM_ROCM_USE_AITER_MLA=0` — bypasses AITER for just the MLA + path, keeps it for everything else. + +5. **`HSA_XNACK=1` is a trap with vLLM.** It enables HIP demand-paging + into GTT (115 GB ceiling per kernel cmdline `amdgpu.gttsize=117760`) + so vLLM computes "Available KV cache memory: 73.6 GiB", but + PyTorch's actual allocator stays capped at the GPU pool (~31 GB). + vLLM then OOMs trying to allocate the budget it computed. Fix: turn + XNACK *off*, live within the 31 GB pool until BIOS UMA gives a + single bigger pool. + +6. **`--swap-space` was removed in modern vLLM.** Don't pass it. + +7. **`--num-gpu-blocks-override 32`** as belt-and-braces against + vLLM's KV pool auto-discovery picking a too-big number even without + XNACK. + +## What blocks long context + +PyTorch's discoverable VRAM equals the **BIOS UMA Frame Buffer Size**, +not `amdgpu.gttsize`. The kernel cmdline is necessary but insufficient. + +- 128 GB physical → 64 GB UMA → ~62 GB visible to Linux. +- `rocminfo` reports two ~31 GB GPU pools (Pool 1 coarse / Pool 2 fine). +- PyTorch's allocator only uses one pool (~31 GB) → OOMs at ~30 GB + Kimi weights with little KV headroom. +- User has previously found Framework's BIOS caps UMA at 64 GB. + +**Research outcome (2026-05-10):** the right unblock isn't a higher BIOS +UMA cap — it's the inverse. Set UMA *small* and merge the two pools. + +| Layer | Setting | +| --- | --- | +| BIOS | Update to **3.05 stable** (Apr 2026); set UMA Frame Buffer = **0.5 GB** or **8 GB** (counter-intuitive but documented — frees pages for GTT) | +| Kernel | **≥ 6.16.9.** Earlier kernels cap ROCm visibility at 15.5 GB. pyinfra's `linux-generic-hwe-24.04` may be on 6.8/6.11 — verify with `uname -r` and upgrade if needed. 
| +| Cmdline | `amd_iommu=off amdgpu.gttsize=131072 ttm.pages_limit=33554432` (`ttm.pages_limit` in 4 KiB pages = 128 GiB) | +| Env | `HSA_XNACK=1` **+** `HSA_FORCE_FINE_GRAIN_PCIE=1` (the piece our earlier XNACK attempt was missing) | +| Env | `PYTORCH_HIP_ALLOC_CONF="backend:native,expandable_segments:True,garbage_collection_threshold:0.9"` (HIP variant, not CUDA) | +| PyTorch | TheRock gfx1151 wheels — kyuz0:stable image already uses these | + +Confirmed working in kyuz0/amd-strix-halo-vllm-toolboxes (DeepWiki: +"Kernel Parameters and Unified Memory") and the Framework community +"Linux + ROCm: January 2026 Stable Configurations Update" thread. +Single-process allocation budget after this: ~110 GB. + +**The hard ceiling**, if all of the above doesn't yield enough: 96 GB +direct UMA. AMD AGESA / StrixHaloPI limit, not Framework's. No path +past 96 GB on signed firmware as of May 2026; 192 GB Strix Halo refresh +is rumored but unreleased. + +**Fallback if the merge approach doesn't work for vLLM specifically**: +llama.cpp ROCm + bartowski Q4_K_M GGUF. Different memory model, splits +layers across CPU/GPU. Lower throughput, more flexible. + +## When you come back + +1. Read research output for BIOS UMA limits. If it landed in the chat, + it's the most recent note in this project's session. If not yet, + re-dispatch (prompt is in conversation history — Framework Desktop + May 2026 UMA Frame Buffer cap research). +2. Decide path: + - **BIOS bump available** → flash, reboot, drop the constraints in + `compose/kimi-linear.yml` (max-model-len, num-gpu-blocks-override), + re-`./smoke.sh`, ramp context. + - **BIOS capped at 64 GB** → pivot to llama.cpp ROCm path or accept + 4K context as the long-term reality. +3. Then advance through P1 → P2 → P3 per the roadmap. + +## Files of record + +- `pyinfra/framework/compose/kimi-linear.yml` — service def, all the + flag/env tradeoffs documented inline. +- `pyinfra/framework/compose/kimi-linear/` — Dockerfile + scripts. +- `pyinfra/framework/deploy.py` — wired into the service loop + + asset-copy block. +- `Roadmap.md` — strategy. +- `StrixHaloMemory.md` — the UMA-vs-GTT discussion that needs a + follow-up paragraph from this work (PyTorch caps at UMA, XNACK is a + trap). + +## Decisions worth not relitigating + +- **vLLM 0.19.x via kyuz0:stable** chosen over source-build with + `v0.11.2` pin (build.sh exists as fallback). The recipe pin advice + was based on an earlier Moonshot doc; current upstream works. +- **4-bit compressed-tensors** (cyankiwi) chosen over 8-bit. With the + 31 GB ceiling, 8-bit wouldn't even fit weights resident. +- **VLLM_ROCM_USE_AITER_MLA=0**, NOT `VLLM_MLA_DISABLE=1`. Granular + disable preserves AITER for non-MLA paths. The full disable is the + next escalation if needed. +- **No upstream filings.** Findings stay in this repo per project + policy (memory: `feedback_private_findings`). diff --git a/oc-tree/.python-version b/oc-tree/.python-version new file mode 100644 index 0000000..2c07333 --- /dev/null +++ b/oc-tree/.python-version @@ -0,0 +1 @@ +3.11 diff --git a/oc-tree/NEXT_STEPS.md b/oc-tree/NEXT_STEPS.md new file mode 100644 index 0000000..9a7c019 --- /dev/null +++ b/oc-tree/NEXT_STEPS.md @@ -0,0 +1,120 @@ +# oc-tree — resumption guide + +Open this file first when picking the work back up. + +## What this project is + +A Python TUI sidecar that subscribes to `opencode serve`'s SSE event +stream and renders a live tree of sessions → messages → tool calls in a +terminal pane next to the opencode TUI. 
Subagents nest under their +parent via `session.created.info.parentID`. + +Roadmap entry: `localgenai/Roadmap.md` → "Layer 0: Cross-cutting +capabilities" → "Observability — partial" + prioritized step #12. + +Phoenix (`opencode/.opencode/plugin/phoenix-bridge.js`) already handles +the *external* deep-trace store. oc-tree is the glanceable "what's it +doing right now" pane that lives in-harness (well, in tmux next to it). + +## Where we are + +- Plan agreed: Python + textual, lives at `localgenai/oc-tree/`. +- Phases: M0 → M1 → M2 → M3 → M4. See descriptions in this repo's task + list (`TaskList`) or the roadmap entry. +- **M0 — DONE** (skeleton). `uv sync` installs cleanly; `oc-tree` and + `oc-tree-probe` entry points resolve. Imports verified. +- **M0 — AWAITING USER ACTION** (schema verification). Probe is built + but hasn't been run against a live `opencode serve`. Three open + schema questions still unanswered. + +## What blocks progress + +User needs to run the probe against a real opencode session and report +back. Without these answers, M2's reducer design is guesswork. + +Run in a tmux pane while `opencode serve` is up: + +```sh +cd ~/Documents/obsidian/localgenai/oc-tree +uv run oc-tree-probe +``` + +Drive opencode through a session that hits **all three** triggers: +- Spawn a Task-tool subagent +- Trigger at least one permission prompt +- Make at least one regular tool call (Read/Bash/etc.) + +Ctrl-C the probe. Then run: + +```sh +# Q1: does session.created.info.parentID populate for subagents? +jq -r 'select(.type=="session.created") | .raw.properties.info.parentID' \ + /tmp/oc-tree-probe.jsonl + +# Q2: does message.part.updated carry full part or delta? +jq -c 'select(.type=="message.part.updated") | .raw.properties.part' \ + /tmp/oc-tree-probe.jsonl | head + +# Q3: what permission.* events actually fire? +jq -r '.type' /tmp/oc-tree-probe.jsonl | grep -i permission | sort -u +``` + +Paste the output (or the JSONL file path) into the next session. + +## What happens next + +Once probe answers are in: + +1. Mark M0 complete, start **M1 (flat session list)** — textual app, + live-updating list of sessions with status, no nesting yet. Proves + the reducer + render loop. Independent of the schema answers, so + could start in parallel. +2. **M2 (tree view)** — needs probe answers to know: + - Whether to nest by `parentID` directly (Q1 yes) or fall back to + inferring subagents from `Task` tool-part response payloads. + - Whether the part-update reducer replaces by `partID` (Q2 = full + part) or merges a delta (Q2 = delta). + - What permission events to render (Q3). +3. **M3 (reconnect + state rebuild)** — heartbeat watchdog, REST replay + on disconnect. Driven by sst/opencode#15149/#22198 known leaks. +4. **M4 (polish)** — keybindings, theme, tmux layout doc. + +## File layout + +``` +localgenai/oc-tree/ +├── pyproject.toml uv project (textual, httpx, httpx-sse) +├── README.md user-facing readme +├── NEXT_STEPS.md this file +├── .python-version 3.11 +└── src/oc_tree/ + ├── client.py OpenCodeClient: REST + SSE + ├── probe.py schema-verification CLI + ├── __main__.py stub for `oc-tree` (real TUI in M1) + └── widgets/ empty (populated in M1+) +``` + +## Key references + +- opencode server docs: +- Authoritative schema: `GET /doc` on a running `opencode serve` (do + not hardcode — fetch per-version). +- sst/opencode#7451 — no per-session SSE endpoint; we filter `/event` + client-side. +- sst/opencode#6573 — Task subagent over `opencode serve` may have + bugs; this is what Q1 verifies. 
+- sst/opencode#11424 — `message.part.updated` sometimes replays full + state; this is what Q2 verifies. +- sst/opencode#15149, #22198 — SSE disconnect leaks; informs M3 + shutdown discipline. + +## Decisions worth not relitigating + +- **Python + textual** chosen over Go+Bubbletea (faster iteration, + matches stack — uvx already in use) and Node+ink (worse SSE/UI + ergonomics; phoenix-bridge.js doesn't justify matching). +- **Read-only v1.** No sending messages, no editing. Just visibility. +- **Lives in `localgenai/oc-tree/`** rather than its own repo; can be + extracted later if it warrants a standalone release. +- **State rebuild via REST on every (re)connect** rather than trusting + SSE catchup or `Last-Event-ID` (server doesn't honor it). diff --git a/oc-tree/README.md b/oc-tree/README.md new file mode 100644 index 0000000..f36ba0e --- /dev/null +++ b/oc-tree/README.md @@ -0,0 +1,72 @@ +# oc-tree + +Live tree-view sidecar for [opencode](https://opencode.ai). Subscribes +to the `opencode serve` SSE event stream and renders a live hierarchy of +sessions → messages → tool calls in a terminal pane next to the opencode +TUI. + +Phoenix (`opencode/.opencode/plugin/phoenix-bridge.js`) handles +after-the-fact deep traces; oc-tree is the glanceable "what's it doing +right now" pane. + +## Status + +- **M0** — skeleton + schema probe (current). +- M1 — flat session list (textual). +- M2 — tree view with subagent nesting via `parentID`. +- M3 — heartbeat watchdog + REST replay on reconnect. +- M4 — polish (keybindings, theme, tmux layout doc). + +Picking the work back up: read [`NEXT_STEPS.md`](./NEXT_STEPS.md) first. + +## Requirements + +- Python 3.11+ +- `uv` +- A running `opencode serve` (default `127.0.0.1:4096`) + +## Quickstart + +```sh +cd ~/Documents/obsidian/localgenai/oc-tree +uv sync +uv run oc-tree-probe +``` + +Drive opencode in another terminal. Probe writes JSONL frames to +`/tmp/oc-tree-probe.jsonl` and a live counter to stdout. Ctrl-C to stop; +event-type counts print on exit. + +### M0 verification queries + +After driving a session that includes a Task subagent, a permission +prompt, and a tool call: + +```sh +# 1. Does session.created.info.parentID populate for subagents? +jq -r 'select(.type=="session.created") | .raw.properties.info.parentID' \ + /tmp/oc-tree-probe.jsonl + +# 2. Does message.part.updated carry full parts or deltas? +jq -c 'select(.type=="message.part.updated") | .raw.properties.part' \ + /tmp/oc-tree-probe.jsonl | head + +# 3. Which permission.* events actually fire? 
+jq -r '.type' /tmp/oc-tree-probe.jsonl | grep -i permission | sort -u +``` + +## Configuration + +| Env var | Default | +| --------------------------- | ------------------------ | +| `OPENCODE_URL` | `http://127.0.0.1:4096` | +| `OPENCODE_SERVER_USERNAME` | `opencode` (if pw set) | +| `OPENCODE_SERVER_PASSWORD` | _(unset → no auth)_ | + +## References + +- [opencode server docs](https://opencode.ai/docs/server/) +- [sst/opencode#7451](https://github.com/sst/opencode/issues/7451) — no per-session SSE endpoint +- [sst/opencode#6573](https://github.com/sst/opencode/issues/6573) — Task subagent over `opencode serve` +- [sst/opencode#11424](https://github.com/sst/opencode/issues/11424) — replayed message.part.updated frames +- [sst/opencode#15149](https://github.com/sst/opencode/issues/15149) — SSE disconnect leaves server hung diff --git a/oc-tree/pyproject.toml b/oc-tree/pyproject.toml new file mode 100644 index 0000000..f8d66d4 --- /dev/null +++ b/oc-tree/pyproject.toml @@ -0,0 +1,21 @@ +[project] +name = "oc-tree" +version = "0.0.1" +description = "Live tree-view sidecar for opencode SSE events" +requires-python = ">=3.11" +dependencies = [ + "textual>=0.80", + "httpx>=0.27", + "httpx-sse>=0.4", +] + +[project.scripts] +oc-tree = "oc_tree.__main__:main" +oc-tree-probe = "oc_tree.probe:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/oc_tree"] diff --git a/oc-tree/src/oc_tree/__init__.py b/oc-tree/src/oc_tree/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/oc-tree/src/oc_tree/__main__.py b/oc-tree/src/oc_tree/__main__.py new file mode 100644 index 0000000..62d5f30 --- /dev/null +++ b/oc-tree/src/oc_tree/__main__.py @@ -0,0 +1,20 @@ +"""Entry point. M0: stub that points users at the probe. + +The textual app lands in M1+. Until then, this command exists so the +script registration in pyproject.toml resolves. +""" + +from __future__ import annotations + + +def main() -> int: + print( + "oc-tree TUI ships in M1.\n" + "For now, run the schema probe:\n" + " uv run oc-tree-probe\n" + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/oc-tree/src/oc_tree/client.py b/oc-tree/src/oc_tree/client.py new file mode 100644 index 0000000..cd7acb6 --- /dev/null +++ b/oc-tree/src/oc_tree/client.py @@ -0,0 +1,93 @@ +"""Thin async client for opencode's HTTP + SSE API. + +Talks to `opencode serve` (default 127.0.0.1:4096). Auth is off unless +OPENCODE_SERVER_PASSWORD is set, matching upstream defaults. + +The single SSE endpoint is `GET /event`; per-session streams don't exist +(sst/opencode#7451), so callers filter by sessionID client-side. 
+""" + +from __future__ import annotations + +import base64 +import os +from collections.abc import AsyncIterator +from dataclasses import dataclass +from typing import Any + +import httpx +from httpx_sse import aconnect_sse + + +@dataclass(frozen=True) +class Event: + type: str + properties: dict[str, Any] + raw: dict[str, Any] + + +def _auth_header() -> dict[str, str]: + pw = os.environ.get("OPENCODE_SERVER_PASSWORD", "") + if not pw: + return {} + user = os.environ.get("OPENCODE_SERVER_USERNAME", "opencode") + token = base64.b64encode(f"{user}:{pw}".encode()).decode() + return {"Authorization": f"Basic {token}"} + + +class OpenCodeClient: + def __init__( + self, + base_url: str | None = None, + *, + timeout: float = 30.0, + ) -> None: + self.base_url = ( + base_url + or os.environ.get("OPENCODE_URL") + or "http://127.0.0.1:4096" + ).rstrip("/") + # SSE needs no read timeout; REST calls cap at `timeout`. + self._sse_timeout = httpx.Timeout(timeout, read=None) + self._rest_timeout = httpx.Timeout(timeout) + self._headers = _auth_header() + + async def list_sessions( + self, *, scope: str = "project", limit: int = 50 + ) -> list[dict[str, Any]]: + async with httpx.AsyncClient( + base_url=self.base_url, + headers=self._headers, + timeout=self._rest_timeout, + ) as c: + r = await c.get("/session", params={"scope": scope, "limit": limit}) + r.raise_for_status() + return r.json() + + async def get_session_messages(self, session_id: str) -> list[dict[str, Any]]: + async with httpx.AsyncClient( + base_url=self.base_url, + headers=self._headers, + timeout=self._rest_timeout, + ) as c: + r = await c.get(f"/session/{session_id}/message") + r.raise_for_status() + return r.json() + + async def stream_events(self) -> AsyncIterator[Event]: + """Yield events from /event. Caller handles reconnect.""" + async with httpx.AsyncClient( + base_url=self.base_url, + headers=self._headers, + timeout=self._sse_timeout, + ) as c: + async with aconnect_sse(c, "GET", "/event") as src: + async for sse in src.aiter_sse(): + if not sse.data: + continue + payload = sse.json() + yield Event( + type=payload.get("type", ""), + properties=payload.get("properties", {}) or {}, + raw=payload, + ) diff --git a/oc-tree/src/oc_tree/probe.py b/oc-tree/src/oc_tree/probe.py new file mode 100644 index 0000000..31e340b --- /dev/null +++ b/oc-tree/src/oc_tree/probe.py @@ -0,0 +1,100 @@ +"""Schema probe: dump raw /event frames to JSONL for inspection. + +Used in M0 to verify three open questions before building the UI: + 1. Does session.created.info.parentID get populated for Task subagents? + (sst/opencode#6573 — may be broken over `opencode serve`.) + 2. Does message.part.updated carry the full part or a delta? + (sst/opencode#11424 — frames sometimes replay full state.) + 3. What permission.* event names actually fire? (Undocumented.) + +Run, drive opencode through a session that includes a Task-tool +subagent + a permission prompt + at least one tool call, then grep the +output JSONL for `parentID`, `permission`, and successive part frames. 
+ +Usage: + uv run oc-tree-probe # writes /tmp/oc-tree-probe.jsonl + uv run oc-tree-probe --out file.jsonl +""" + +from __future__ import annotations + +import argparse +import asyncio +import json +import signal +import sys +from collections import Counter +from datetime import datetime, timezone +from pathlib import Path + +from .client import OpenCodeClient + + +async def _run(out_path: Path) -> int: + client = OpenCodeClient() + counts: Counter[str] = Counter() + print(f"oc-tree probe → {out_path}") + print(f" base_url={client.base_url}") + print(" drive opencode now; ctrl-c to stop and print summary\n") + + stop = asyncio.Event() + loop = asyncio.get_running_loop() + for sig in (signal.SIGINT, signal.SIGTERM): + loop.add_signal_handler(sig, stop.set) + + with out_path.open("a", encoding="utf-8") as f: + try: + stream = client.stream_events() + stream_task = asyncio.create_task(_consume(stream, f, counts)) + stop_task = asyncio.create_task(stop.wait()) + done, pending = await asyncio.wait( + {stream_task, stop_task}, + return_when=asyncio.FIRST_COMPLETED, + ) + for t in pending: + t.cancel() + for t in done: + exc = t.exception() + if exc and not isinstance(exc, asyncio.CancelledError): + print(f"\nstream error: {exc!r}", file=sys.stderr) + except KeyboardInterrupt: + pass + + print("\n--- event counts ---") + for t, n in counts.most_common(): + print(f" {n:>5} {t}") + return 0 + + +async def _consume(stream, f, counts: Counter[str]) -> None: + async for event in stream: + counts[event.type] += 1 + line = json.dumps( + { + "ts": datetime.now(timezone.utc).isoformat(), + "type": event.type, + "raw": event.raw, + }, + ensure_ascii=False, + ) + f.write(line + "\n") + f.flush() + # Live tick so the operator knows it's working. + sys.stdout.write(f"\r{sum(counts.values()):>6} events last={event.type:<40}") + sys.stdout.flush() + + +def main() -> int: + ap = argparse.ArgumentParser(description="opencode SSE schema probe") + ap.add_argument( + "--out", + type=Path, + default=Path("/tmp/oc-tree-probe.jsonl"), + help="output JSONL file (appended)", + ) + args = ap.parse_args() + return asyncio.run(_run(args.out)) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/oc-tree/src/oc_tree/widgets/__init__.py b/oc-tree/src/oc_tree/widgets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/oc-tree/uv.lock b/oc-tree/uv.lock new file mode 100644 index 0000000..fd6bc88 --- /dev/null +++ b/oc-tree/uv.lock @@ -0,0 +1,212 @@ +version = 1 +requires-python = ">=3.11" + +[[package]] +name = "anyio" +version = "4.13.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/14/2c5dd9f512b66549ae92767a9c7b330ae88e1932ca57876909410251fe13/anyio-4.13.0.tar.gz", hash = "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc", size = 231622 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353 }, +] + +[[package]] +name = "certifi" +version = "2026.4.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/25/ee/6caf7a40c36a1220410afe15a1cc64993a1f864871f698c0f93acb72842a/certifi-2026.4.22.tar.gz", hash = 
"sha256:8d455352a37b71bf76a79caa83a3d6c25afee4a385d632127b6afb3963f1c580", size = 137077 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/30/7cd8fdcdfbc5b869528b079bfb76dcdf6056b1a2097a662e5e8c04f42965/certifi-2026.4.22-py3-none-any.whl", hash = "sha256:3cb2210c8f88ba2318d29b0388d1023c8492ff72ecdde4ebdaddbb13a31b1c4a", size = 135707 }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784 }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, +] + +[[package]] +name = "httpx-sse" +version = "0.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/4c/751061ffa58615a32c31b2d82e8482be8dd4a89154f003147acee90f2be9/httpx_sse-0.4.3.tar.gz", hash = "sha256:9b1ed0127459a66014aec3c56bebd93da3c1bc8bb6618c8082039a44889a755d", size = 15943 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960 }, +] + +[[package]] +name = "idna" +version = "3.13" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ce/cc/762dfb036166873f0059f3b7de4565e1b5bc3d6f28a414c13da27e442f99/idna-3.13.tar.gz", hash = "sha256:585ea8fe5d69b9181ec1afba340451fba6ba764af97026f92a91d4eef164a242", size = 194210 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/13/ad7d7ca3808a898b4612b6fe93cde56b53f3034dcde235acb1f0e1df24c6/idna-3.13-py3-none-any.whl", hash = 
"sha256:892ea0cde124a99ce773decba204c5552b69c3c67ffd5f232eb7696135bc8bb3", size = 68629 }, +] + +[[package]] +name = "linkify-it-py" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "uc-micro-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2e/c9/06ea13676ef354f0af6169587ae292d3e2406e212876a413bf9eece4eb23/linkify_it_py-2.1.0.tar.gz", hash = "sha256:43360231720999c10e9328dc3691160e27a718e280673d444c38d7d3aaa3b98b", size = 29158 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b4/de/88b3be5c31b22333b3ca2f6ff1de4e863d8fe45aaea7485f591970ec1d3e/linkify_it_py-2.1.0-py3-none-any.whl", hash = "sha256:0d252c1594ecba2ecedc444053db5d3a9b7ec1b0dd929c8f1d74dce89f86c05e", size = 19878 }, +] + +[[package]] +name = "markdown-it-py" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/ff/7841249c247aa650a76b9ee4bbaeae59370dc8bfd2f6c01f3630c35eb134/markdown_it_py-4.2.0.tar.gz", hash = "sha256:04a21681d6fbb623de53f6f364d352309d4094dd4194040a10fd51833e418d49", size = 82454 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/81/4da04ced5a082363ecfa159c010d200ecbd959ae410c10c0264a38cac0f5/markdown_it_py-4.2.0-py3-none-any.whl", hash = "sha256:9f7ebbcd14fe59494226453aed97c1070d83f8d24b6fc3a3bcf9a38092641c4a", size = 91687 }, +] + +[package.optional-dependencies] +linkify = [ + { name = "linkify-it-py" }, +] + +[[package]] +name = "mdit-py-plugins" +version = "0.6.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d8/3d/e0e8d9d1cee04f758120915e2b2a3a07eb41f8cf4654b4734788a522bcd1/mdit_py_plugins-0.6.0.tar.gz", hash = "sha256:2436f14a7295837ac9228a36feeabda867c4abc488c8d019ad5c0bda88eee040", size = 56025 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/71/d6/48f5b9e44e2e760855d7b489b1317cd7620e82dcb73197961e5cc1391348/mdit_py_plugins-0.6.0-py3-none-any.whl", hash = "sha256:f7e7a25d8b616fee99cb1e330da73451d11a8061baf39bb9663ab9ce0e005b90", size = 66655 }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, +] + +[[package]] +name = "oc-tree" +version = "0.0.1" +source = { editable = "." 
} +dependencies = [ + { name = "httpx" }, + { name = "httpx-sse" }, + { name = "textual" }, +] + +[package.metadata] +requires-dist = [ + { name = "httpx", specifier = ">=0.27" }, + { name = "httpx-sse", specifier = ">=0.4" }, + { name = "textual", specifier = ">=0.80" }, +] + +[[package]] +name = "platformdirs" +version = "4.9.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9f/4a/0883b8e3802965322523f0b200ecf33d31f10991d0401162f4b23c698b42/platformdirs-4.9.6.tar.gz", hash = "sha256:3bfa75b0ad0db84096ae777218481852c0ebc6c727b3168c1b9e0118e458cf0a", size = 29400 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/75/a6/a0a304dc33b49145b21f4808d763822111e67d1c3a32b524a1baf947b6e1/platformdirs-4.9.6-py3-none-any.whl", hash = "sha256:e61adb1d5e5cb3441b4b7710bea7e4c12250ca49439228cc1021c00dcfac0917", size = 21348 }, +] + +[[package]] +name = "pygments" +version = "2.20.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151 }, +] + +[[package]] +name = "rich" +version = "15.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/8f/0722ca900cc807c13a6a0c696dacf35430f72e0ec571c4275d2371fca3e9/rich-15.0.0.tar.gz", hash = "sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36", size = 230680 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654 }, +] + +[[package]] +name = "textual" +version = "8.2.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py", extra = ["linkify"] }, + { name = "mdit-py-plugins" }, + { name = "platformdirs" }, + { name = "pygments" }, + { name = "rich" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/62/1e/1eedc5bac184d00aaa5f9a99095f7e266af3ec46fa926c1051be5d358da1/textual-8.2.5.tar.gz", hash = "sha256:6c894e65a879dadb4f6cf46ddcfedb0173ff7e0cb1fe605ff7b357a597bdbc90", size = 1851596 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cd/01/c4555f9c8a692ff83d84930150540f743ce94c89234f9e9a15ff4baba3a8/textual-8.2.5-py3-none-any.whl", hash = "sha256:247d2aa2faf222749c321f88a736247f37ee2c023604079c7490bfacddfcd4b2", size = 727050 }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614 }, +] + +[[package]] +name = "uc-micro-py" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/78/67/9a363818028526e2d4579334460df777115bdec1bb77c08f9db88f6389f2/uc_micro_py-2.0.0.tar.gz", hash = "sha256:c53691e495c8db60e16ffc4861a35469b0ba0821fe409a8a7a0a71864d33a811", size = 6611 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/73/d21edf5b204d1467e06500080a50f79d49ef2b997c79123a536d4a17d97c/uc_micro_py-2.0.0-py3-none-any.whl", hash = "sha256:3603a3859af53e5a39bc7677713c78ea6589ff188d70f4fee165db88e22b242c", size = 6383 }, +] diff --git a/opencode/README.md b/opencode/README.md index 4388aae..297e7bc 100644 --- a/opencode/README.md +++ b/opencode/README.md @@ -5,14 +5,61 @@ stack. `install.sh` deploys it to `~/.config/opencode/` on a Mac. ## What's wired up -- **Local model**: `framework/qwen3-coder:30b` served by Ollama on the - Framework Desktop, reachable over Tailscale. +- **Local models**: two providers, manually switched via `/model`. + - `framework/qwen3-coder:30b` — Qwen3-Coder 30B-A3B via Ollama, the + daily-driver coding model. 128K context, 11434. + - `framework-vllm/kimi-linear` — Kimi-Linear 48B-A3B via vLLM, the + long-context play (hybrid KDA/MLA, MoE 3B active). 32K context for + now (ramps further in P3 of the kimi-linear roadmap), 8000. + **Tools disabled** (`tool_call: false`) — Kimi-Linear is a research + architecture release and isn't strongly tool-trained; the model + knows the Kimi-K2 tool tokens but emits non-structured output when + given an MCP toolbox. Use it for chat / long-context reasoning; + switch to `framework/qwen3-coder:30b` for agentic work. - **Playwright MCP** ([@playwright/mcp](https://github.com/microsoft/playwright-mcp)) — browser automation. The model can navigate pages, click, fill forms, read DOM snapshots. Closes the agentic-browsing gap. - **SearXNG MCP** ([mcp-searxng](https://github.com/ihor-sokoliuk/mcp-searxng)) — web search via your self-hosted instance at . No external API keys, no rate-limit roulette. +- **Serena MCP** ([oraios/serena](https://github.com/oraios/serena)) — + LSP-backed semantic code navigation (find symbol, references, rename, + insert before/after). Cuts the tokens a local 70B-class model burns on + grep-style flailing by roughly an order of magnitude. Uses a **custom + trimmed context** (`serena-ide-trim.yml`) that exposes only the 8 + unique-LSP-value tools — JetBrains tools, line-level edits redundant + with opencode's `Edit`, Serena's own memory tools (basic-memory MCP is + canonical), and onboarding/meta noise are all excluded. Down from 46 + raw → 41 ide-context-filtered → **8 active**. Scoped to the cwd via + `--project-from-cwd`. +- **basic-memory MCP** ([basicmachines-co/basic-memory](https://github.com/basicmachines-co/basic-memory)) — + Markdown-backed persistent memory across sessions. Storage lives in + `~/Documents/obsidian/AI-memory/` (symlinked from `~/basic-memory`), + so notes are browsable in Obsidian's graph and search. Replaces + Claude Code's auto-memory write-back, which opencode lacks natively. 
+- **sequential-thinking MCP** ([modelcontextprotocol/servers/sequentialthinking](https://github.com/modelcontextprotocol/servers/tree/main/src/sequentialthinking)) — + externalizes chain-of-thought as tool calls. Helps weaker local + models stay on-plan over multi-step work; near-zero cost when not + actively used. +- **github MCP** ([github/github-mcp-server](https://github.com/github/github-mcp-server)) — + GitHub repo / issue / PR / code-search access. Launched with + `--read-only` and a narrowed `--toolsets repos,issues,pull_requests,code_security` + allowlist. With a **classic** PAT (`ghp_…`), GitHub's auto-scope-filtering + (Jan 2026) trims tools further by hiding ones whose scopes the token + lacks — saves ~23k tokens of tool-list overhead, meaningful for a 70B's + effective context. Requires `GITHUB_PERSONAL_ACCESS_TOKEN` to be exported + in your shell env (not in opencode.json). Drop `--read-only` from + `opencode.json` once you trust the model's tool calls. + + **Note**: This MCP is disabled since the user is utilizing a self-hosted Gitea instance instead of GitHub. +- **task-master MCP** ([eyaltoledano/claude-task-master](https://github.com/eyaltoledano/claude-task-master)) — + Workflow / task-gate MCP. File-based: each project gets a + `.taskmaster/` dir with tasks, complexity, and config — no DB, no + external service. `OLLAMA_BASE_URL` is pre-set in `opencode.json` so + task-master's AI features (parse-prd, expand-task) route through your + framework Ollama. The npm-global install also provides a `task-master` + CLI (`task-master init` to scaffold per-project). Replaces the + workflow-gate role originally proposed for Archon, without Supabase. - **Phoenix bridge plugin** (`.opencode/plugin/phoenix-bridge.js`) — exports OpenTelemetry spans for every LLM call, tool call, and subagent invocation to the Phoenix container running on the Framework @@ -31,13 +78,22 @@ the plugin. Each step checks before doing work. Specifically: 1. Verifies Homebrew is present (won't install it for you) 2. `brew install node uv jq sst/tap/opencode` (skips if already at latest) 3. Pre-caches Playwright's chromium so the first MCP call is instant -4. `npm install` in `.opencode/plugin/` for the Phoenix bridge OTel deps -5. Generates `~/.config/opencode/opencode.json` from the repo's +4. `uv tool install serena-agent@latest --prerelease=allow` so opencode + can launch Serena as a plain `serena` binary on PATH (faster than + re-resolving via `uvx` on every session) +5. Creates `~/Documents/obsidian/AI-memory/` and symlinks `~/basic-memory` + to it, so basic-memory MCP writes into the Obsidian vault by default +6. `brew install github-mcp-server` and warns if `GITHUB_PERSONAL_ACCESS_TOKEN` + isn't set in your shell — the MCP needs it to authenticate +7. `npm install -g task-master-ai` (workflow MCP, also exposes the + `task-master` CLI for `task-master init` per project) +8. `npm install` in `.opencode/plugin/` for the Phoenix bridge OTel deps +9. Generates `~/.config/opencode/opencode.json` from the repo's `opencode.json`, rewriting relative plugin paths to absolute so OpenCode loads the plugin regardless of which directory it's launched from -Step 5 is the reason the deployed config isn't a plain symlink. The +Step 9 is the reason the deployed config isn't a plain symlink. The repo's `opencode.json` uses a relative plugin path (`./...`) so it stays valid in place; the deployed copy is generated with that path resolved to an absolute one. 
Edits to the repo's `opencode.json` need a re-run @@ -57,11 +113,35 @@ Then in opencode: ``` opencode -> /mcp # should list playwright and searxng as connected +> /mcp # should list playwright, searxng, serena, basic-memory, + # sequential-thinking, github, task-master as connected > search the web for "qwen3-coder benchmarks" > open https://example.com and tell me the H1 +> use serena to find the definition of `parse_request` +> remember: this project ships its memory into the Obsidian vault +> /sequentialthinking think through the trade-offs of X vs Y +> list my recent github PRs across all repos +> task-master init # then ask the model to plan tasks for this project ``` +For parallel agents, plain tmux + git worktree is enough at the 70B's +~2-pane concurrency ceiling. A two-line zsh helper covers the +"new isolated worktree → split tmux pane → start opencode" loop: + +```sh +work() { + local name="${1:?usage: work }" + local wt="../$(basename "$PWD")-$name" + git worktree add "$wt" -b "$name" && tmux split-window -h -c "$wt" "opencode" +} +unwork() { local wt="$PWD"; cd .. && git worktree remove --force "$wt"; } +``` + +Serena's first invocation in a project may take a few seconds — it +indexes the workspace via the language server. basic-memory's first +write creates the project layout under `~/Documents/obsidian/AI-memory/` +which Obsidian will pick up on its next vault scan. + ## Phoenix tracing The plugin at `.opencode/plugin/phoenix-bridge.js` boots an OpenTelemetry @@ -142,3 +222,32 @@ plugin no-ops — the rest of OpenCode still works fine. using the same `type/command/enabled` shape. The [official MCP registry](https://registry.modelcontextprotocol.io/) and [Awesome MCP Servers](https://mcpservers.org/) catalog options. +- **Tool-list bloat is real on a local 70B.** Every tool description + costs context. Five MCP servers exposing ~10 tools each puts the + active-tool list around 50 — manageable, but adding two more + full-spectrum servers (e.g. GitHub MCP at ~70 tools without scope + filtering, plus Context7) starts crowding effective context. Prefer + servers with toolset filtering or per-agent allow-lists in opencode. +- **basic-memory storage path.** The symlink `~/basic-memory` → + `~/Documents/obsidian/AI-memory` is created by `install.sh` only if + `~/basic-memory` doesn't already exist. If you'd previously run + basic-memory before this setup, move that directory's contents into + `AI-memory/` first, then delete `~/basic-memory` and re-run + `install.sh`. +- **Serena PATH gotcha.** `uv tool install` puts `serena` in + `~/.local/bin/`. If your shell rc doesn't export that, `opencode` + won't find the binary. The script warns; fix is one line in + `~/.zshrc`: `export PATH="$HOME/.local/bin:$PATH"`. +- **Serena tool trim** (`serena-ide-trim.yml`). The custom context + excludes 28 tools beyond what the built-in `ide` context already + filters. To re-expose any of them, edit + [`serena-ide-trim.yml`](serena-ide-trim.yml) and remove the entry + from `excluded_tools`, then re-run `./install.sh`. The path injection + (`./serena-ide-trim.yml` → absolute) is handled by install.sh's jq + pass at deploy time. +- **GitHub PAT.** Use a **classic** PAT (`ghp_…`) — auto-scope-filtering + only kicks in for classic tokens, not fine-grained ones. Without + it, the GitHub MCP exposes its full ~70-tool surface, which costs + ~23k tokens of context the local 70B can ill afford. Generate at + with the scopes you actually + want exposed. 
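+- **Sanity-checking the deployed config.** A minimal check, assuming the
+  default deploy path of `~/.config/opencode/opencode.json` (install.sh
+  prints the same two lists at the end of a run):
+
+  ```sh
+  # plugin paths should come out absolute after the jq rewrite…
+  jq -r '.plugin[]?' ~/.config/opencode/opencode.json
+  # …and so should serena's --context argument
+  jq -r '.mcp.serena.command[]? | select(test("\\.yml$"))' \
+    ~/.config/opencode/opencode.json
+  ```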
diff --git a/opencode/install.sh b/opencode/install.sh index d6b6243..87b4aa3 100755 --- a/opencode/install.sh +++ b/opencode/install.sh @@ -8,8 +8,12 @@ # 1. Verify Homebrew is present # 2. Install node, uv, opencode, jq (skips if already at latest) # 3. Pre-cache Playwright's chromium so the first MCP call is instant -# 4. Install the Phoenix bridge plugin's OTel deps -# 5. Generate ~/.config/opencode/opencode.json from the repo's +# 4. Install Serena (uv tool — LSP-backed code navigation MCP) +# 5. Wire basic-memory's storage to the Obsidian vault's AI-memory folder +# 6. Install github-mcp-server + check for GITHUB_PERSONAL_ACCESS_TOKEN +# 7. Install task-master-ai (workflow MCP) +# 8. Install the Phoenix bridge plugin's OTel deps +# 9. Generate ~/.config/opencode/opencode.json from the repo's # opencode.json with relative plugin paths rewritten to absolute, # so opencode loads the plugin regardless of where it's launched. # @@ -28,14 +32,14 @@ warn() { printf ' \033[33m!\033[0m %s\n' "$*"; } fail() { printf ' \033[31m✗\033[0m %s\n' "$*"; exit 1; } # --- 1. Homebrew ------------------------------------------------------------- -bold "[1/5] Homebrew" +bold "[1/9] Homebrew" if ! command -v brew >/dev/null 2>&1; then fail "brew not found. Install from https://brew.sh, then re-run." fi ok "brew $(brew --version | head -1 | awk '{print $2}')" # --- 2. CLI deps ------------------------------------------------------------- -bold "[2/5] CLI dependencies" +bold "[2/9] CLI dependencies" brew_install_if_missing() { local pkg="$1" local bin="${2:-$1}" @@ -60,7 +64,7 @@ else fi # --- 3. Playwright browsers -------------------------------------------------- -bold "[3/5] Playwright browser cache" +bold "[3/9] Playwright browser cache" PW_CACHE="${HOME}/Library/Caches/ms-playwright" if [[ -d "$PW_CACHE" ]] && find "$PW_CACHE" -name "chrome" -o -name "Chromium*" 2>/dev/null | grep -q .; then ok "browsers already cached at $PW_CACHE" @@ -70,8 +74,93 @@ else ok "browsers cached" fi -# --- 4. Phoenix bridge plugin deps ------------------------------------------ -bold "[4/5] Phoenix bridge plugin deps" +# --- 4. Serena (LSP-backed semantic code navigation MCP) -------------------- +# Installed once as a uv tool so opencode can launch it as `serena +# start-mcp-server ...` without paying uvx's resolution cost on every +# session start. --prerelease=allow is required because serena-agent +# ships pre-1.0 versions. +bold "[4/9] Serena MCP" +if uv tool list 2>/dev/null | awk '{print $1}' | grep -qx 'serena-agent'; then + ok "serena-agent already installed ($(serena --version 2>/dev/null | head -1 || echo 'version unknown'))" +else + info "installing serena-agent via uv tool (~30s first run)" + uv tool install -p 3.13 serena-agent@latest --prerelease=allow + ok "serena-agent installed" +fi +if ! command -v serena >/dev/null 2>&1; then + warn "serena binary not on PATH — uv tool's bin dir may not be exported." + warn "Add this to your shell rc: export PATH=\"\$HOME/.local/bin:\$PATH\"" +fi + +# --- 5. basic-memory storage ------------------------------------------------ +# basic-memory defaults its project home to ~/basic-memory. We point that +# at a folder inside the Obsidian vault via symlink so the AI's notes +# show up in Obsidian's graph and search. Symlink (not env var) chosen +# because it's stable across basic-memory's evolving config schema. +bold "[5/9] basic-memory storage" +AI_MEM_PATH="${HOME}/Documents/obsidian/AI-memory" +if [[ ! 
-d "$AI_MEM_PATH" ]]; then + info "creating $AI_MEM_PATH" + mkdir -p "$AI_MEM_PATH" + ok "AI-memory directory created" +else + ok "AI-memory directory exists at $AI_MEM_PATH" +fi +if [[ -L "${HOME}/basic-memory" ]]; then + link_target="$(readlink "${HOME}/basic-memory")" + if [[ "$link_target" == "$AI_MEM_PATH" ]]; then + ok "~/basic-memory already linked to AI-memory" + else + warn "~/basic-memory points to $link_target — leaving as-is. basic-memory MCP will write there, not to AI-memory." + fi +elif [[ -e "${HOME}/basic-memory" ]]; then + warn "~/basic-memory exists and is not a symlink. Move or remove it for AI-memory linkage." +else + info "linking ~/basic-memory -> $AI_MEM_PATH" + ln -s "$AI_MEM_PATH" "${HOME}/basic-memory" + ok "symlink created" +fi + +# --- 6. github-mcp-server (GitHub MCP, classic-PAT auto-scope-filtered) ----- +# Homebrew formula tracks upstream releases; the binary is a Go single-file. +# We launch it via opencode.json's mcp.github entry with --read-only and a +# narrowed --toolsets allowlist; auto-scope-filtering on classic PATs (the +# Jan 2026 GitHub feature) cuts ~23k tokens of tool-list overhead — significant +# for a local 70B's effective context. Token itself is NOT in opencode.json +# (it's git-tracked); github-mcp-server inherits GITHUB_PERSONAL_ACCESS_TOKEN +# from the user's shell env. +bold "[6/9] github-mcp-server" +brew_install_if_missing github-mcp-server github-mcp-server +if [[ -z "${GITHUB_PERSONAL_ACCESS_TOKEN:-}" ]]; then + warn "GITHUB_PERSONAL_ACCESS_TOKEN is not set in your shell environment." + warn " github-mcp-server will fail to connect on opencode startup." + warn " Fix:" + warn " 1. Create a classic PAT (starts with ghp_) at" + warn " https://github.com/settings/tokens with the scopes you want" + warn " exposed (auto-filtering hides tools whose scopes the PAT lacks)." + warn " 2. Add to ~/.zshrc:" + warn " export GITHUB_PERSONAL_ACCESS_TOKEN=ghp_xxxxxxxxxxxx" + warn " 3. Re-source the shell (or open a new terminal) before launching opencode." +else + ok "GITHUB_PERSONAL_ACCESS_TOKEN is set in this shell" +fi + +# --- 7. claude-task-master (workflow MCP, npm global) ----------------------- +# task-master-ai: workflow/task-gate MCP. File-based (.taskmaster/ in each +# project), no DB, no external service. opencode.json launches it via +# `npx -y task-master-ai`; the global install also provides the `task-master` +# CLI (`task-master init` to scaffold a project's tasks). +bold "[7/9] claude-task-master" +if command -v task-master >/dev/null 2>&1; then + ok "task-master-ai already installed ($(task-master --version 2>/dev/null | head -1 || echo 'version unknown'))" +else + info "installing task-master-ai globally via npm" + npm install -g task-master-ai + ok "task-master-ai installed" +fi + +# --- 8. Phoenix bridge plugin deps ------------------------------------------ +bold "[8/9] Phoenix bridge plugin deps" if [[ -d ".opencode/plugin/node_modules" && -f ".opencode/plugin/package-lock.json" ]]; then # Re-run npm install if package.json is newer than the lockfile, otherwise skip. if [[ ".opencode/plugin/package.json" -nt ".opencode/plugin/package-lock.json" ]]; then @@ -87,12 +176,12 @@ else ok "deps installed" fi -# --- 5. Generate ~/.config/opencode/opencode.json --------------------------- +# --- 9. Generate ~/.config/opencode/opencode.json --------------------------- # The repo's opencode.json uses relative plugin paths so it stays valid # in-place. 
Rewriting them to absolute paths here makes opencode find the # plugin regardless of which directory it was launched from. Re-run this # script after editing opencode.json. -bold "[5/5] Deploy global config" +bold "[9/9] Deploy global config" mkdir -p "${HOME}/.config/opencode" src="${HERE}/opencode.json" dst="${HOME}/.config/opencode/opencode.json" @@ -109,24 +198,34 @@ if [[ -L "${HOME}/.config/opencode/.opencode" ]]; then rm "${HOME}/.config/opencode/.opencode" fi -# Rewrite any relative plugin path (./foo, ../foo) to an absolute path -# rooted at this directory. Absolute paths and npm-package refs pass -# through untouched. +# Rewrite any relative path (./foo, ../foo) to an absolute path rooted at +# this directory. Applies to both the top-level `plugin` array and to any +# string inside `mcp..command[]` (used for serena's --context arg +# pointing at serena-ide-trim.yml). Absolute paths and npm-package refs +# pass through untouched. jq --arg here "$HERE" ' - .plugin = ( - (.plugin // []) - | map( - if type == "string" and (startswith("./") or startswith("../")) - then ($here + "/" + ltrimstr("./") | gsub("/\\./"; "/")) - else . - end - ) - ) + def rewrite($h): + if type == "string" and (startswith("./") or startswith("../")) + then ($h + "/" + ltrimstr("./") | gsub("/\\./"; "/")) + else . + end; + .plugin = ((.plugin // []) | map(rewrite($here))) + | .mcp = ( + (.mcp // {}) + | with_entries( + if (.value | type) == "object" and (.value | has("command")) + then .value.command |= map(rewrite($here)) + else . + end + ) + ) ' "$src" > "$dst.tmp" mv "$dst.tmp" "$dst" ok "wrote $dst" info "plugin paths resolved to:" jq -r '.plugin[]?' "$dst" | sed 's/^/ /' +info "mcp.serena context resolved to:" +jq -r '.mcp.serena.command | map(select(test("\\.yml$"))) | .[]?' "$dst" | sed 's/^/ /' echo bold "Done." 
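+
+# Illustration of what the rewrite above produces (paths are examples only —
+# the real prefix is wherever this repo is checked out on your machine):
+#   repo opencode.json:      "./serena-ide-trim.yml"
+#   deployed opencode.json:  "/Users/you/src/localgenai/opencode/serena-ide-trim.yml"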
diff --git a/opencode/opencode.json b/opencode/opencode.json index b1bd98f..ebc3432 100644 --- a/opencode/opencode.json +++ b/opencode/opencode.json @@ -7,7 +7,7 @@ "provider": { "framework": { "npm": "@ai-sdk/openai-compatible", - "name": "Framework Desktop (Strix Halo)", + "name": "Framework Desktop (Strix Halo) — Ollama", "options": { "baseURL": "http://framework:11434/v1" }, @@ -20,6 +20,24 @@ } } } + }, + "framework-vllm": { + "npm": "@ai-sdk/openai-compatible", + "name": "Framework Desktop (Strix Halo) — vLLM", + "options": { + "baseURL": "http://framework:8000/v1", + "apiKey": "dummy" + }, + "models": { + "kimi-linear": { + "name": "Kimi-Linear 48B-A3B (long-context, vLLM)", + "limit": { + "context": 32768, + "output": 8192 + }, + "tool_call": false + } + } } }, "mcp": { @@ -35,6 +53,44 @@ "environment": { "SEARXNG_URL": "https://searxng.n0n.io" } + }, + "serena": { + "type": "local", + "command": [ + "serena", "start-mcp-server", + "--context", "./serena-ide-trim.yml", + "--project-from-cwd", + "--open-web-dashboard", "false" + ], + "enabled": true + }, + "basic-memory": { + "type": "local", + "command": ["uvx", "basic-memory", "mcp"], + "enabled": true + }, + "sequential-thinking": { + "type": "local", + "command": ["npx", "-y", "@modelcontextprotocol/server-sequential-thinking"], + "enabled": true + }, + "github": { + "type": "local", + "command": [ + "github-mcp-server", + "stdio", + "--read-only", + "--toolsets", "repos,issues,pull_requests,code_security" + ], + "enabled": false + }, + "task-master": { + "type": "local", + "command": ["npx", "-y", "task-master-ai"], + "enabled": true, + "environment": { + "OLLAMA_BASE_URL": "http://framework:11434/v1" + } } }, "model": "framework/qwen3-coder:30b" diff --git a/opencode/serena-ide-trim.yml b/opencode/serena-ide-trim.yml new file mode 100644 index 0000000..6ebb9b5 --- /dev/null +++ b/opencode/serena-ide-trim.yml @@ -0,0 +1,92 @@ +# Custom Serena context for the localgenai stack. +# +# Extends Serena's built-in `ide` context with deeper exclusions tailored to +# opencode + a local 70B-class model. Loaded by Serena via the +# `--context /abs/path/to/this.yml` flag (install.sh rewrites the relative +# path in opencode.json to the repo's absolute path at deploy time). +# +# Trim summary (see opencode/README.md for rationale): +# 46 raw tools → ide context excludes 5 → this context excludes 28 more +# → ~12 visible tools, all unique LSP value +# +# Cut categories: +# - JetBrains backend (11) — language_backend: LSP, never JetBrains +# - Line-level edits (5) — opencode's Edit covers them +# - Memory tools (6) — basic-memory MCP is canonical +# - Onboarding / meta (5) — bootstrap noise the model rarely needs +# - Destructive / dashboard (2) — remove_project, open_dashboard +# +# Kept (the unique LSP value, the reason we installed Serena): +# find_symbol, find_referencing_symbols, get_symbols_overview, +# replace_symbol_body, insert_after_symbol, insert_before_symbol, +# rename_symbol, safe_delete_symbol, restart_language_server, +# list_queryable_projects, query_project (+ activate_project, latent) + +description: opencode IDE context, trimmed for local 70B + +prompt: | + You are running in an IDE assistant context where file operations, + basic (line-based) edits and reads, and shell commands are handled by + your own, internal tools. + + If Serena's tools can be used to achieve your task, you should + prioritize them. In particular, it is important that you avoid reading + entire source code files unless it is strictly necessary! 
Instead, for + exploring and reading code in a token-efficient manner, use Serena's + symbolic-search tools (find_symbol, find_referencing_symbols, + get_symbols_overview). + +excluded_tools: + # Inherited from built-in ide context (opencode built-ins cover these) + - create_text_file + - read_file + - execute_shell_command + - find_file + - list_dir + + # Line-level edits — redundant with opencode's Edit and Grep + - replace_content + - delete_lines + - replace_lines + - insert_at_line + - search_for_pattern + + # Memory tools — basic-memory MCP is the canonical persistent memory + - write_memory + - read_memory + - list_memories + - delete_memory + - rename_memory + - edit_memory + + # Onboarding / meta — bootstrap noise + - check_onboarding_performed + - onboarding + - initial_instructions + - serena_info + - get_current_config + + # Destructive / dashboard + - remove_project + - open_dashboard + + # JetBrains backend — never used in this setup (language_backend: LSP) + - jet_brains_find_symbol + - jet_brains_move + - jet_brains_safe_delete + - jet_brains_inline_symbol + - jet_brains_find_referencing_symbols + - jet_brains_get_symbols_overview + - jet_brains_type_hierarchy + - jet_brains_find_declaration + - jet_brains_find_implementations + - jet_brains_rename + - jet_brains_debug + +tool_description_overrides: {} + +# When `single_project: true` and a project is given at startup, Serena +# limits the toolset to what the project actually needs and disables +# `activate_project`. With opencode launching Serena via +# `--project-from-cwd`, a project is always present. +single_project: true diff --git a/pyinfra/framework/compose/kimi-linear.yml b/pyinfra/framework/compose/kimi-linear.yml new file mode 100644 index 0000000..2037fca --- /dev/null +++ b/pyinfra/framework/compose/kimi-linear.yml @@ -0,0 +1,112 @@ +# Kimi-Linear-48B-A3B-Instruct on vLLM, gfx1151, via kyuz0's TheRock 7.x +# toolbox. Pioneer-grade: no public Strix Halo benchmarks exist for this +# model as of 2026-05. +# +# Three risks P0 verifies in one shot: +# - KDA Triton kernel on gfx1151 (fla-core) unverified +# - compressed-tensors loader on ROCm unverified +# - HIP-graph-capture on gfx1151 broken; mitigated +# via --enforce-eager +# +# Image strategy. Default `image:` is upstream `kyuz0:stable` (vLLM +# ~6aa057c from 2026-04-22). If that crashes with the v0.12-class +# `MLAModules.__init__() missing 'indexer_rotary_emb'`, build a +# v0.11.2-pinned image locally with ./build.sh and edit `image:` below to +# `kimi-linear-local:v0.11.2`. Source build is multi-hour. +# +# Weights. Despite their HF name, cyankiwi's "AWQ" Kimi-Linear weights +# are actually `compressed-tensors` int4 group-quantized — see config.json. +# Download with: +# huggingface-cli download cyankiwi/Kimi-Linear-48B-A3B-Instruct-AWQ-4bit \ +# --local-dir /models/moonshotai/Kimi-Linear-48B-A3B-Instruct-AWQ-4bit +# Size: ~35 GB on disk (4-bit). 8-bit variant is ~54 GB if quality drives +# us up later; both fit 128 GB unified comfortably. +services: + kimi-linear: + # Derived image: kyuz0:stable + gfx1151 AITER GEMM config fallbacks + # (Kimi-Linear's MLA layers hit FP8 BMM ops kyuz0 didn't validate + # with their tested models). See ./Dockerfile. Build is fast — just + # file copies inside the image. + build: + context: . 
+ dockerfile: Dockerfile + image: kimi-linear-local:aiter-fixed + container_name: kimi-linear + restart: unless-stopped + devices: + - /dev/kfd:/dev/kfd + - /dev/dri:/dev/dri + cap_add: + - SYS_PTRACE + security_opt: + - seccomp=unconfined + # Numeric GIDs of host's video (44) and render (991) groups — names + # don't exist inside the container, but the GIDs need to match the + # host so /dev/kfd + /dev/dri are accessible. + group_add: + - "44" + - "991" + shm_size: 16g + ipc: host + environment: + # gfx1151 native: kyuz0 image is built with GFX=gfx1151, so unlike + # ollama.yml (which uses 11.0.0 to coerce gfx1100 kernels), here we + # want the GPU to report its real ISA. + - HSA_OVERRIDE_GFX_VERSION=11.5.1 + # AITER attention path — kyuz0's image patches AITER for RDNA + # ds_swizzle fallbacks; the env flag opts vLLM into using it. + - VLLM_ROCM_USE_AITER=1 + # MLA pre-processing via AITER triton_fp8_bmm tries to materialize + # a ~30 GB intermediate alongside resident weights. Bypass that op; + # other AITER paths stay on. + - VLLM_ROCM_USE_AITER_MLA=0 + # Unified-memory recipe (BIOS UMA=0.5 GB + ttm.pages_limit cmdline + # + the env triple below). Lets PyTorch's HIP allocator treat the + # two rocminfo pools as one ~110 GB arena. Without the + # FINE_GRAIN_PCIE flag, XNACK alone is a trap (vLLM mis-computes + # KV budget vs. allocator ceiling). + - HSA_XNACK=1 + - HSA_FORCE_FINE_GRAIN_PCIE=1 + - PYTORCH_HIP_ALLOC_CONF=backend:native,expandable_segments:True,garbage_collection_threshold:0.9 + volumes: + - /models:/models:ro + ports: + - "8000:8000" + # kyuz0 toolboxes drop into a shell by default; without an explicit + # entrypoint, `command:` would be exec'd as a program (the + # `exec "--model": executable file not found` failure). + entrypoint: ["vllm", "serve"] + command: + # Positional model path (vllm serve's documented form). + - /models/moonshotai/Kimi-Linear-48B-A3B-Instruct-AWQ-4bit + - --served-model-name + - kimi-linear + # Auto-detect would also work — config.json carries quant_method. + # Explicit flag makes the failure mode loud if the loader is wrong. + - --quantization + - compressed-tensors + # Conservative restart point after BIOS+cmdline+env unblock. + # P3 ramps further: 32K → 128K → 256K → 512K → 1M. + - --max-model-len + - "32768" + - --gpu-memory-utilization + - "0.92" + - --max-num-seqs + - "4" + # gfx1151 V1-engine HIP-graph-capture is broken (vllm-project/vllm#32180). + # Eager costs throughput, not correctness; do not remove without + # verifying upstream fix landed. + - --enforce-eager + # Kimi-Linear ships custom modeling_kimi.py — required. + - --trust-remote-code + # Tool-calling support — opencode sends tool_choice:"auto" whenever + # MCP servers are connected. vLLM is strict and rejects unless both + # flags are present. Moonshot's Kimi family uses the kimi_k2 parser + # for tool-call formatting; Kimi-Linear inherits the same template. + - --enable-auto-tool-choice + - --tool-call-parser + - kimi_k2 + - --host + - 0.0.0.0 + - --port + - "8000" diff --git a/pyinfra/framework/compose/kimi-linear/Dockerfile b/pyinfra/framework/compose/kimi-linear/Dockerfile new file mode 100644 index 0000000..5dbbc54 --- /dev/null +++ b/pyinfra/framework/compose/kimi-linear/Dockerfile @@ -0,0 +1,35 @@ +# Derived image: kyuz0:stable plus gfx1151 AITER GEMM config fallbacks. +# +# kyuz0's image is built for gfx1151 but doesn't ship every per-op AITER +# autotuning config. 
Kimi-Linear's MLA layers hit FP8 BMM ops +# (BATCHED_GEMM-A8W8-A_PER_TOKEN_GROUP_PREQUANT_W_PER_BATCHED_TENSOR_QUANT +# and friends) that have no gfx1151 config in the bundle. We synthesize +# them by copying from the closest-arch config that does exist (RDNA3 +# gfx1100 is closest to RDNA3.5 gfx1151). Tile sizes won't be optimal +# but the kernels will compile and run. +# +# Idempotent — only fills slots that don't already have a gfx1151 config. +# +# If we ever need a vLLM-pinned base (e.g. upstream regresses on +# Kimi-Linear), build it via ./build.sh first and change FROM here to +# kimi-linear-local:v0.11.2. + +FROM kyuz0/vllm-therock-gfx1151:stable + +RUN set -e; \ + DIR=/opt/venv/lib64/python3.12/site-packages/aiter/ops/triton/configs/gemm; \ + cd "$DIR"; \ + filled=0; \ + for SRC_PREFIX in gfx1100 gfx1101 gfx942 gfx90a; do \ + for SRC in ${SRC_PREFIX}-*.json; do \ + [ -f "$SRC" ] || continue; \ + OP=${SRC#${SRC_PREFIX}-}; \ + DST=gfx1151-${OP}; \ + if [ ! -f "$DST" ]; then \ + cp "$SRC" "$DST"; \ + echo "[fix-aiter] $SRC -> $DST"; \ + filled=$((filled+1)); \ + fi; \ + done; \ + done; \ + echo "[fix-aiter] filled $filled gfx1151 config slots" diff --git a/pyinfra/framework/compose/kimi-linear/README.md b/pyinfra/framework/compose/kimi-linear/README.md new file mode 100644 index 0000000..1d9b68d --- /dev/null +++ b/pyinfra/framework/compose/kimi-linear/README.md @@ -0,0 +1,124 @@ +# kimi-linear + +Kimi-Linear-48B-A3B-Instruct on vLLM, ROCm/TheRock 7.x, gfx1151. Sits +beside Ollama (port 11434, Qwen3-Coder) on port 8000. OpenAI-compatible. + +This is the **P0 verification stage** — no public Strix Halo numbers +exist for this model as of 2026-05. Three things are unverified until a +first generation succeeds: KDA Triton kernel on gfx1151, +compressed-tensors loader on ROCm, and AITER + Kimi MoE topology. +Smoke-test below confirms all three at once. + +## Prereqs + +- Pyinfra deploy has run (`./run.sh` from `pyinfra/framework/`) — gives + you `/srv/docker/kimi-linear/`, GPU group membership, `/models/` + layout, and `huggingface-cli` on the box. +- Hugging Face CLI authenticated (`huggingface-cli login`) if the + weights repo gates downloads. cyankiwi's repo is currently public. + +## Step 1 — Download weights + +```sh +huggingface-cli download \ + cyankiwi/Kimi-Linear-48B-A3B-Instruct-AWQ-4bit \ + --local-dir /models/moonshotai/Kimi-Linear-48B-A3B-Instruct-AWQ-4bit +``` + +~35 GB. The repo is named `AWQ-4bit` but the actual format is +`compressed-tensors` int4 group-quantized — see `config.json`. + +## Step 2 — Try the upstream image first + +```sh +cd /srv/docker/kimi-linear +docker compose pull # ~8.5 GB +docker compose up -d +docker compose logs -f +``` + +Watch for one of three things: + +- **Loads cleanly, model serves on :8000** → P0 passes. Run `./smoke.sh`. +- **`MLAModules.__init__() missing 'indexer_rotary_emb'`** → upstream + image is on vLLM 0.12.x; need the v0.11.2 source build. Skip to + Step 3. +- **KDA / Triton / fla-core compile error** → kernel doesn't work on + gfx1151 yet. Fall back path: llama.cpp ROCm + bartowski Q4_K_M GGUF + in `compose/llama.yml`. Document the error in + `localgenai/kimi-linear/NOTES.md` and stop. + +## Step 3 — Source build (if needed) + +```sh +cd /srv/docker/kimi-linear +tmux new -s kimi-build +./build.sh # multi-hour. Detach with C-b d; reattach with `tmux a -t kimi-build` +``` + +Builds `kimi-linear-local:v0.11.2` from kyuz0 SHA `e2288d6` with +`VLLM_COMMIT=v0.11.2`. 
Then edit `docker-compose.yml`: + +```yaml + image: kimi-linear-local:v0.11.2 +``` + +…and `docker compose up -d` again. + +## Step 4 — Smoke test + +```sh +./smoke.sh +``` + +Expects: `/v1/models` returns `kimi-linear`; a four-token generation +returns "ok". If both pass, **P0 is done**. Update task #6 and proceed +to P1. + +## Operations + +```sh +docker compose logs -f kimi-linear # tail +docker compose restart kimi-linear # reload +docker compose down # stop +docker compose exec kimi-linear bash # shell in +amdgpu_top # on host: GPU power, mem, util +``` + +## Pin manifest + +| Component | Pin | +| --------------------------- | ---------------------------------- | +| kyuz0 toolbox | commit `e2288d6` (2026-04-22) | +| vLLM | tag `v0.11.2` (Moonshot recipe) | +| Image (default) | `kyuz0/vllm-therock-gfx1151:stable`| +| Image (pinned, if built) | `kimi-linear-local:v0.11.2` | +| Weights | `cyankiwi/Kimi-Linear-48B-A3B-Instruct-AWQ-4bit` (compressed-tensors int4) | +| ROCm | TheRock nightlies via kyuz0 base | +| Python | 3.12 (hardcoded in kyuz0 Dockerfile) | + +Bump policy: don't move vLLM to 0.12.x; don't move kyuz0 commit without +re-running smoke; bump weights only when an 8-bit A/B is in scope (P3). + +## Port collision warning + +`compose/vllm.yml` is a placeholder stub that also binds `:8000`. Only +one of `kimi-linear` and `vllm` can run at a time. Don't `docker compose +up` both. Long term either delete the stub or move it to a different +port; not in scope here. + +## Known issues / mitigations + +- **HIP graph capture broken on gfx1151** (vllm-project/vllm#32180) — + `--enforce-eager` mitigates at a throughput cost. Re-test without it + once the upstream fix lands. +- **vLLM 0.12.0 crash on Kimi-Linear** — + `MLAModules.__init__() missing 'indexer_rotary_emb'`. Hard pin to + 0.11.2. +- **No published gfx1151 numbers** — we are first. Findings stay + private (no upstream filings) per project policy. + +## Status + +P0 in progress. Update `oc-tree`-style `NEXT_STEPS.md` if you set this +aside mid-verification. diff --git a/pyinfra/framework/compose/kimi-linear/build.sh b/pyinfra/framework/compose/kimi-linear/build.sh new file mode 100755 index 0000000..8ab568f --- /dev/null +++ b/pyinfra/framework/compose/kimi-linear/build.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Source-build a vLLM 0.11.2-pinned image from kyuz0's gfx1151 toolbox. +# Use only when the upstream `kyuz0/vllm-therock-gfx1151:stable` tag +# crashes on Kimi-Linear with a v0.12-class error +# (`MLAModules.__init__() missing 'indexer_rotary_emb'`). +# +# Compiles flash-attention, AITER+CK, vLLM, and bitsandbytes from source +# with MAX_JOBS=4 (fixed upstream). Expect a multi-hour wall-clock on +# Strix Halo. Idempotent — skips if the target tag already exists. +# +# Pin policy. KYUZ0_COMMIT is the upstream SHA whose CI build produced +# the published `:stable` on 2026-04-22; bump only after re-validating +# Kimi-Linear works with the new toolbox revision. VLLM_COMMIT is the +# Moonshot recipe pin for Kimi-Linear; do not bump to v0.12.x. + +set -euo pipefail + +KYUZ0_REPO="https://github.com/kyuz0/amd-strix-halo-vllm-toolboxes.git" +KYUZ0_COMMIT="e2288d6" +VLLM_COMMIT="v0.11.2" +IMAGE_TAG="kimi-linear-local:${VLLM_COMMIT}" +WORKDIR="/tmp/kimi-linear-build" + +if docker image inspect "$IMAGE_TAG" >/dev/null 2>&1; then + echo "[build] $IMAGE_TAG already exists. To rebuild: docker rmi $IMAGE_TAG" + exit 0 +fi + +if [ ! 
-d "$WORKDIR/.git" ]; then + rm -rf "$WORKDIR" + git clone "$KYUZ0_REPO" "$WORKDIR" +fi + +cd "$WORKDIR" +git fetch origin +git checkout --quiet "$KYUZ0_COMMIT" + +echo "[build] kyuz0 toolbox @ $(git rev-parse --short HEAD)" +echo "[build] vLLM pin: $VLLM_COMMIT" +echo "[build] image tag: $IMAGE_TAG" +echo "[build] expected wall-clock: hours. Use tmux." +echo + +docker build \ + --build-arg "VLLM_COMMIT=${VLLM_COMMIT}" \ + -t "$IMAGE_TAG" \ + -f Dockerfile \ + . + +echo +echo "[build] done. Switch image: in docker-compose.yml to $IMAGE_TAG." diff --git a/pyinfra/framework/compose/kimi-linear/patch-tokenizer.sh b/pyinfra/framework/compose/kimi-linear/patch-tokenizer.sh new file mode 100755 index 0000000..5d050cd --- /dev/null +++ b/pyinfra/framework/compose/kimi-linear/patch-tokenizer.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# Patch cyankiwi's tokenization_kimi.py to inline `bytes_to_unicode`. +# +# Why: tokenization_kimi.py does +# from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode +# which fails on recent transformers (the helper was removed/relocated). +# The function itself is ~10 lines of public BPE byte-mapping math; we +# inline it. Idempotent — re-running is a no-op once patched. +# +# Run on the box, after weights are downloaded, before first +# `docker compose up`. Recreates the container at the end so +# `trust_remote_code` re-copies the patched file into its module cache. + +set -euo pipefail + +MODEL_DIR="${MODEL_DIR:-/models/moonshotai/Kimi-Linear-48B-A3B-Instruct-AWQ-4bit}" +F="$MODEL_DIR/tokenization_kimi.py" + +if [ ! -f "$F" ]; then + echo "[patch-tokenizer] not found: $F" >&2 + echo "[patch-tokenizer] download weights first, or set MODEL_DIR=" >&2 + exit 1 +fi + +if grep -q '__patched_bytes_to_unicode__' "$F"; then + echo "[patch-tokenizer] $F already patched. Nothing to do." + exit 0 +fi + +if ! grep -q 'from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode' "$F"; then + echo "[patch-tokenizer] expected import line not present in $F." >&2 + echo "[patch-tokenizer] upstream may have changed — inspect manually:" >&2 + echo " grep -n bytes_to_unicode '$F'" >&2 + exit 2 +fi + +python3 - "$F" <<'PYEOF' +import pathlib, sys +p = pathlib.Path(sys.argv[1]) +s = p.read_text() +old = "from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode" +new = ( + "# __patched_bytes_to_unicode__ — inlined; helper removed from recent transformers\n" + "def bytes_to_unicode():\n" + " bs = (list(range(ord(\"!\"), ord(\"~\") + 1))\n" + " + list(range(ord(\"¡\"), ord(\"¬\") + 1))\n" + " + list(range(ord(\"®\"), ord(\"ÿ\") + 1)))\n" + " cs = bs[:]\n" + " n = 0\n" + " for b in range(2**8):\n" + " if b not in bs:\n" + " bs.append(b)\n" + " cs.append(2**8 + n)\n" + " n += 1\n" + " cs = [chr(n) for n in cs]\n" + " return dict(zip(bs, cs))" +) +p.write_text(s.replace(old, new)) +print("[patch-tokenizer] patched", p) +PYEOF + +echo "[patch-tokenizer] recreating container to refresh trust_remote_code module cache" +cd "$(dirname "$0")" +docker compose down +docker compose up -d +echo "[patch-tokenizer] done. Tail logs with: docker compose logs -f" diff --git a/pyinfra/framework/compose/kimi-linear/smoke.sh b/pyinfra/framework/compose/kimi-linear/smoke.sh new file mode 100755 index 0000000..67ad4a6 --- /dev/null +++ b/pyinfra/framework/compose/kimi-linear/smoke.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# Smoke-test the running kimi-linear vLLM container. Exits non-zero if +# anything's wrong, so it doubles as a P1 health check. 
+set -euo pipefail + +HOST="${KIMI_HOST:-127.0.0.1:8000}" +MODEL="${KIMI_MODEL:-kimi-linear}" + +echo "[smoke] GET /v1/models on $HOST" +curl -fsS "http://$HOST/v1/models" | python3 -m json.tool + +echo +echo "[smoke] POST /v1/chat/completions ($MODEL) — tiny generation" +curl -fsS "http://$HOST/v1/chat/completions" \ + -H "Content-Type: application/json" \ + -d "{ + \"model\": \"$MODEL\", + \"messages\": [{\"role\": \"user\", \"content\": \"Reply with exactly: ok\"}], + \"max_tokens\": 16, + \"temperature\": 0.0 + }" | python3 -m json.tool + +echo +echo "[smoke] passed" diff --git a/pyinfra/framework/compose/openwebui.yml b/pyinfra/framework/compose/openwebui.yml index 566b53b..3dc248e 100644 --- a/pyinfra/framework/compose/openwebui.yml +++ b/pyinfra/framework/compose/openwebui.yml @@ -17,6 +17,11 @@ services: - "host.docker.internal:host-gateway" environment: - OLLAMA_BASE_URL=http://host.docker.internal:11434 + # vLLM (Kimi-Linear) exposed as an OpenAI-compatible backend. The + # model isn't strongly tool-trained — opencode's agentic system + # prompt confuses it. OpenWebUI's plain chat UI is the right home. + - OPENAI_API_BASE_URLS=http://host.docker.internal:8000/v1 + - OPENAI_API_KEYS=dummy # Built-in web search via the project's SearXNG instance. - ENABLE_RAG_WEB_SEARCH=true - RAG_WEB_SEARCH_ENGINE=searxng diff --git a/pyinfra/framework/deploy.py b/pyinfra/framework/deploy.py index 7db71bd..16f0c0d 100644 --- a/pyinfra/framework/deploy.py +++ b/pyinfra/framework/deploy.py @@ -343,18 +343,23 @@ server.user( _sudo=True, ) -# Kernel cmdline tuning per Gygeek/Framework-strix-halo-llm-setup: -# - amd_iommu=off — ~6 % memory-read improvement on Strix Halo -# - amdgpu.gttsize=117760 — ~115 GB GTT ceiling so the GPU can borrow -# most of system RAM dynamically. Acts as a -# ceiling, not an allocation. See ../../StrixHaloMemory.md -# for the UMA-vs-GTT trade-off discussion. +# Kernel cmdline tuning. The Strix Halo unified-memory recipe (kyuz0 +# vllm-toolboxes "Kernel Parameters and Unified Memory" + Framework's +# "Linux + ROCm: January 2026 Stable Configurations" thread): +# - amd_iommu=off — ~6 % memory-read improvement +# - amdgpu.gttsize=131072 — 128 GiB GTT ceiling (deprecated knob +# but still honored on kernel 6.16+) +# - ttm.pages_limit=33554432 — 128 GiB in 4 KiB pages; forward- +# compatible TTM page cap +# Combined with BIOS UMA at 0.5 GB and HSA_FORCE_FINE_GRAIN_PCIE=1 in the +# container, PyTorch's HIP allocator merges the two rocminfo pools into a +# single ~110 GB arena. See ../../StrixHaloMemory.md for context. # Requires a reboot to take effect; pyinfra leaves that to you. files.line( - name="GRUB cmdline (amd_iommu, gttsize)", + name="GRUB cmdline (amd_iommu, gttsize, ttm)", path="/etc/default/grub", line=r"^GRUB_CMDLINE_LINUX_DEFAULT=.*", - replace='GRUB_CMDLINE_LINUX_DEFAULT="quiet splash amd_iommu=off amdgpu.gttsize=117760"', + replace='GRUB_CMDLINE_LINUX_DEFAULT="quiet splash amd_iommu=off amdgpu.gttsize=131072 ttm.pages_limit=33554432"', _sudo=True, ) server.shell( @@ -418,6 +423,7 @@ for svc in ( "llama", "vllm", "ollama", + "kimi-linear", "openwebui", "beszel", "openlit", @@ -559,6 +565,27 @@ for cfg in ( _sudo=True, ) +# Kimi-Linear container assets (build script, smoke test, operator doc). +# The compose file itself is copied by the for-loop above; the rest of +# the build context lives under compose/kimi-linear/ on the source side +# and at /srv/docker/kimi-linear/ on the box. Source is the source of +# truth — pyinfra overwrites drift. 
+for asset, mode in ( + ("Dockerfile", "0664"), + ("build.sh", "0775"), + ("smoke.sh", "0775"), + ("patch-tokenizer.sh", "0775"), + ("README.md", "0664"), +): + files.put( + name=f"kimi-linear: {asset}", + src=f"compose/kimi-linear/{asset}", + dest=f"{COMPOSE_DIR}/kimi-linear/{asset}", + group="docker", + mode=mode, + _sudo=True, + ) + # Voice stack — Wyoming-protocol Whisper (STT) and Piper (TTS). Models # are downloaded on first start; bind-mounting these dirs survives # container recreation. diff --git a/pyinfra/framework/run.sh b/pyinfra/framework/run.sh index 46111a5..63ecc80 100755 --- a/pyinfra/framework/run.sh +++ b/pyinfra/framework/run.sh @@ -4,4 +4,4 @@ set -euo pipefail cd "$(dirname "$0")" -exec pyinfra -v --ssh-password-prompt inventory.py deploy.py "$@" +exec pyinfra -v --ssh-password-prompt inventory.py deploy.py "$@" diff --git a/qwen-large-codebase-roadmap.md b/qwen-large-codebase-roadmap.md new file mode 100644 index 0000000..eec798f --- /dev/null +++ b/qwen-large-codebase-roadmap.md @@ -0,0 +1,77 @@ +# Qwen Large Codebase Roadmap + +This document outlines recommendations for working with large codebases using opencode and your existing vector database setup. + +## Current Setup +- Large codebase enriched by large-context LLM +- Vector database for searching codebase +- Opencode with existing MCP servers (Playwright, SearXNG, Serena, Basic-memory, Sequential-thinking, Task-master) + +## Recommended Tooling + +### 1. Continue.dev Integration +- **Purpose**: Better codebase indexing and search with nomic-embed-text +- **Benefits**: Inline completion support, enhanced code understanding capabilities +- **Implementation**: Deploy continue.dev alongside existing setup + +### 2. Enhanced RAG Implementation +- **Purpose**: Leverage vector DB for efficient codebase navigation +- **Approach**: + - Custom MCP server that queries your vector DB for relevant code sections + - Cited search variants for precise code reference retrieval +- **Integration**: Combine with existing symbolic search from Serena MCP + +### 3. Structured Workflows +- **Purpose**: Organize complex projects and search activities +- **Tools**: + - Task-master MCP for task management + - Workflow patterns that tie vector DB queries to specific tasks +- **Benefits**: Better tracking of search results and implementation progress + +### 4. Memory Management +- **Purpose**: Persistent documentation of codebase insights +- **Approach**: + - Leverage basic-memory MCP for notes tied to vector DB queries + - Create patterns for documenting important code patterns, API decisions, and insights +- **Integration**: Connect memory entries to specific vector search results + +### 5. Monitoring Integration +- **Purpose**: Track performance of vector database queries +- **Tools**: + - Phoenix tracing for performance monitoring + - Track tool usage patterns for optimizing search strategies +- **Benefits**: Visibility into query efficiency and system performance + +### 6. Code Navigation Enhancement +- **Purpose**: Combine vector search with symbolic navigation +- **Approach**: + - Use Serena MCP for symbolic code navigation + - Augment with vector search results for context +- **Integration**: Create hybrid search approaches that use both methods + +## Implementation Approach + +1. **Phase 1**: Install continue.dev for enhanced code understanding +2. **Phase 2**: Set up vector DB query tools as custom MCP servers +3. **Phase 3**: Create patterns for combining vector search with symbolic navigation +4. 
**Phase 4**: Implement persistent memory patterns for documenting findings +5. **Phase 5**: Establish monitoring and optimization practices + +## Key Integration Points + +### Vector DB + Opencode +- Use vector database queries to find relevant code sections +- Combine with symbolic search for complete context understanding +- Enable citation-based referencing of code locations + +### Memory + Search +- Document search results in basic-memory +- Create connections between vector DB entries and memory notes +- Maintain a searchable knowledge base of codebase insights + +### Monitoring + Performance +- Track query performance through Phoenix +- Optimize search strategies based on usage patterns +- Monitor system efficiency as complexity scales + +This roadmap provides a gradual approach to enhancing your codebase management capabilities while leveraging your existing infrastructure. \ No newline at end of file diff --git a/vscode-continue-config.yml b/vscode-continue-config.yml new file mode 100644 index 0000000..fbf24fb --- /dev/null +++ b/vscode-continue-config.yml @@ -0,0 +1,49 @@ +name: framework +version: 0.0.1 +schema: v1 + +# Continue.dev config for the framework-backed local LLM stack. Drop at +# ~/.continue/config.yaml (per-machine) or /.continue/config.yaml +# (repo-scoped). One-time prep on the box: `ollama pull nomic-embed-text`. +# +# Two chat models, manually switched via Continue's model dropdown. +# Qwen3-Coder is the daily driver; Kimi-Linear is the long-context play +# and only works once P0 verification passes on the box. + +models: + - name: Qwen3-Coder 30B (Ollama) + provider: ollama + model: qwen3-coder:30b # Ollama tag (the "A3B" is the MoE active count, descriptive only) + apiBase: http://framework:11434 # NO /v1 — Ollama provider speaks /api/* directly + roles: [chat, edit, apply] + defaultCompletionOptions: + contextLength: 65536 # matches OLLAMA_CONTEXT_LENGTH in ollama.yml + + - name: Kimi-Linear 48B-A3B (vLLM) + provider: openai # vLLM exposes OpenAI-compatible /v1 + model: kimi-linear # served-model-name in compose/kimi-linear.yml + apiBase: http://framework:8000/v1 + apiKey: dummy # vLLM doesn't enforce; Continue requires non-empty + roles: [chat, edit, apply] + defaultCompletionOptions: + contextLength: 32768 # P0 narrow start; bump as --max-model-len ramps + + - name: nomic-embed-text + provider: ollama + model: nomic-embed-text + apiBase: http://framework:11434 + roles: [embed] + +context: + - provider: code + - provider: diff + - provider: terminal + - provider: problems + - provider: folder + - provider: codebase # uses the embed-role model above + - provider: file + - provider: open + +# Tab autocomplete intentionally omitted — Qwen3-Coder-30B is too slow +# for inline FIM. Add a small FIM-trained model later (qwen2.5-coder:1.5b +# or starcoder2:3b) and wire as roles: [autocomplete].
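+#
+# A sketch of that future entry (appended under `models:` above), assuming
+# the small model has already been pulled on the box with
+# `ollama pull qwen2.5-coder:1.5b`:
+#
+#  - name: Qwen2.5-Coder 1.5B (autocomplete)
+#    provider: ollama
+#    model: qwen2.5-coder:1.5b
+#    apiBase: http://framework:11434   # same no-/v1 rule as the chat models
+#    roles: [autocomplete]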