added models, model-swap, ...

2026-06-26 08:13:33 -04:00
parent de1635872f
commit 224afbb3a6
18 changed files with 1659 additions and 243 deletions
--- a/pyinfra/framework/deploy.py
+++ b/pyinfra/framework/deploy.py
@@ -80,6 +80,8 @@ apt.packages(
        "ca-certificates",
        "unzip",
        "software-properties-common",   # for add-apt-repository (g++-14 PPA)
+        "jq",                           # JSON parsing in operator scripts (bench-engines)
+        "bc",                           # float math in bench-engines
    ],
    _sudo=True,
 )
@@ -433,6 +435,7 @@ for svc in (
    "ollama",
    "kimi-linear",
    "qwen3-235b",
+    "qwable",
    "litellm",
    "comfyui",
    "openwebui",
@@ -440,6 +443,8 @@ for svc in (
    "openlit",
    "phoenix",
    "openhands",
+    "code-server",
+    "coder",
    "homepage",
    "whisper",
    "piper",
@@ -561,6 +566,89 @@ files.directory(
    _sudo=True,
 )

+# code-server persistent state: /config is the container user's $HOME
+# (extensions, settings, ~/.claude with Claude Code OAuth creds + session
+# history, ~/.local/bin). The linuxserver image's s6 init drops to
+# PUID/PGID 1000, so own it 1000:1000 — same pattern as kokoro: that user
+# isn't in the docker group, so 2775 root:docker wouldn't help it.
+files.directory(
+    path=f"{COMPOSE_DIR}/code-server/config",
+    name="code-server config dir",
+    mode="0755",
+    user="1000",
+    group="1000",
+    _sudo=True,
+)
+# Default workspace, owned by the SSH user: host UID 1000 matches the
+# container's PUID 1000, so files created on either side keep that owner.
+files.directory(
+    path=f"{COMPOSE_DIR}/code-server/workspace",
+    name="code-server workspace dir",
+    mode="2775",
+    user=SSH_USER,
+    group=SSH_USER,
+    _sudo=True,
+)
+# README copied from the repo into the code-server directory on the box.
+files.put(
+    src="compose/code-server/README.md",
+    dest=f"{COMPOSE_DIR}/code-server/README.md",
+    name="code-server: README.md",
+    mode="0664",
+    group="docker",
+    _sudo=True,
+)
+
+# Coder workspace manager — a pilot; compose/coder/README.md holds the
+# evaluation criteria against the standalone code-server stack. Only the
+# Postgres mount point is created here: its entrypoint runs as root and
+# chowns the data dir itself at first start. Templates are Terraform
+# sourced from the repo, mounted read-only into the server container and
+# pushed with `docker compose exec coder coder templates push`.
+files.directory(
+    path=f"{COMPOSE_DIR}/coder/postgres",
+    name="Coder postgres data dir",
+    mode="2775",
+    group="docker",
+    _sudo=True,
+)
+files.directory(
+    path=f"{COMPOSE_DIR}/coder/templates/code-server",
+    name="Coder templates/code-server dir",
+    mode="2775",
+    group="docker",
+    _sudo=True,
+)
+for asset, mode in {
+    "templates/code-server/main.tf": "0664",
+    "README.md": "0664",
+}.items():
+    files.put(
+        src=f"compose/coder/{asset}",
+        dest=f"{COMPOSE_DIR}/coder/{asset}",
+        name=f"coder: {asset}",
+        mode=mode,
+        group="docker",
+        _sudo=True,
+    )
+# Sibling .env: DOCKER_GROUP_ID (host-specific GID of the docker socket,
+# needed for the server container's group_add), the access URL, and a
+# random one-time Postgres password, all derivable on the box. Staged in
+# .env.tmp under umask 077 and renamed only when every step succeeded, so
+# a failed stat/openssl can't pin an empty value. Never overwritten.
+server.shell(
+    name="Coder .env (generate once)",
+    commands=[
+        f"test -f {COMPOSE_DIR}/coder/.env || ( umask 077 && "
+        f'gid="$(stat -c %g /var/run/docker.sock)" && pw="$(openssl rand -hex 16)" && '
+        f"printf 'DOCKER_GROUP_ID=%s\\nCODER_ACCESS_URL=http://framework:7080\\nPOSTGRES_PASSWORD=%s\\n' "
+        f'"$gid" "$pw" > {COMPOSE_DIR}/coder/.env.tmp && '
+        f"chown root:docker {COMPOSE_DIR}/coder/.env.tmp && chmod 640 {COMPOSE_DIR}/coder/.env.tmp && "
+        f"mv {COMPOSE_DIR}/coder/.env.tmp {COMPOSE_DIR}/coder/.env )",
+    ],
+    _sudo=True,
+)
+
 # Homepage config. The compose loop above only copies homepage.yml; the
 # YAML config files live in compose/homepage/ on the source side and at
 # /srv/docker/homepage/config/ on the box. Source-of-truth is the repo —
@@ -622,6 +710,22 @@ for asset, mode in (
        _sudo=True,
    )

+# Operator assets for Qwable (the swap-model `qwable` target): dense 27B
+# Qwen3.6 fine-tuned on Fable-5 traces, same image as llama (kyuz0
+# rocm-7.2.2). Weights: manual `hf download` to /models/qwen/, per README.
+for asset, mode in {
+    "smoke.sh": "0775",
+    "README.md": "0664",
+}.items():
+    files.put(
+        src=f"compose/qwable/{asset}",
+        dest=f"{COMPOSE_DIR}/qwable/{asset}",
+        name=f"qwable: {asset}",
+        mode=mode,
+        group="docker",
+        _sudo=True,
+    )
+
 # LiteLLM router assets. config.yaml is the source-of-truth model
 # routing table — pyinfra syncs it on every run; edits on the box get
 # overwritten. The .env file holds LITELLM_MASTER_KEY + LITELLM_SALT_KEY
@@ -758,6 +862,37 @@ files.directory(
    _sudo=True,
 )

+# --- Operator scripts -------------------------------------------------------
+
+# swap-model — switches, in one command, which inference container is
+# GPU-resident. It encodes the coexistence table (235B fits alongside
+# nothing; ollama and kimi can share) plus per-service health probes.
+# Installed in /usr/local/bin so the SSH user (in the docker group) can
+# run it directly: `ssh framework swap-model 235b`. Modes are described
+# in scripts/swap-model; bin/swap-model is the Mac-side wrapper.
+files.put(
+    src="scripts/swap-model",
+    dest="/usr/local/bin/swap-model",
+    name="swap-model script (box-side)",
+    mode="0755",
+    user="root",
+    group="root",
+    _sudo=True,
+)
+
+# bench-engines — one-off tool to settle the GGUF-tier consolidation by
+# comparing Ollama and kyuz0 llama.cpp decode t/s on gfx1151. Background:
+# the framework README, "Inference engine consolidation".
+files.put(
+    src="scripts/bench-engines",
+    dest="/usr/local/bin/bench-engines",
+    name="bench-engines script (box-side)",
+    mode="0755",
+    user="root",
+    group="root",
+    _sudo=True,
+)
+
 # --- Cleanup of artifacts from the prior native-build deploy ----------------
 # All idempotent — `present=False` is a no-op when the target is absent.