added models, model-swap, ...

This commit is contained in:
2026-06-26 08:13:33 -04:00
parent de1635872f
commit 224afbb3a6
18 changed files with 1659 additions and 243 deletions

View File

@@ -80,6 +80,8 @@ apt.packages(
"ca-certificates",
"unzip",
"software-properties-common", # for add-apt-repository (g++-14 PPA)
"jq", # JSON parsing in operator scripts (bench-engines)
"bc", # float math in bench-engines
],
_sudo=True,
)
@@ -433,6 +435,7 @@ for svc in (
"ollama",
"kimi-linear",
"qwen3-235b",
"qwable",
"litellm",
"comfyui",
"openwebui",
@@ -440,6 +443,8 @@ for svc in (
"openlit",
"phoenix",
"openhands",
"code-server",
"coder",
"homepage",
"whisper",
"piper",
@@ -561,6 +566,89 @@ files.directory(
_sudo=True,
)
# Persistent state for code-server. The linuxserver image's s6 init drops
# to PUID/PGID 1000 and treats /config as that container user's $HOME, so
# this directory holds extensions, settings, ~/.claude (Claude Code OAuth
# creds + session history) and ~/.local/bin. Ownership is 1000:1000 to
# match (same pattern as kokoro): the container user isn't in the docker
# group, so 2775 root:docker wouldn't help it.
files.directory(
    name="code-server config dir",
    path=f"{COMPOSE_DIR}/code-server/config",
    mode="0755",
    user="1000",
    group="1000",
    _sudo=True,
)
# Default code-server workspace. Host UID 1000 and container PUID 1000 are
# the same, so files created on either side stay owned by the SSH user.
files.directory(
    name="code-server workspace dir",
    path=f"{COMPOSE_DIR}/code-server/workspace",
    mode="2775",
    user=SSH_USER,
    group=SSH_USER,
    _sudo=True,
)
# Copy the code-server README from the repo into the stack's directory on
# the box (group docker, mode 0664).
files.put(
    name="code-server: README.md",
    dest=f"{COMPOSE_DIR}/code-server/README.md",
    src="compose/code-server/README.md",
    mode="0664",
    group="docker",
    _sudo=True,
)
# Coder workspace manager — a pilot; compose/coder/README.md has the
# evaluation criteria against the standalone code-server stack.
# Postgres chowns its own data dir at first start (its entrypoint runs as
# root), so all we do here is create the mount point. Templates are
# Terraform sourced from the repo, mounted read-only into the server
# container, and pushed with
# `docker compose exec coder coder templates push`.
# NOTE(review): this op re-asserts group/mode on every run, so it may
# flip-flop with whatever ownership/mode Postgres applies — confirm.
files.directory(
    name="Coder postgres data dir",
    path=f"{COMPOSE_DIR}/coder/postgres",
    mode="2775",
    group="docker",
    _sudo=True,
)
# Destination directory for the Coder code-server template synced below
# (setgid, group docker).
files.directory(
    name="Coder templates/code-server dir",
    path=f"{COMPOSE_DIR}/coder/templates/code-server",
    mode="2775",
    group="docker",
    _sudo=True,
)
# Sync each Coder asset from compose/coder/ in the repo to the same
# relative path under the Coder stack directory; the mapping value is the
# file mode to apply.
for asset, mode in {
    "templates/code-server/main.tf": "0664",
    "README.md": "0664",
}.items():
    files.put(
        name=f"coder: {asset}",
        dest=f"{COMPOSE_DIR}/coder/{asset}",
        src=f"compose/coder/{asset}",
        mode=mode,
        group="docker",
        _sudo=True,
    )
# Sibling .env: DOCKER_GROUP_ID (host-specific GID of the docker socket,
# needed for the server container's group_add), the access URL, and a
# random one-time Postgres password. Generated on the box rather than
# placeholder-then-hand-fill because every value is derivable. Never
# overwritten once present.
#
# Because the file is never regenerated, a bad first write would be
# permanent, so the command is built defensively:
#   * the GID and password are captured in `&&`-chained assignments, so a
#     failing `stat`/`openssl` aborts the run instead of silently writing
#     an empty value (a failure inside printf's argument list is ignored);
#   * `umask 077` keeps the secret from ever being world-readable, even
#     briefly before the chmod;
#   * the content is written to a temp file and renamed into place, so a
#     half-finished run never leaves a partial .env that blocks retries.
# Everything runs in a subshell so the umask does not leak.
coder_env_path = f"{COMPOSE_DIR}/coder/.env"
server.shell(
    name="Coder .env (generate once)",
    commands=[
        f"test -f {coder_env_path} || ( "
        "umask 077 && "
        'gid="$(stat -c %g /var/run/docker.sock)" && '
        'pw="$(openssl rand -hex 16)" && '
        "printf 'DOCKER_GROUP_ID=%s\\nCODER_ACCESS_URL=http://framework:7080\\nPOSTGRES_PASSWORD=%s\\n' "
        f'"$gid" "$pw" > {coder_env_path}.tmp && '
        f"chown root:docker {coder_env_path}.tmp && "
        f"chmod 640 {coder_env_path}.tmp && "
        f"mv -f {coder_env_path}.tmp {coder_env_path} )",
    ],
    _sudo=True,
)
# Homepage config. The compose loop above only copies homepage.yml; the
# YAML config files live in compose/homepage/ on the source side and at
# /srv/docker/homepage/config/ on the box. Source-of-truth is the repo —
@@ -622,6 +710,22 @@ for asset, mode in (
_sudo=True,
)
# Qwable operator assets. Uses the same image as llama (kyuz0 rocm-7.2.2);
# the model is a dense 27B Qwen3.6 fine-tuned on Fable-5 traces. Weights
# live at /models/qwen/ and are fetched by a manual `hf download` as
# described in the README. This is the `qwable` target of swap-model.
for asset, mode in {
    "smoke.sh": "0775",
    "README.md": "0664",
}.items():
    files.put(
        name=f"qwable: {asset}",
        dest=f"{COMPOSE_DIR}/qwable/{asset}",
        src=f"compose/qwable/{asset}",
        mode=mode,
        group="docker",
        _sudo=True,
    )
# LiteLLM router assets. config.yaml is the source-of-truth model
# routing table — pyinfra syncs it on every run; edits on the box get
# overwritten. The .env file holds LITELLM_MASTER_KEY + LITELLM_SALT_KEY
@@ -758,6 +862,37 @@ files.directory(
_sudo=True,
)
# --- Operator scripts -------------------------------------------------------
# swap-model: a one-command switch for which inference container is
# GPU-resident. It encodes the coexistence table (235B doesn't fit
# alongside anything; ollama+kimi do) plus per-service health probes.
# Installed in /usr/local/bin so the SSH user (a member of the docker
# group) can invoke it directly: `ssh framework swap-model 235b`.
# scripts/swap-model documents the modes; bin/swap-model is the Mac-side
# wrapper.
files.put(
    name="swap-model script (box-side)",
    dest="/usr/local/bin/swap-model",
    src="scripts/swap-model",
    mode="0755",
    user="root",
    group="root",
    _sudo=True,
)
# bench-engines: a one-shot decision tool for the GGUF-tier consolidation,
# comparing Ollama vs kyuz0 llama.cpp decode t/s on gfx1151. Background is
# in the framework README under "Inference engine consolidation".
files.put(
    name="bench-engines script (box-side)",
    dest="/usr/local/bin/bench-engines",
    src="scripts/bench-engines",
    mode="0755",
    user="root",
    group="root",
    _sudo=True,
)
# --- Cleanup of artifacts from the prior native-build deploy ----------------
# All idempotent — `present=False` is a no-op when the target is absent.