added models, model-swap, ...
This commit is contained in:
@@ -80,6 +80,8 @@ apt.packages(
|
||||
"ca-certificates",
|
||||
"unzip",
|
||||
"software-properties-common", # for add-apt-repository (g++-14 PPA)
|
||||
"jq", # JSON parsing in operator scripts (bench-engines)
|
||||
"bc", # float math in bench-engines
|
||||
],
|
||||
_sudo=True,
|
||||
)
|
||||
@@ -433,6 +435,7 @@ for svc in (
|
||||
"ollama",
|
||||
"kimi-linear",
|
||||
"qwen3-235b",
|
||||
"qwable",
|
||||
"litellm",
|
||||
"comfyui",
|
||||
"openwebui",
|
||||
@@ -440,6 +443,8 @@ for svc in (
|
||||
"openlit",
|
||||
"phoenix",
|
||||
"openhands",
|
||||
"code-server",
|
||||
"coder",
|
||||
"homepage",
|
||||
"whisper",
|
||||
"piper",
|
||||
@@ -561,6 +566,89 @@ files.directory(
|
||||
_sudo=True,
|
||||
)
|
||||
|
||||
# Persistent state for code-server. The linuxserver image's s6 init drops
# to PUID/PGID 1000 and uses /config as that user's $HOME, so this is
# where extensions, settings, ~/.claude (Claude Code OAuth creds +
# session history) and ~/.local/bin end up. Ownership is 1000:1000 to
# match — same pattern as kokoro: the container user is not in the
# docker group, so 2775 root:docker would not help it.
files.directory(
    path=f"{COMPOSE_DIR}/code-server/config",
    name="code-server config dir",
    mode="0755",
    user="1000",
    group="1000",
    _sudo=True,
)
|
||||
# Default workspace for code-server. The host's UID 1000 and the
# container's PUID 1000 are the same user, so whichever side creates a
# file, it stays owned by the SSH user.
files.directory(
    path=f"{COMPOSE_DIR}/code-server/workspace",
    name="code-server workspace dir",
    mode="2775",
    user=SSH_USER,
    group=SSH_USER,
    _sudo=True,
)
|
||||
# Ship the code-server README from the repo next to its compose assets.
files.put(
    dest=f"{COMPOSE_DIR}/code-server/README.md",
    src="compose/code-server/README.md",
    name="code-server: README.md",
    mode="0664",
    group="docker",
    _sudo=True,
)
|
||||
|
||||
# Coder workspace manager (pilot — compose/coder/README.md lists the
# evaluation criteria against the standalone code-server stack).
# Postgres chowns its own data dir on first start (its entrypoint runs
# as root), so all we do here is create the mount point. Templates are
# Terraform sourced from the repo, mounted read-only into the server
# container, and pushed with
# `docker compose exec coder coder templates push`.
#
# NOTE(review): this operation re-asserts group/mode on every run; if
# the mount point is the Postgres data directory itself, confirm that
# does not fight the ownership/permissions the entrypoint sets.
files.directory(
    path=f"{COMPOSE_DIR}/coder/postgres",
    name="Coder postgres data dir",
    mode="2775",
    group="docker",
    _sudo=True,
)
|
||||
# Destination directory for the repo-sourced code-server Coder template.
files.directory(
    path=f"{COMPOSE_DIR}/coder/templates/code-server",
    name="Coder templates/code-server dir",
    mode="2775",
    group="docker",
    _sudo=True,
)
|
||||
# Coder assets copied from compose/coder/ in the repo to the same
# relative path under the Coder compose dir. Keys are repo-relative
# paths; values are the file mode applied on the box.
for rel_path, file_mode in {
    "templates/code-server/main.tf": "0664",
    "README.md": "0664",
}.items():
    files.put(
        dest=f"{COMPOSE_DIR}/coder/{rel_path}",
        src=f"compose/coder/{rel_path}",
        name=f"coder: {rel_path}",
        mode=file_mode,
        group="docker",
        _sudo=True,
    )
|
||||
# Sibling .env: DOCKER_GROUP_ID (host-specific GID of the docker socket,
# needed for the server container's group_add), the access URL, and a
# random one-time Postgres password. Generated on the box rather than
# placeholder-then-hand-fill because every value is derivable. Never
# overwritten once present.
#
# Because the file is never regenerated, it must not be created with bad
# contents:
#   * Each derived value is captured with a checked assignment, so a
#     failing `stat` or `openssl` aborts the command instead of silently
#     writing an empty DOCKER_GROUP_ID / POSTGRES_PASSWORD.
#   * The file is built in a mktemp sibling, given its final owner and
#     mode, and only then renamed into place — the password is never
#     exposed under the default umask, and a failed chown/chmod cannot
#     leave a half-finished .env that the `test -f` guard would keep.
#   * On any failure after the temp file exists, it is removed and the
#     command exits non-zero so the deploy run reports the error.
server.shell(
    name="Coder .env (generate once)",
    commands=[
        f"test -f {COMPOSE_DIR}/coder/.env || {{ "
        'gid="$(stat -c %g /var/run/docker.sock)" && '
        'pw="$(openssl rand -hex 16)" && '
        'test -n "$gid" && test -n "$pw" && '
        f'tmp="$(mktemp {COMPOSE_DIR}/coder/.env.XXXXXX)" && {{ '
        "printf 'DOCKER_GROUP_ID=%s\\nCODER_ACCESS_URL=http://framework:7080\\nPOSTGRES_PASSWORD=%s\\n' "
        '"$gid" "$pw" > "$tmp" && '
        'chown root:docker "$tmp" && '
        'chmod 640 "$tmp" && '
        f'mv "$tmp" {COMPOSE_DIR}/coder/.env || '
        '{ rm -f "$tmp"; exit 1; }; }; }',
    ],
    _sudo=True,
)
|
||||
|
||||
# Homepage config. The compose loop above only copies homepage.yml; the
|
||||
# YAML config files live in compose/homepage/ on the source side and at
|
||||
# /srv/docker/homepage/config/ on the box. Source-of-truth is the repo —
|
||||
@@ -622,6 +710,22 @@ for asset, mode in (
|
||||
_sudo=True,
|
||||
)
|
||||
|
||||
# Operator assets for Qwable. It runs the same image as llama (kyuz0
# rocm-7.2.2) and serves a dense 27B Qwen3.6 fine-tuned on Fable-5
# traces. The weights are placed under /models/qwen/ by a manual
# `hf download`, as described in the README. This is the `qwable`
# target of swap-model.
for rel_path, file_mode in {
    "smoke.sh": "0775",
    "README.md": "0664",
}.items():
    files.put(
        dest=f"{COMPOSE_DIR}/qwable/{rel_path}",
        src=f"compose/qwable/{rel_path}",
        name=f"qwable: {rel_path}",
        mode=file_mode,
        group="docker",
        _sudo=True,
    )
|
||||
|
||||
# LiteLLM router assets. config.yaml is the source-of-truth model
|
||||
# routing table — pyinfra syncs it on every run; edits on the box get
|
||||
# overwritten. The .env file holds LITELLM_MASTER_KEY + LITELLM_SALT_KEY
|
||||
@@ -758,6 +862,37 @@ files.directory(
|
||||
_sudo=True,
|
||||
)
|
||||
|
||||
# --- Operator scripts -------------------------------------------------------
|
||||
|
||||
# swap-model — a single command that changes which inference container
# is GPU-resident. It encodes the coexistence table (235B does not fit
# alongside anything; ollama+kimi do) together with per-service health
# probes. Installed into /usr/local/bin so the SSH user (a member of the
# docker group) can invoke it directly: `ssh framework swap-model 235b`.
# scripts/swap-model documents the modes; bin/swap-model is the Mac-side
# wrapper.
files.put(
    dest="/usr/local/bin/swap-model",
    src="scripts/swap-model",
    name="swap-model script (box-side)",
    mode="0755",
    user="root",
    group="root",
    _sudo=True,
)
|
||||
|
||||
# bench-engines — a one-shot decision tool for consolidating the GGUF
# tier (Ollama vs kyuz0 llama.cpp decode t/s on gfx1151). Background is
# in the framework README under "Inference engine consolidation".
files.put(
    dest="/usr/local/bin/bench-engines",
    src="scripts/bench-engines",
    name="bench-engines script (box-side)",
    mode="0755",
    user="root",
    group="root",
    _sudo=True,
)
|
||||
|
||||
# --- Cleanup of artifacts from the prior native-build deploy ----------------
|
||||
# All idempotent — `present=False` is a no-op when the target is absent.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user