Files
localgenai/framework/deploy.py
2026-05-07 07:37:40 -04:00

319 lines
8.7 KiB
Python

"""
pyinfra deploy for Framework Desktop (Ryzen AI Max+ 395 / Strix Halo).
Containerized layout: inference engines (llama.cpp, vLLM, Ollama) run as
docker compose services, each shipping its own ROCm/Vulkan stack. The
host stays minimal — kernel + driver + diagnostics + Docker.
Run with:
./run.sh # equivalent to pyinfra inventory.py deploy.py
Idempotent — re-running only does work that's actually needed. Includes
one-shot cleanup steps for artifacts left over from the earlier native
build (llama.cpp git checkout, symlinks, systemd unit, full ROCm).
"""
from io import StringIO
from pyinfra.operations import apt, files, server, systemd
from pyinfra import host
# --- Tunables ----------------------------------------------------------------
# ROCm release used to build the amdgpu-install download URL.
# Latest stable as of 2026-04-30. Verify at https://repo.radeon.com/amdgpu-install/.
ROCM_VERSION = "7.2.3"
# Exact filename of the amdgpu-install package for ROCM_VERSION.
# The .deb filename has an opaque build suffix — find the exact name at
# the URL above and paste it here. Bump it together with ROCM_VERSION: both
# values are combined into the download URL.
AMDGPU_INSTALL_DEB = "amdgpu-install_7.2.3.70203-1_all.deb"
# Login user on the target box; taken from the inventory's host data key
# "ssh_user" and falling back to "noise" when that key is not set.
SSH_USER = host.data.get("ssh_user", "noise")
# Root directory under which the per-vendor model directories are created.
MODELS_DIR = "/models"
# Where the docker compose files are deployed. Assumes the login user's home
# directory is /home/<SSH_USER>.
COMPOSE_DIR = f"/home/{SSH_USER}/docker"
# --- Phase 1 — Base OS basics ------------------------------------------------
# Package sets installed on the bare host, grouped by purpose.
_HWE_KERNEL_PACKAGES = ["linux-generic-hwe-24.04"]
_BASE_CLI_PACKAGES = [
    "tmux",
    "vim",
    "htop",
    "btop",
    "nvtop",
    "radeontop",
    "git",
    "curl",
    "ca-certificates",
    "unzip",
]
_VULKAN_DIAG_PACKAGES = ["mesa-vulkan-drivers", "vulkan-tools"]

# Refresh the package index before any install below.
apt.update(_sudo=True, name="apt update")

# Hardware-enablement kernel metapackage.
apt.packages(
    _sudo=True,
    name="HWE kernel (>=6.11 for ROCm 7)",
    packages=_HWE_KERNEL_PACKAGES,
)

# Everyday shell utilities plus CPU/GPU monitoring tools.
apt.packages(
    _sudo=True,
    name="Base CLI tools",
    packages=_BASE_CLI_PACKAGES,
)

# Host-side Vulkan diagnostics only; the containers bring their own runtime.
apt.packages(
    _sudo=True,
    name="Vulkan host diagnostics",
    packages=_VULKAN_DIAG_PACKAGES,
)
# uv (Python package/tool manager). The upstream installer script is piped
# into sh on every run, with the install directory pinned to /usr/local/bin.
_uv_install_cmd = (
    "curl -LsSf https://astral.sh/uv/install.sh | "
    "env UV_INSTALL_DIR=/usr/local/bin UV_UNMANAGED_INSTALL=1 sh"
)
server.shell(
    _sudo=True,
    name="Install / upgrade uv",
    commands=[_uv_install_cmd],
)

# gpakosz/.tmux config (Oh My Tmux!), checked out into the login user's home.
TMUX_CONF_DIR = f"/home/{SSH_USER}/.tmux"

# Clone as the login user, and only when no checkout exists yet.
_tmux_clone_cmd = (
    f"test -d {TMUX_CONF_DIR}/.git || "
    f"git clone --depth 1 https://github.com/gpakosz/.tmux.git {TMUX_CONF_DIR}"
)
server.shell(
    _sudo=True,
    _sudo_user=SSH_USER,
    name="Clone gpakosz/.tmux",
    commands=[_tmux_clone_cmd],
)

# Point ~/.tmux.conf at the file inside the checkout.
files.link(
    _sudo=True,
    name="Symlink ~/.tmux.conf -> ~/.tmux/.tmux.conf",
    path=f"/home/{SSH_USER}/.tmux.conf",
    target=f"{TMUX_CONF_DIR}/.tmux.conf",
    user=SSH_USER,
    group=SSH_USER,
)

# Copy the override template only when the user has none yet, so existing
# customizations are never overwritten.
_tmux_seed_cmd = (
    f"test -f /home/{SSH_USER}/.tmux.conf.local || "
    f"cp {TMUX_CONF_DIR}/.tmux.conf.local /home/{SSH_USER}/"
)
server.shell(
    _sudo=True,
    _sudo_user=SSH_USER,
    name="Seed ~/.tmux.conf.local (if missing)",
    commands=[_tmux_seed_cmd],
)
# --- Tailscale ---------------------------------------------------------------
# The noble repo is used regardless of the host's Ubuntu release: the
# package is self-contained Go binaries, so it works on later Ubuntus too.
# Each entry is (operation name, source URL, destination path); the signing
# key is fetched first, then the apt source list.
_TAILSCALE_REPO_FILES = (
    (
        "Fetch Tailscale apt key",
        "https://pkgs.tailscale.com/stable/ubuntu/noble.noarmor.gpg",
        "/usr/share/keyrings/tailscale-archive-keyring.gpg",
    ),
    (
        "Fetch Tailscale apt list",
        "https://pkgs.tailscale.com/stable/ubuntu/noble.tailscale-keyring.list",
        "/etc/apt/sources.list.d/tailscale.list",
    ),
)
for _op_name, _src_url, _dest_path in _TAILSCALE_REPO_FILES:
    files.download(
        _sudo=True,
        name=_op_name,
        src=_src_url,
        dest=_dest_path,
        mode="644",
    )

# Pick up the newly added repo before installing from it.
apt.update(_sudo=True, name="apt update (Tailscale)")
apt.packages(_sudo=True, name="Install Tailscale", packages=["tailscale"])

# Keep the daemon running now and across reboots.
systemd.service(
    _sudo=True,
    name="Enable tailscaled",
    service="tailscaled",
    enabled=True,
    running=True,
)
# Not automated: `sudo tailscale up` needs interactive browser auth, so it
# has to be run by hand once.
# --- Docker -----------------------------------------------------------------
# Engine and compose plugin, both from the distribution's own packages.
_DOCKER_PACKAGES = ["docker.io", "docker-compose-v2"]
apt.packages(
    _sudo=True,
    name="Install Docker + compose plugin",
    packages=_DOCKER_PACKAGES,
)

# Start the daemon now and enable it at boot.
systemd.service(
    _sudo=True,
    name="Enable docker daemon",
    service="docker",
    enabled=True,
    running=True,
)
# --- GPU access (host kernel/driver bits only) ------------------------------
# AMD's amdgpu-install package adds the ROCm apt repo; we use it just to
# get rocminfo / rocm-smi-lib for host-side diagnostics. Containers ship
# their own ROCm.
files.directory(
    name="apt keyring dir",
    path="/etc/apt/keyrings",
    mode="755",
    _sudo=True,
)

# Local staging path and upstream URL for the amdgpu-install package.
amdgpu_deb = f"/tmp/{AMDGPU_INSTALL_DEB}"
amdgpu_url = (
    f"https://repo.radeon.com/amdgpu-install/{ROCM_VERSION}/ubuntu/noble/"
    f"{AMDGPU_INSTALL_DEB}"
)

# Use the same files.download operation as the Tailscale section rather than
# a hand-rolled `test -f ... || curl ... -o`: an interrupted curl could leave
# a truncated .deb at the final path, which every later run would then treat
# as already downloaded. files.download still skips the fetch when the
# destination exists.
# NOTE(review): pyinfra is expected to stage the download before moving it
# into place — confirm against the pinned pyinfra version.
files.download(
    name="Fetch amdgpu-install .deb",
    src=amdgpu_url,
    dest=amdgpu_deb,
    _sudo=True,
)
server.shell(
    name="Install amdgpu-install package",
    commands=[f"apt install -y {amdgpu_deb}"],
    _sudo=True,
)

# Idempotent cleanup: if a prior run installed the full ROCm userspace
# (~25 GB), tear it down before installing the diagnostic-only subset.
# On a fresh box this is a no-op.
server.shell(
    name="Remove full ROCm install if present",
    commands=[
        "if dpkg -l rocm-dev 2>/dev/null | grep -q '^ii'; then "
        " amdgpu-install -y --uninstall || true; "
        "fi",
    ],
    _sudo=True,
)

# Installing the amdgpu-install package only registers the ROCm repo; the
# package index still has to be refreshed before packages from that repo
# can be resolved. Without this, a first run on a fresh box cannot find the
# ROCm packages below. Mirrors the post-repo refresh in the Tailscale section.
apt.update(name="apt update (ROCm)", _sudo=True)
apt.packages(
    name="ROCm host diagnostics (rocminfo, rocm-smi)",
    packages=["rocminfo", "rocm-smi-lib"],
    _sudo=True,
)
# The login user needs access to /dev/kfd and /dev/dri, both for GPU
# passthrough into containers and for running rocminfo on the host without
# root. Make sure the two device groups exist first.
for _op_name, _group_name in (
    ("ensure render group", "render"),
    ("ensure video group", "video"),
):
    server.group(_sudo=True, name=_op_name, group=_group_name)

# append=True adds these groups on top of the user's existing memberships.
_LOGIN_USER_GROUPS = ["render", "video", "docker"]
server.user(
    _sudo=True,
    name="Add login user to render/video/docker",
    user=SSH_USER,
    groups=_LOGIN_USER_GROUPS,
    append=True,
)
# --- Storage layout ---------------------------------------------------------
# Every directory in the model tree is owned by the login user, mode 755,
# and created via sudo.
_model_dir_opts = {
    "user": SSH_USER,
    "group": SSH_USER,
    "mode": "755",
    "_sudo": True,
}

# Top of the tree.
files.directory(name="/models root", path=MODELS_DIR, **_model_dir_opts)

# One directory per model vendor.
for sub in ("moonshotai", "qwen", "deepseek", "zai", "mistralai"):
    files.directory(
        name=f"{MODELS_DIR}/{sub}",
        path=f"{MODELS_DIR}/{sub}",
        **_model_dir_opts,
    )

# Bind-mount target for Ollama's content-addressed store.
files.directory(
    name=f"{MODELS_DIR}/ollama",
    path=f"{MODELS_DIR}/ollama",
    **_model_dir_opts,
)
# --- Compose files for inference services ----------------------------------
# Ownership shared by everything placed under COMPOSE_DIR.
_compose_owner = {"user": SSH_USER, "group": SSH_USER, "_sudo": True}

files.directory(
    name="Compose root",
    path=COMPOSE_DIR,
    mode="755",
    **_compose_owner,
)

# Idempotent cleanup: earlier iterations kept compose files in /srv/compose.
files.directory(
    _sudo=True,
    name="Remove old /srv/compose",
    path="/srv/compose",
    present=False,
)

# Per service: create its directory, then upload the local compose/<svc>.yml
# as docker-compose.yml inside it.
for svc in ("llama", "vllm", "ollama"):
    files.directory(
        name=f"compose/{svc} dir",
        path=f"{COMPOSE_DIR}/{svc}",
        mode="755",
        **_compose_owner,
    )
    files.put(
        name=f"compose/{svc}/docker-compose.yml",
        src=f"compose/{svc}.yml",
        dest=f"{COMPOSE_DIR}/{svc}/docker-compose.yml",
        mode="644",
        **_compose_owner,
    )
# --- Cleanup of artifacts from the prior native-build deploy ----------------
# Every step here is safe to repeat: `present=False` does nothing when the
# target is already gone, and the shell steps tolerate a missing unit or user.

# Stop the old native llama-server unit before deleting its unit file.
_stop_llama_unit = "systemctl disable --now llama-server.service 2>/dev/null || true"
server.shell(
    _sudo=True,
    name="Stop & disable old native llama-server.service",
    commands=[_stop_llama_unit],
)
files.file(
    _sudo=True,
    name="Remove old llama-server.service",
    path="/etc/systemd/system/llama-server.service",
    present=False,
)

# Backend-specific launcher symlinks left in /usr/local/bin.
for _op_name, _link_path in (
    ("Remove old llama-server-vulkan symlink", "/usr/local/bin/llama-server-vulkan"),
    ("Remove old llama-server-rocm symlink", "/usr/local/bin/llama-server-rocm"),
):
    files.link(_sudo=True, name=_op_name, path=_link_path, present=False)

# The git checkout the native build was compiled from.
files.directory(
    _sudo=True,
    name="Remove old llama.cpp checkout",
    path="/opt/llama.cpp",
    present=False,
)

# Native Ollama: stop the unit, delete the unit file and binary, drop the
# service account.
_native_ollama_teardown = [
    "systemctl disable --now ollama.service 2>/dev/null || true",
    "rm -f /etc/systemd/system/ollama.service /usr/local/bin/ollama",
    "userdel ollama 2>/dev/null || true",
]
server.shell(
    _sudo=True,
    name="Stop & remove native Ollama install",
    commands=_native_ollama_teardown,
)

# Have systemd re-read its unit files after the removals above.
systemd.daemon_reload(_sudo=True, name="systemctl daemon-reload")