Build nvtop 3.2 from source (apt's 3.0.2 doesn't detect gfx1151)
Ubuntu 26.04 ships nvtop 3.0.2 via apt, which predates the gfx1151 sysfs detection improvements that landed in 3.2.x. Symptom: nvtop runs but the iGPU doesn't appear. Drop nvtop from the apt package list, add a from-source build step that pulls a pinned NVTOP_VERSION, builds with -DAMDGPU_SUPPORT=ON, and installs to /usr/local/bin (which wins over /usr/bin in PATH). Idempotent: only rebuilds when the installed version doesn't match. Run `sudo nvtop` to see container processes — non-root users only see their own /proc/<pid>/fdinfo entries. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -80,6 +80,10 @@ Top of `deploy.py`:
|
||||
the version; find it at https://repo.radeon.com/amdgpu-install/.
|
||||
- `AMDGPU_TOP_VERSION` — bump when a newer release lands at
|
||||
https://github.com/Umio-Yasuno/amdgpu_top/releases.
|
||||
- `NVTOP_VERSION` — built from source because Ubuntu 26.04's apt
|
||||
package (3.0.2) predates gfx1151 detection. Bump when a newer release
|
||||
lands at https://github.com/Syllo/nvtop/releases. Run `sudo nvtop` to
|
||||
see all GPU processes (non-root only sees the calling user's own).
|
||||
|
||||
Compose images in
|
||||
`compose/{llama,vllm,ollama,openwebui,beszel,openlit,phoenix,openhands,homepage}.yml`
|
||||
|
||||
@@ -32,6 +32,12 @@ AMDGPU_INSTALL_DEB = "amdgpu-install_7.2.3.70203-1_all.deb"
|
||||
AMDGPU_TOP_VERSION = "0.11.4-1"
|
||||
AMDGPU_TOP_DEB = f"amdgpu-top_without_gui_{AMDGPU_TOP_VERSION}_amd64.deb"
|
||||
|
||||
# nvtop — htop-like GPU monitor with multi-vendor support. Ubuntu 26.04
|
||||
# ships 3.0.2 in apt, which predates the gfx1151 sysfs detection
|
||||
# improvements; we build 3.2.x from source instead. Check for newer tags at
|
||||
# https://github.com/Syllo/nvtop/releases.
|
||||
NVTOP_VERSION = "3.2.0"
|
||||
|
||||
SSH_USER = host.data.get("ssh_user", "noise")
|
||||
MODELS_DIR = "/models"
|
||||
# /srv is the FHS-blessed location for "data and configuration for
|
||||
@@ -52,14 +58,15 @@ apt.packages(
|
||||
# User basics + monitoring tools.
|
||||
apt.packages(
|
||||
name="Base CLI tools",
|
||||
# radeontop intentionally omitted — it predates RDNA 3.5 / Strix Halo
|
||||
# and just errors with "no VRAM support". amdgpu_top installed below.
|
||||
# radeontop intentionally omitted — predates RDNA 3.5 / Strix Halo,
|
||||
# errors with "no VRAM support". amdgpu_top installed below.
|
||||
# nvtop from apt intentionally omitted — Ubuntu 26.04 ships 3.0.2,
|
||||
# which doesn't pick up gfx1151. Built from source below instead.
|
||||
packages=[
|
||||
"tmux",
|
||||
"vim",
|
||||
"htop",
|
||||
"btop",
|
||||
"nvtop",
|
||||
"git",
|
||||
"curl",
|
||||
"ca-certificates",
|
||||
@@ -246,6 +253,39 @@ server.shell(
|
||||
_sudo=True,
|
||||
)
|
||||
|
||||
# nvtop from source. Build deps + clone + cmake + install to /usr/local
|
||||
# (which wins over /usr/bin in $PATH). Idempotent — only rebuilds if
|
||||
# /usr/local/bin/nvtop's version doesn't match NVTOP_VERSION. Run
|
||||
# `sudo nvtop` to see container processes (otherwise non-root user
|
||||
# only sees its own /proc/<pid>/fdinfo entries).
|
||||
# Packages that must be present before the from-source nvtop build runs.
apt.packages(
    name="nvtop build deps",
    packages=[
        # Build tooling.
        "cmake", "build-essential",
        # Development headers (presumably what nvtop's CMake config looks
        # for — confirm against the upstream README when bumping versions).
        "libncurses-dev", "libdrm-dev", "libudev-dev", "libsystemd-dev",
    ],
    _sudo=True,
)
|
||||
# Compile the pinned nvtop tag and install it under /usr/local.
# Everything is a single shell command line so the leading version probe can
# short-circuit with `exit 0` when /usr/local/bin/nvtop already reports
# NVTOP_VERSION; otherwise the tag is cloned, configured, built, installed,
# and the scratch checkout removed.
server.shell(
    name=f"Build & install nvtop {NVTOP_VERSION} from source",
    commands=[
        # grep -F matches the version literally, so the dots in e.g. "3.2.0"
        # are not treated as regex wildcards.
        f"/usr/local/bin/nvtop --version 2>/dev/null | grep -qF 'version {NVTOP_VERSION}' && exit 0; "
        "rm -rf /tmp/nvtop-build && "
        f"git clone --depth 1 --branch {NVTOP_VERSION} "
        "https://github.com/Syllo/nvtop.git /tmp/nvtop-build && "
        "cmake -S /tmp/nvtop-build -B /tmp/nvtop-build/build "
        "-DAMDGPU_SUPPORT=ON -DCMAKE_INSTALL_PREFIX=/usr/local && "
        # A bare `make -j` places no limit on concurrent jobs; cap it at the
        # number of available CPUs instead.
        'make -C /tmp/nvtop-build/build -j"$(nproc)" && '
        "make -C /tmp/nvtop-build/build install && "
        "rm -rf /tmp/nvtop-build",
    ],
    _sudo=True,
)
|
||||
|
||||
# Group membership for /dev/kfd + /dev/dri access (needed for GPU passthrough
|
||||
# into containers, and for unprivileged host-side rocminfo).
|
||||
server.group(name="ensure render group", group="render", _sudo=True)
|
||||
|
||||
Reference in New Issue
Block a user