From 37b0cd9a5836f50f503da28cb44af95e48b752b4 Mon Sep 17 00:00:00 2001 From: noisedestroyers Date: Fri, 8 May 2026 15:56:10 -0400 Subject: [PATCH] Build nvtop 3.2 from source (apt's 3.0.2 doesn't detect gfx1151) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ubuntu 26.04 ships nvtop 3.0.2 via apt, which predates the gfx1151 sysfs detection improvements that landed in 3.2.x. Symptom: nvtop runs but the iGPU doesn't appear. Drop nvtop from the apt package list, add a from-source build step that pulls a pinned NVTOP_VERSION, builds with -DAMDGPU_SUPPORT=ON, and installs to /usr/local/bin (which wins over /usr/bin in PATH). Idempotent: only rebuilds when the installed version doesn't match. Run `sudo nvtop` to see container processes — non-root users only see their own /proc/[pid]/fdinfo entries. Co-Authored-By: Claude Opus 4.7 (1M context) --- pyinfra/framework/README.md | 4 ++++ pyinfra/framework/deploy.py | 46 ++++++++++++++++++++++++++++++++++--- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/pyinfra/framework/README.md b/pyinfra/framework/README.md index 5f133ac..e69023f 100644 --- a/pyinfra/framework/README.md +++ b/pyinfra/framework/README.md @@ -80,6 +80,10 @@ Top of `deploy.py`: the version; find it at https://repo.radeon.com/amdgpu-install/. - `AMDGPU_TOP_VERSION` — bump when a newer release lands at https://github.com/Umio-Yasuno/amdgpu_top/releases. +- `NVTOP_VERSION` — built from source because Ubuntu 26.04's apt + package (3.0.2) predates gfx1151 detection. Bump when a newer release + lands at https://github.com/Syllo/nvtop/releases. Run `sudo nvtop` to + see all GPU processes (non-root only sees the calling user's own).
Compose images in `compose/{llama,vllm,ollama,openwebui,beszel,openlit,phoenix,openhands,homepage}.yml` diff --git a/pyinfra/framework/deploy.py b/pyinfra/framework/deploy.py index 57cd15a..7068198 100644 --- a/pyinfra/framework/deploy.py +++ b/pyinfra/framework/deploy.py @@ -32,6 +32,12 @@ AMDGPU_INSTALL_DEB = "amdgpu-install_7.2.3.70203-1_all.deb" AMDGPU_TOP_VERSION = "0.11.4-1" AMDGPU_TOP_DEB = f"amdgpu-top_without_gui_{AMDGPU_TOP_VERSION}_amd64.deb" +# nvtop — htop-like GPU monitor with multi-vendor support. Ubuntu 26.04 +# ships 3.0.2 in apt, which predates the gfx1151 sysfs detection +# improvements; we build 3.2.x from source instead. Verify at +# https://github.com/Syllo/nvtop/releases. +NVTOP_VERSION = "3.2.0" + SSH_USER = host.data.get("ssh_user", "noise") MODELS_DIR = "/models" # /srv is the FHS-blessed location for "data and configuration for @@ -52,14 +58,15 @@ apt.packages( # User basics + monitoring tools. apt.packages( name="Base CLI tools", - # radeontop intentionally omitted — it predates RDNA 3.5 / Strix Halo - # and just errors with "no VRAM support". amdgpu_top installed below. + # radeontop intentionally omitted — predates RDNA 3.5 / Strix Halo, + # errors with "no VRAM support". amdgpu_top installed below. + # nvtop from apt intentionally omitted — Ubuntu 26.04 ships 3.0.2, + # which doesn't pick up gfx1151. Built from source below instead. packages=[ "tmux", "vim", "htop", "btop", - "nvtop", "git", "curl", "ca-certificates", @@ -246,6 +253,39 @@ server.shell( _sudo=True, ) +# nvtop from source. Build deps + clone + cmake + install to /usr/local +# (which wins over /usr/bin in $PATH). Idempotent — only rebuilds if +# /usr/local/bin/nvtop's version doesn't match NVTOP_VERSION. Run +# `sudo nvtop` to see container processes (otherwise non-root user +# only sees its own /proc/[pid]/fdinfo entries).
+apt.packages(
+    name="nvtop build deps",  # toolchain + headers needed to compile nvtop
+    packages=[
+        "cmake",
+        "build-essential",
+        "libncurses-dev",
+        "libdrm-dev",
+        "libudev-dev",
+        "libsystemd-dev",
+    ],
+    _sudo=True,
+)
+server.shell(
+    name=f"Build & install nvtop {NVTOP_VERSION} from source",
+    commands=[  # one shell command: skip if already current, else clone/build/install
+        f"/usr/local/bin/nvtop --version 2>/dev/null | grep -qF 'version {NVTOP_VERSION}' && exit 0; "  # -F: dots literal
+        "rm -rf /tmp/nvtop-build && "  # clear leftovers from a failed earlier run
+        f"git clone --depth 1 --branch {NVTOP_VERSION} "  # shallow clone of the pinned tag
+        "https://github.com/Syllo/nvtop.git /tmp/nvtop-build && "
+        "cmake -S /tmp/nvtop-build -B /tmp/nvtop-build/build "
+        "-DAMDGPU_SUPPORT=ON -DCMAKE_INSTALL_PREFIX=/usr/local && "
+        "make -C /tmp/nvtop-build/build -j$(nproc) && "  # bare -j means unlimited jobs; cap at CPU count
+        "make -C /tmp/nvtop-build/build install && "
+        "rm -rf /tmp/nvtop-build",  # reached only on success; failed builds are cleared on the next run
+    ],
+    _sudo=True,
+)
+
 # Group membership for /dev/kfd + /dev/dri access (needed for GPU passthrough
 # into containers, and for unprivileged host-side rocminfo).
 server.group(name="ensure render group", group="render", _sudo=True)