progress 235b

This commit is contained in:
2026-06-08 15:31:50 +01:00
parent a29793032d
commit de1635872f
25 changed files with 1598 additions and 53 deletions

View File

@@ -0,0 +1,77 @@
# ComfyUI on Strix Halo gfx1151 via kyuz0/amd-strix-halo-comfyui.
#
# Toolbox-style image (Fedora rawhide + ROCm) with /bin/bash as CMD.
# We override entrypoint to launch ComfyUI's main.py with the flag set
# gfx1151 needs (--disable-mmap because mmap >64 GB is slow on ROCm;
# --bf16-vae avoids VAE OOM; --cache-none keeps unified-memory pressure
# manageable).
#
# Coexistence with other services. ComfyUI competes for GPU with
# kimi-linear (always-resident) and ollama (loads-on-demand). To avoid
# silent contention this stack is NOT set to restart automatically —
# bring it up manually (`docker compose up -d`) when you need image gen,
# and `docker compose down` after. Mid-term we'll add a
# load-shed/coordination layer; until then, this manual up/down
# procedure is the only safeguard against contention.
#
# Pin: kyuz0/amd-strix-halo-comfyui:20260213-143435 (sha-7242b4d). Bump
# deliberately after re-validating Flux/HiDream/LTX2 still work.
services:
  comfyui:
    image: kyuz0/amd-strix-halo-comfyui:20260213-143435
    container_name: comfyui
    # Explicit no auto-restart — see header note about GPU contention.
    restart: "no"
    # Run Docker's bundled init as PID 1. The entrypoint below is a plain
    # bash -> python chain; a PID 1 without its own SIGTERM handler does
    # not get the default terminate action, so `docker compose down`
    # would otherwise wait out the stop timeout and SIGKILL the container.
    # With init, SIGTERM is forwarded and shutdown is prompt.
    init: true
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    cap_add:
      - SYS_PTRACE
    security_opt:
      - seccomp=unconfined
    # Numeric GIDs of host's video (44) and render (991) groups — names
    # don't exist inside the Fedora-rawhide base, but GIDs need to match
    # the host for /dev/kfd + /dev/dri access.
    group_add:
      - "44"
      - "991"
    # NOTE(review): with `ipc: host` the container shares the host's
    # /dev/shm, so `shm_size` likely has no effect — confirm and drop
    # whichever of the two is not actually needed.
    shm_size: 16g
    ipc: host
    environment:
      # Same unified-memory recipe as kimi-linear.yml: BIOS UMA=0.5 GB +
      # ttm.pages_limit=33554432 cmdline + this triple. Without these,
      # PyTorch's HIP allocator only sees the tiny 0.5 GB UMA pool and
      # can't reach GTT. The kyuz0 image is built against native gfx1151
      # so HSA_OVERRIDE_GFX_VERSION isn't needed.
      - HSA_XNACK=1
      - HSA_FORCE_FINE_GRAIN_PCIE=1
      - PYTORCH_HIP_ALLOC_CONF=backend:native,expandable_segments:True,garbage_collection_threshold:0.9
    volumes:
      # All ComfyUI state lives under /srv/docker/comfyui/ on the host.
      # Image's $HOME is /root (Fedora rawhide). Models go in subdirs
      # under comfy-models/ (text_encoders/, vae/, checkpoints/,
      # diffusion_models/, unet/, loras/, clip_vision/) — kyuz0's image
      # populates extra_model_paths.yaml pointing at $HOME/comfy-models.
      - /srv/docker/comfyui/models:/root/comfy-models
      - /srv/docker/comfyui/output:/root/comfy-outputs
      - /srv/docker/comfyui/custom_nodes:/opt/ComfyUI/custom_nodes
      - /srv/docker/comfyui/workflows:/opt/ComfyUI/user/default/workflows
    ports:
      # 8188 = standard ComfyUI port. kyuz0's banner alias uses 8000 but
      # that would collide with vLLM (compose/kimi-linear.yml).
      - "8188:8188"
    # bash -lc loads /etc/profile.d/01-rocm-envs.sh (TORCH_ROCM_AOTRITON,
    # TORCH_BLAS_PREFER_HIPBLASLT) — without a login shell those don't
    # apply and ROCm perf regresses.
    entrypoint: ["/bin/bash", "-lc"]
    # set_extra_paths.sh writes /opt/ComfyUI/extra_model_paths.yaml so
    # ComfyUI finds models under $HOME/comfy-models. Idempotent — safe
    # to run every start. Without it, model dropdowns in the UI are
    # empty and templates report "missing model".
    #
    # The whole folded scalar is passed to `bash -lc` as one command line
    # (`>-` folds the lines with spaces and strips the trailing newline).
    # `exec` replaces the shell with the Python process so stop signals
    # reach ComfyUI directly instead of an intermediate bash.
    #
    # NOTE(review): --gpu-only and --disable-smart-memory are passed but
    # their rationale is not recorded in this file — TODO document.
    command:
      - >-
        /opt/set_extra_paths.sh &&
        cd /opt/ComfyUI && exec python main.py
        --listen 0.0.0.0 --port 8188
        --output-directory /root/comfy-outputs
        --disable-mmap --gpu-only --disable-smart-memory
        --cache-none --bf16-vae