# pyinfra/framework/compose/comfyui.yml

# ComfyUI on Strix Halo gfx1151 via kyuz0/amd-strix-halo-comfyui.
#
# Toolbox-style image (Fedora rawhide + ROCm) with /bin/bash as CMD.
# We override entrypoint + command to launch ComfyUI's main.py with the
# flag set gfx1151 needs (--disable-mmap because mmap >64 GB is slow on
# ROCm; --bf16-vae avoids VAE OOM; --cache-none keeps unified-memory
# pressure manageable).
#
# Coexistence with other services. ComfyUI competes for GPU with
# kimi-linear (always-resident) and ollama (loads-on-demand). To avoid
# silent contention this stack is NOT set to restart automatically —
# bring it up manually (`docker compose up -d`) when you need image gen,
# and `docker compose down` after. Mid-term we'll add a
# load-shed/coordination layer; until then, this manual convention is
# the only coordination mechanism.
#
# Pin: kyuz0/amd-strix-halo-comfyui:20260213-143435 (sha-7242b4d). Bump
# deliberately after re-validating Flux/HiDream/LTX2 still work.
services:
  # Single-service stack: ComfyUI with the AMD GPU device nodes passed
  # through, state bind-mounted from /srv/docker/comfyui/ on the host,
  # and the web UI published on port 8188.
  comfyui:
    image: kyuz0/amd-strix-halo-comfyui:20260213-143435
    container_name: comfyui
    # Explicit no auto-restart — see header note about GPU contention.
    # Kept quoted: a bare `no` is a boolean under YAML 1.1 parsers.
    restart: "no"
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    # NOTE(review): SYS_PTRACE + seccomp=unconfined widen the container's
    # privileges; presumably carried over from the usual ROCm container
    # recipe — confirm both are still required for this image.
    cap_add:
      - SYS_PTRACE
    security_opt:
      - seccomp=unconfined
    # Numeric GIDs of host's video (44) and render (991) groups — names
    # don't exist inside the Fedora-rawhide base, but GIDs need to match
    # the host for /dev/kfd + /dev/dri access.
    group_add:
      - "44"
      - "991"
    # NOTE(review): with `ipc: host` the container shares the host's IPC
    # namespace (and its /dev/shm), so shm_size is likely inert here; it
    # only matters if `ipc: host` is ever dropped — confirm and keep one.
    shm_size: 16g
    ipc: host
    environment:
      # Same unified-memory recipe as kimi-linear.yml: BIOS UMA=0.5 GB +
      # ttm.pages_limit=33554432 cmdline + the three variables below.
      # Without these, PyTorch's HIP allocator only sees the tiny 0.5 GB
      # UMA pool and can't reach GTT. The kyuz0 image is built against
      # native gfx1151 so HSA_OVERRIDE_GFX_VERSION isn't needed.
      - HSA_XNACK=1
      - HSA_FORCE_FINE_GRAIN_PCIE=1
      - PYTORCH_HIP_ALLOC_CONF=backend:native,expandable_segments:True,garbage_collection_threshold:0.9
    volumes:
      # All ComfyUI state lives under /srv/docker/comfyui/ on the host.
      # Image's $HOME is /root (Fedora rawhide). Models go in subdirs of
      # the host's models/ dir, mounted as $HOME/comfy-models
      # (text_encoders/, vae/, checkpoints/, diffusion_models/, unet/,
      # loras/, clip_vision/) — kyuz0's image populates
      # extra_model_paths.yaml pointing at $HOME/comfy-models.
      - /srv/docker/comfyui/models:/root/comfy-models
      - /srv/docker/comfyui/output:/root/comfy-outputs
      - /srv/docker/comfyui/custom_nodes:/opt/ComfyUI/custom_nodes
      - /srv/docker/comfyui/workflows:/opt/ComfyUI/user/default/workflows
    ports:
      # 8188 = standard ComfyUI port. kyuz0's banner alias uses 8000 but
      # that would collide with vLLM (compose/kimi-linear.yml).
      # Must match the --port passed to main.py in `command` below.
      - "8188:8188"
    # bash -lc loads /etc/profile.d/01-rocm-envs.sh (TORCH_ROCM_AOTRITON,
    # TORCH_BLAS_PREFER_HIPBLASLT) — without a login shell those don't
    # apply and ROCm perf regresses.
    entrypoint: ["/bin/bash", "-lc"]
    # set_extra_paths.sh writes /opt/ComfyUI/extra_model_paths.yaml so
    # ComfyUI finds models under $HOME/comfy-models. Idempotent — safe
    # to run every start. Without it, model dropdowns in the UI are
    # empty and templates report "missing model".
    #
    # The folded scalar (`>-`) joins the lines below with single spaces
    # into ONE string, which is the sole argument to `bash -lc`; the `-`
    # strips the trailing newline so the shell gets exactly the command.
    # --output-directory must stay in sync with the comfy-outputs mount.
    command:
      - >-
        /opt/set_extra_paths.sh &&
        cd /opt/ComfyUI && python main.py
        --listen 0.0.0.0 --port 8188
        --output-directory /root/comfy-outputs
        --disable-mmap --gpu-only --disable-smart-memory
        --cache-none --bf16-vae
progress 235b 2026-06-08 15:31:50 +01:00			`# ComfyUI on Strix Halo gfx1151 via kyuz0/amd-strix-halo-comfyui.`
			`#`
			`# Toolbox-style image (Fedora rawhide + ROCm) with /bin/bash as CMD.`
			`# We override entrypoint to launch ComfyUI's main.py with the flag set`
			`# gfx1151 needs (--disable-mmap because mmap >64 GB is slow on ROCm;`
			`# --bf16-vae avoids VAE OOM; --cache-none keeps unified-memory pressure`
			`# manageable).`
			`#`
			`# Coexistence with other services. ComfyUI competes for GPU with`
			`# kimi-linear (always-resident) and ollama (loads-on-demand). To avoid`
			`# silent contention this stack is NOT set to restart automatically —`
			# bring it up manually (`docker compose up -d`) when you need image gen,
			# and `docker compose down` after. Mid-term we'll add a
			`# load-shed/coordination layer; this comment is the binding for now.`
			`#`
			`# Pin: kyuz0/amd-strix-halo-comfyui:20260213-143435 (sha-7242b4d). Bump`
			`# deliberately after re-validating Flux/HiDream/LTX2 still work.`
			`services:`
			`comfyui:`
			`image: kyuz0/amd-strix-halo-comfyui:20260213-143435`
			`container_name: comfyui`
			`# Explicit no auto-restart — see header note about GPU contention.`
			`restart: "no"`
			`devices:`
			`- /dev/kfd:/dev/kfd`
			`- /dev/dri:/dev/dri`
			`cap_add:`
			`- SYS_PTRACE`
			`security_opt:`
			`- seccomp=unconfined`
			`# Numeric GIDs of host's video (44) and render (991) groups — names`
			`# don't exist inside the Fedora-rawhide base, but GIDs need to match`
			`# the host for /dev/kfd + /dev/dri access.`
			`group_add:`
			`- "44"`
			`- "991"`
			`shm_size: 16g`
			`ipc: host`
			`environment:`
			`# Same unified-memory recipe as kimi-linear.yml: BIOS UMA=0.5 GB +`
			`# ttm.pages_limit=33554432 cmdline + this triple. Without these,`
			`# PyTorch's HIP allocator only sees the tiny 0.5 GB UMA pool and`
			`# can't reach GTT. The kyuz0 image is built against native gfx1151`
			`# so HSA_OVERRIDE_GFX_VERSION isn't needed.`
			`- HSA_XNACK=1`
			`- HSA_FORCE_FINE_GRAIN_PCIE=1`
			`- PYTORCH_HIP_ALLOC_CONF=backend:native,expandable_segments:True,garbage_collection_threshold:0.9`
			`volumes:`
			`# All ComfyUI state lives under /srv/docker/comfyui/ on the host.`
			`# Image's $HOME is /root (Fedora rawhide). Models go in subdirs`
			`# under comfy-models/ (text_encoders/, vae/, checkpoints/,`
			`# diffusion_models/, unet/, loras/, clip_vision/) — kyuz0's image`
			`# populates extra_model_paths.yaml pointing at $HOME/comfy-models.`
			`- /srv/docker/comfyui/models:/root/comfy-models`
			`- /srv/docker/comfyui/output:/root/comfy-outputs`
			`- /srv/docker/comfyui/custom_nodes:/opt/ComfyUI/custom_nodes`
			`- /srv/docker/comfyui/workflows:/opt/ComfyUI/user/default/workflows`
			`ports:`
			`# 8188 = standard ComfyUI port. kyuz0's banner alias uses 8000 but`
			`# that would collide with vLLM (compose/kimi-linear.yml).`
			`- "8188:8188"`
			`# bash -lc loads /etc/profile.d/01-rocm-envs.sh (TORCH_ROCM_AOTRITON,`
			`# TORCH_BLAS_PREFER_HIPBLASLT) — without a login shell those don't`
			`# apply and ROCm perf regresses.`
			`entrypoint: ["/bin/bash", "-lc"]`
			`# set_extra_paths.sh writes /opt/ComfyUI/extra_model_paths.yaml so`
			`# ComfyUI finds models under $HOME/comfy-models. Idempotent — safe`
			`# to run every start. Without it, model dropdowns in the UI are`
			`# empty and templates report "missing model".`
			`command:`
			`- >`
			`/opt/set_extra_paths.sh &&`
			`cd /opt/ComfyUI && python main.py`
			`--listen 0.0.0.0 --port 8188`
			`--output-directory /root/comfy-outputs`
			`--disable-mmap --gpu-only --disable-smart-memory`
			`--cache-none --bf16-vae`