# localgenai/pyinfra/framework/compose/kimi-linear/Dockerfile

# Derived image: kyuz0/vllm-therock-gfx1151:stable plus gfx1151 AITER GEMM
# config fallbacks.
#
# kyuz0's image is built for gfx1151 but doesn't ship every per-op AITER
# autotuning config. Kimi-Linear's MLA layers hit FP8 BMM ops
# (BATCHED_GEMM-A8W8-A_PER_TOKEN_GROUP_PREQUANT_W_PER_BATCHED_TENSOR_QUANT
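# and friends) that have no gfx1151 config in the bundle. We synthesize
# them by copying from the closest-arch config that does exist. Donor
# architectures are tried in the order gfx1100, gfx1101, gfx942, gfx90a,
# and the first donor that has a given op wins (RDNA3 gfx1100 is closest
# to RDNA3.5 gfx1151). Tile sizes won't be optimal but the kernels will
# compile and run.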
#
# Idempotent — only fills slots that don't already have a gfx1151 config.
#
# If we ever need a vLLM-pinned base (e.g. upstream regresses on
# Kimi-Linear), build it via ./build.sh first and change FROM here to
# kimi-linear-local:v0.11.2.

FROM kyuz0/vllm-therock-gfx1151:stable

# Backfill missing gfx1151 AITER GEMM configs from donor architectures.
#
# Donor prefixes are visited in the order listed in the outer loop. For each
# donor file "<arch>-<op>.json" a copy named "gfx1151-<op>.json" is created,
# unless a file with that name is already present. Because existing targets
# are always skipped, an earlier donor takes precedence over a later one and
# configs already shipped in the base image are never overwritten.
#
# `set -e` makes the build fail if the config directory is missing or a copy
# fails. The literal-pattern check (`-f`) skips the unexpanded glob when a
# donor prefix has no matching files.
RUN set -e; \
    cfg_dir=/opt/venv/lib64/python3.12/site-packages/aiter/ops/triton/configs/gemm; \
    cd "$cfg_dir"; \
    count=0; \
    for arch in gfx1100 gfx1101 gfx942 gfx90a; do \
        for donor in "$arch"-*.json; do \
            if [ ! -f "$donor" ]; then \
                continue; \
            fi; \
            op=${donor#"${arch}-"}; \
            target=gfx1151-$op; \
            if [ -f "$target" ]; then \
                continue; \
            fi; \
            cp "$donor" "$target"; \
            echo "[fix-aiter] $donor -> $target"; \
            count=$((count + 1)); \
        done; \
    done; \
    echo "[fix-aiter] filled $count gfx1151 config slots"