# Derived image: kyuz0:stable plus gfx1151 AITER GEMM config fallbacks.
#
# kyuz0's image is built for gfx1151 but doesn't ship every per-op AITER
# autotuning config. Kimi-Linear's MLA layers hit FP8 BMM ops
# (BATCHED_GEMM-A8W8-A_PER_TOKEN_GROUP_PREQUANT_W_PER_BATCHED_TENSOR_QUANT
# and friends) that have no gfx1151 config in the bundle. We synthesize
# them by copying from the closest-arch config that does exist (RDNA3
# gfx1100 is closest to RDNA3.5 gfx1151). Tile sizes won't be optimal
# but the kernels will compile and run.
#
# Source arches are tried in priority order: gfx1100, gfx1101, gfx942,
# gfx90a. The first arch that provides a given op wins; later arches only
# fill ops that are still missing.
#
# Idempotent — only fills slots that don't already have a gfx1151 config.
#
# If we ever need a vLLM-pinned base (e.g. upstream regresses on
# Kimi-Linear), build it via ./build.sh first and change FROM here to
# kimi-linear-local:v0.11.2.
FROM kyuz0/vllm-therock-gfx1151:stable

# For every <arch>-<op>.json in the AITER Triton GEMM config directory,
# create gfx1151-<op>.json as a copy unless one already exists.
# The directory path is tied to the base image's venv layout (python3.12);
# if the base image changes it, the build fails early with a clear message
# instead of a bare `cd` error.
RUN set -eu; \
    DIR=/opt/venv/lib64/python3.12/site-packages/aiter/ops/triton/configs/gemm; \
    if [ ! -d "$DIR" ]; then \
        echo "[fix-aiter] config directory not found: $DIR" >&2; \
        exit 1; \
    fi; \
    cd "$DIR"; \
    filled=0; \
    for SRC_PREFIX in gfx1100 gfx1101 gfx942 gfx90a; do \
        for SRC in "${SRC_PREFIX}"-*.json; do \
            [ -f "$SRC" ] || continue; \
            OP=${SRC#"${SRC_PREFIX}-"}; \
            DST=gfx1151-${OP}; \
            if [ ! -f "$DST" ]; then \
                cp -- "$SRC" "$DST"; \
                echo "[fix-aiter] $SRC -> $DST"; \
                filled=$((filled+1)); \
            fi; \
        done; \
    done; \
    echo "[fix-aiter] filled $filled gfx1151 config slots"