Files
localgenai/pyinfra/framework/compose/kimi-linear/Dockerfile
noisedestroyers a29793032d Document current coding-workflow stack state
Snapshot of where opencode + Qwen3-Coder + MCPs + Kimi-Linear + voice
  + Phoenix tracing land today, plus in-flight (oc-tree, kimi-linear
  context ramp) and next (ComfyUI) items with pointers to per-project
  NEXT_STEPS.md guides.
2026-05-10 21:14:43 -04:00

36 lines
1.4 KiB
Docker

# Derived image: kyuz0/vllm-therock-gfx1151:stable plus gfx1151 AITER GEMM config fallbacks.
#
# kyuz0's image is built for gfx1151 but doesn't ship every per-op AITER
# autotuning config. Kimi-Linear's MLA layers hit FP8 BMM ops
# (BATCHED_GEMM-A8W8-A_PER_TOKEN_GROUP_PREQUANT_W_PER_BATCHED_TENSOR_QUANT
# and friends) that have no gfx1151 config in the bundle. We synthesize
# them by copying from the closest-arch config that does exist, trying
# gfx1100, gfx1101, gfx942, then gfx90a in that order (RDNA3 gfx1100 is
# closest to RDNA3.5 gfx1151); the first donor found for an op wins.
# Tile sizes won't be optimal but the kernels will compile and run.
#
# Idempotent — only fills slots that don't already have a gfx1151 config.
#
# If we ever need a vLLM-pinned base (e.g. upstream regresses on
# Kimi-Linear), build it via ./build.sh first and change FROM here to
# kimi-linear-local:v0.11.2.
# Base image. Defaults to the kyuz0 gfx1151 build; override at build time
# (--build-arg BASE_IMAGE=<image>:<tag>) to apply the same config fill to a
# different base without editing this file.
ARG BASE_IMAGE=kyuz0/vllm-therock-gfx1151:stable
FROM ${BASE_IMAGE}

# Directory the fill step operates on. The loop below expects files named
# <arch>-<op>.json in it. The default path embeds the venv location and the
# Python minor version, so it is overridable for bases where those differ.
ARG AITER_GEMM_CONFIG_DIR=/opt/venv/lib64/python3.12/site-packages/aiter/ops/triton/configs/gemm

# For each donor arch, in priority order, copy <donor>-<op>.json to
# gfx1151-<op>.json unless the destination already exists. Because existing
# destinations are never overwritten, an earlier donor in the list wins over
# a later one, and configs already present for gfx1151 are left untouched.
# An unmatched glob stays as a literal pattern, which the `[ -f ]` guard skips.
# The build fails with an explicit message if the config directory is missing.
RUN set -e; \
    DIR="${AITER_GEMM_CONFIG_DIR}"; \
    [ -d "$DIR" ] || { echo "[fix-aiter] config dir not found: $DIR" >&2; exit 1; }; \
    cd "$DIR"; \
    filled=0; \
    for SRC_PREFIX in gfx1100 gfx1101 gfx942 gfx90a; do \
        for SRC in "${SRC_PREFIX}"-*.json; do \
            [ -f "$SRC" ] || continue; \
            OP=${SRC#"${SRC_PREFIX}"-}; \
            DST=gfx1151-${OP}; \
            if [ ! -f "$DST" ]; then \
                cp "$SRC" "$DST"; \
                echo "[fix-aiter] $SRC -> $DST"; \
                filled=$((filled+1)); \
            fi; \
        done; \
    done; \
    echo "[fix-aiter] filled $filled gfx1151 config slots"