#!/usr/bin/env bash # Smoke-test the running kimi-linear vLLM container. Exits non-zero if # anything's wrong, so it doubles as a P1 health check. set -euo pipefail HOST="${KIMI_HOST:-127.0.0.1:8000}" MODEL="${KIMI_MODEL:-kimi-linear}" echo "[smoke] GET /v1/models on $HOST" curl -fsS "http://$HOST/v1/models" | python3 -m json.tool echo echo "[smoke] POST /v1/chat/completions ($MODEL) — tiny generation" curl -fsS "http://$HOST/v1/chat/completions" \ -H "Content-Type: application/json" \ -d "{ \"model\": \"$MODEL\", \"messages\": [{\"role\": \"user\", \"content\": \"Reply with exactly: ok\"}], \"max_tokens\": 16, \"temperature\": 0.0 }" | python3 -m json.tool echo echo "[smoke] passed"