#!/usr/bin/env bash
# Smoke-test the running kimi-linear vLLM container. Exits non-zero if
# anything's wrong, so it doubles as a P1 health check.
set -euo pipefail

HOST="${KIMI_HOST:-127.0.0.1:8000}"
MODEL="${KIMI_MODEL:-kimi-linear}"

echo "[smoke] GET /v1/models on $HOST"
curl -fsS "http://$HOST/v1/models" | python3 -m json.tool

echo
echo "[smoke] POST /v1/chat/completions ($MODEL) — tiny generation"
curl -fsS "http://$HOST/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d "{
        \"model\": \"$MODEL\",
        \"messages\": [{\"role\": \"user\", \"content\": \"Reply with exactly: ok\"}],
        \"max_tokens\": 16,
        \"temperature\": 0.0
    }" | python3 -m json.tool

echo
echo "[smoke] passed"