localgenai/pyinfra/framework/compose/litellm/smoke.sh

#!/usr/bin/env bash
# Smoke-test the LiteLLM proxy. /health/readiness for liveness, /v1/models
# for the configured backends, then /health (which dials every backend)
# to surface which ones are actually reachable right now.
# Strict mode: abort on the first failing command, on any unset variable,
# and when any stage of a pipeline fails (so a failed curl fails the test).
set -euo pipefail

# Proxy address as host:port; override with LITELLM_HOST.
HOST="${LITELLM_HOST:-127.0.0.1:4000}"

# Resolve the master key. A value already in the environment wins; otherwise
# fall back to the .env file located one directory above this script.
# CDPATH is cleared so `cd` cannot print an unexpected path into script_dir.
script_dir="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
env_file="$(dirname -- "$script_dir")/.env"
if [[ -z "${LITELLM_MASTER_KEY:-}" && -f "$env_file" ]]; then
    # The file is sourced as shell, so it must be valid shell syntax.
    # shellcheck disable=SC1090
    source "$env_file"
fi
if [[ -z "${LITELLM_MASTER_KEY:-}" ]]; then
    # Report the path that was actually checked rather than a fixed location.
    echo "[smoke] LITELLM_MASTER_KEY not set — export it or populate $env_file" >&2
    exit 1
fi

# Stage 1: ask the proxy's readiness endpoint whether it is up, and
# pretty-print the JSON it returns. Any HTTP error status makes curl exit
# non-zero (--fail); progress output is hidden but errors are still shown.
printf '%s\n' "[smoke] GET /health/readiness on ${HOST} (proxy alive?)"
curl --fail --silent --show-error \
    --header "Authorization: Bearer ${LITELLM_MASTER_KEY}" \
    "http://${HOST}/health/readiness" \
  | python3 -m json.tool

# Read a /v1/models JSON response on stdin and print one "  - <id>" line per
# entry of its "data" array. Exits non-zero (with a message on stderr) when
# the array is missing, null, or empty, so the smoke test cannot report a
# populated model list that was never actually checked.
smoke_print_models() {
    python3 -c '
import json, sys

models = json.load(sys.stdin).get("data") or []
for model in models:
    model_id = model.get("id", "?")
    print(f"  - {model_id}")
if not models:
    sys.exit("[smoke] /v1/models returned no models")
'
}

echo
echo "[smoke] GET /v1/models (configured model_names)"
curl -fsS "http://$HOST/v1/models" \
    -H "Authorization: Bearer $LITELLM_MASTER_KEY" \
  | smoke_print_models

# Read a /health JSON response on stdin and summarise it: a count plus one
# line per entry of "healthy_endpoints" and "unhealthy_endpoints". For
# unhealthy entries the "error" value is shown, limited to 80 characters.
smoke_print_health() {
    python3 -c '
import json, sys

report = json.load(sys.stdin)
healthy = report.get("healthy_endpoints") or []
unhealthy = report.get("unhealthy_endpoints") or []

print(f"  healthy:   {len(healthy)}")
for endpoint in healthy:
    model = endpoint.get("model", "?")
    print(f"    + {model}")

print(f"  unhealthy: {len(unhealthy)}")
for endpoint in unhealthy:
    model = endpoint.get("model", "?")
    # The error may be null or a non-string (slicing those would raise), and
    # may span several lines; coerce to text and collapse whitespace so each
    # backend stays on a single line before truncating.
    reason = " ".join(str(endpoint.get("error") or "?").split())
    print(f"    - {model}: {reason[:80]}")
'
}

echo
echo "[smoke] GET /health (each backend's reachability — slow, ~10s)"
curl -fsS "http://$HOST/health" \
    -H "Authorization: Bearer $LITELLM_MASTER_KEY" \
  | smoke_print_health

# Reached only if every stage above succeeded: emit a blank separator line
# followed by the success summary.
printf '\n%s\n' "[smoke] passed — proxy up, model list populated. Unhealthy backends are expected if their compose stacks are down."