progress 235b
This commit is contained in:
45
pyinfra/framework/compose/llama/smoke.sh
Executable file
45
pyinfra/framework/compose/llama/smoke.sh
Executable file
@@ -0,0 +1,45 @@
|
||||
#!/usr/bin/env bash
|
||||
# Smoke-test the running llama-server (kyuz0 rocm-7.2.2). Hits /health
|
||||
# for liveness, then a tiny OpenAI-compatible chat completion. Also
|
||||
# prints eval_tps so you can compare to Ollama directly.
|
||||
set -euo pipefail
|
||||
|
||||
HOST="${LLAMA_HOST:-127.0.0.1:8080}"
|
||||
MODEL="${LLAMA_MODEL:-qwen3-coder}"
|
||||
|
||||
echo "[smoke] GET /health on $HOST"
|
||||
curl -fsS "http://$HOST/health" | python3 -m json.tool
|
||||
|
||||
echo
|
||||
echo "[smoke] POST /v1/chat/completions ($MODEL) — tiny generation"
|
||||
curl -fsS "http://$HOST/v1/chat/completions" \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d "{
|
||||
\"model\": \"$MODEL\",
|
||||
\"messages\": [{\"role\": \"user\", \"content\": \"Reply with exactly: ok\"}],
|
||||
\"max_tokens\": 16,
|
||||
\"temperature\": 0.0
|
||||
}" | python3 -m json.tool
|
||||
|
||||
echo
|
||||
echo "[smoke] perf measure — eval_tps and prompt_tps"
|
||||
# Use llama.cpp's native /completion endpoint which returns timings.
|
||||
curl -fsS "http://$HOST/completion" \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"prompt": "Write a Python function that computes the Fibonacci sequence iteratively. Include type hints and a brief docstring.",
|
||||
"n_predict": 200,
|
||||
"temperature": 0.0,
|
||||
"stream": false
|
||||
}' | python3 -c "
|
||||
import json, sys
|
||||
r = json.load(sys.stdin)
|
||||
t = r.get('timings', {})
|
||||
print(f'predicted_per_second: {t.get(\"predicted_per_second\", \"?\"):.2f} tok/s')
|
||||
print(f'prompt_per_second: {t.get(\"prompt_per_second\", \"?\"):.2f} tok/s')
|
||||
print(f'predicted_n: {t.get(\"predicted_n\", \"?\")}')
|
||||
print(f'prompt_n: {t.get(\"prompt_n\", \"?\")}')
|
||||
"
|
||||
|
||||
echo
|
||||
echo "[smoke] passed"
|
||||
Reference in New Issue
Block a user