added qwable and ornith

This commit is contained in:
2026-06-26 11:33:35 -04:00
parent 224afbb3a6
commit 705421470a
6 changed files with 384 additions and 7 deletions

View File

@@ -14,6 +14,7 @@
# swap-model 235b # Qwen3-235B-A22B via llama.cpp (long-task)
# swap-model kimi # Kimi-Linear-48B-A3B via vLLM (long-context)
# swap-model qwable # Qwable-3.6-27B via llama.cpp (Fable-style)
# swap-model ornith # Ornith-1.0-35B via llama.cpp (agentic coding)
# swap-model comfyui # ComfyUI (image generation)
# swap-model none # everything down — free the GPU
# swap-model status # show what's currently up
@@ -45,6 +46,7 @@ declare -A SVC_DIR=(
[kimi]=kimi-linear
[235b]=qwen3-235b
[qwable]=qwable
[ornith]=ornith
[comfyui]=comfyui
)
declare -A SVC_HEALTH=(
@@ -53,6 +55,7 @@ declare -A SVC_HEALTH=(
[kimi]="http://127.0.0.1:8000/v1/models"
[235b]="http://127.0.0.1:8081/health"
[qwable]="http://127.0.0.1:8082/health"
[ornith]="http://127.0.0.1:8083/health"
[comfyui]="http://127.0.0.1:8188/"
)
@@ -68,8 +71,9 @@ plan() {
235b) UP=(235b) ; DOWN=(ollama llama kimi qwable comfyui) ;;
kimi) UP=(kimi) ; DOWN=(235b comfyui) ;;
qwable) UP=(qwable) ; DOWN=(235b comfyui) ;;
comfyui) UP=(comfyui) ; DOWN=(235b kimi qwable) ;;
none) UP=() ; DOWN=(ollama llama kimi 235b qwable comfyui) ;;
ornith) UP=(ornith) ; DOWN=(235b comfyui) ;;
comfyui) UP=(comfyui) ; DOWN=(235b kimi qwable ornith) ;;
none) UP=() ; DOWN=(ollama llama kimi 235b qwable ornith comfyui) ;;
*) return 1 ;;
esac
}
@@ -127,7 +131,7 @@ up_svc() {
show_status() {
echo "Inference services:"
for svc in ollama llama kimi 235b qwable comfyui; do
for svc in ollama llama kimi 235b qwable ornith comfyui; do
local container="${SVC_DIR[$svc]}" state="down" health=""
if is_running "$container"; then
state="up"
@@ -150,6 +154,7 @@ Usage:
swap-model 235b # Qwen3-235B-A22B (llama.cpp, long-task, ~5-10 tok/s)
swap-model kimi # Kimi-Linear-48B (vLLM, long-context chat)
swap-model qwable # Qwable-3.6-27B (llama.cpp, Fable-style, ~10-15 tok/s)
swap-model ornith # Ornith-1.0-35B (llama.cpp, agentic coding MoE, ~80-100 tok/s)
swap-model comfyui # ComfyUI (image generation)
swap-model none # everything down (free the GPU arena)
swap-model status # show current state
@@ -158,16 +163,16 @@ Behaviour: stops conflicting services (frees the 110 GB GPU arena),
starts the target, polls its /health until it returns 200. Wait timeout
defaults to ${WAIT_TIMEOUT}s; override with SWAP_WAIT_TIMEOUT.
Coexistence: ollama(30B), kimi, and qwable(27B, 16.5 GB) coexist with
each other. 235B and comfyui coexist with nothing. See
compose/qwen3-235b/README.md for arena math.
Coexistence: ollama(30B), kimi, qwable(27B, 16.5 GB), and ornith(35B-A3B,
21 GB) coexist with each other. 235B and comfyui coexist with nothing.
See compose/qwen3-235b/README.md for arena math.
EOF
}
# --- Main --------------------------------------------------------------------
TARGET="${1:-}"
case "$TARGET" in
coder|235b|kimi|qwable|comfyui|none) ;;
coder|235b|kimi|qwable|ornith|comfyui|none) ;;
status) show_status ; exit 0 ;;
-h|--help|help|"") usage ; exit 0 ;;
*)