added qwable and ornith
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
# swap-model 235b # Qwen3-235B-A22B via llama.cpp (long-task)
|
||||
# swap-model kimi # Kimi-Linear-48B-A3B via vLLM (long-context)
|
||||
# swap-model qwable # Qwable-3.6-27B via llama.cpp (Fable-style)
|
||||
# swap-model ornith # Ornith-1.0-35B via llama.cpp (agentic coding)
|
||||
# swap-model comfyui # ComfyUI (image generation)
|
||||
# swap-model none # everything down — free the GPU
|
||||
# swap-model status # show what's currently up
|
||||
@@ -45,6 +46,7 @@ declare -A SVC_DIR=(
|
||||
[kimi]=kimi-linear
|
||||
[235b]=qwen3-235b
|
||||
[qwable]=qwable
|
||||
[ornith]=ornith
|
||||
[comfyui]=comfyui
|
||||
)
|
||||
declare -A SVC_HEALTH=(
|
||||
@@ -53,6 +55,7 @@ declare -A SVC_HEALTH=(
|
||||
[kimi]="http://127.0.0.1:8000/v1/models"
|
||||
[235b]="http://127.0.0.1:8081/health"
|
||||
[qwable]="http://127.0.0.1:8082/health"
|
||||
[ornith]="http://127.0.0.1:8083/health"
|
||||
[comfyui]="http://127.0.0.1:8188/"
|
||||
)
|
||||
|
||||
@@ -68,8 +71,9 @@ plan() {
|
||||
235b) UP=(235b) ; DOWN=(ollama llama kimi qwable comfyui) ;;
|
||||
kimi) UP=(kimi) ; DOWN=(235b comfyui) ;;
|
||||
qwable) UP=(qwable) ; DOWN=(235b comfyui) ;;
|
||||
comfyui) UP=(comfyui) ; DOWN=(235b kimi qwable) ;;
|
||||
none) UP=() ; DOWN=(ollama llama kimi 235b qwable comfyui) ;;
|
||||
ornith) UP=(ornith) ; DOWN=(235b comfyui) ;;
|
||||
comfyui) UP=(comfyui) ; DOWN=(235b kimi qwable ornith) ;;
|
||||
none) UP=() ; DOWN=(ollama llama kimi 235b qwable ornith comfyui) ;;
|
||||
*) return 1 ;;
|
||||
esac
|
||||
}
|
||||
@@ -127,7 +131,7 @@ up_svc() {
|
||||
|
||||
show_status() {
|
||||
echo "Inference services:"
|
||||
for svc in ollama llama kimi 235b qwable comfyui; do
|
||||
for svc in ollama llama kimi 235b qwable ornith comfyui; do
|
||||
local container="${SVC_DIR[$svc]}" state="down" health=""
|
||||
if is_running "$container"; then
|
||||
state="up"
|
||||
@@ -150,6 +154,7 @@ Usage:
|
||||
swap-model 235b # Qwen3-235B-A22B (llama.cpp, long-task, ~5-10 tok/s)
|
||||
swap-model kimi # Kimi-Linear-48B (vLLM, long-context chat)
|
||||
swap-model qwable # Qwable-3.6-27B (llama.cpp, Fable-style, ~10-15 tok/s)
|
||||
swap-model ornith # Ornith-1.0-35B (llama.cpp, agentic coding MoE, ~80-100 tok/s)
|
||||
swap-model comfyui # ComfyUI (image generation)
|
||||
swap-model none # everything down (free the GPU arena)
|
||||
swap-model status # show current state
|
||||
@@ -158,16 +163,16 @@ Behaviour: stops conflicting services (frees the 110 GB GPU arena),
|
||||
starts the target, polls its /health until it returns 200. Wait timeout
|
||||
defaults to ${WAIT_TIMEOUT}s; override with SWAP_WAIT_TIMEOUT.
|
||||
|
||||
Coexistence: ollama(30B), kimi, and qwable(27B, 16.5 GB) coexist with
|
||||
each other. 235B and comfyui coexist with nothing. See
|
||||
compose/qwen3-235b/README.md for arena math.
|
||||
Coexistence: ollama(30B), kimi, qwable(27B, 16.5 GB), and ornith(35B-A3B,
|
||||
21 GB) coexist with each other. 235B and comfyui coexist with nothing.
|
||||
See compose/qwen3-235b/README.md for arena math.
|
||||
EOF
|
||||
}
|
||||
|
||||
# --- Main --------------------------------------------------------------------
|
||||
TARGET="${1:-}"
|
||||
case "$TARGET" in
|
||||
coder|235b|kimi|qwable|comfyui|none) ;;
|
||||
coder|235b|kimi|qwable|ornith|comfyui|none) ;;
|
||||
status) show_status ; exit 0 ;;
|
||||
-h|--help|help|"") usage ; exit 0 ;;
|
||||
*)
|
||||
|
||||
Reference in New Issue
Block a user