added qwable and ornith

2026-06-26 11:33:35 -04:00
parent 224afbb3a6
commit 705421470a
6 changed files with 384 additions and 7 deletions
--- a/pyinfra/framework/scripts/swap-model
+++ b/pyinfra/framework/scripts/swap-model
@@ -14,6 +14,7 @@
 #   swap-model 235b         # Qwen3-235B-A22B via llama.cpp (long-task)
 #   swap-model kimi         # Kimi-Linear-48B-A3B via vLLM (long-context)
 #   swap-model qwable       # Qwable-3.6-27B via llama.cpp (Fable-style)
+#   swap-model ornith       # Ornith-1.0-35B via llama.cpp (agentic coding)
 #   swap-model comfyui      # ComfyUI (image generation)
 #   swap-model none         # everything down — free the GPU
 #   swap-model status       # show what's currently up
@@ -45,6 +46,7 @@ declare -A SVC_DIR=(
    [kimi]=kimi-linear
    [235b]=qwen3-235b
    [qwable]=qwable
+    [ornith]=ornith
    [comfyui]=comfyui
 )
 declare -A SVC_HEALTH=(
@@ -53,6 +55,7 @@ declare -A SVC_HEALTH=(
    [kimi]="http://127.0.0.1:8000/v1/models"
    [235b]="http://127.0.0.1:8081/health"
    [qwable]="http://127.0.0.1:8082/health"
+    [ornith]="http://127.0.0.1:8083/health"
    [comfyui]="http://127.0.0.1:8188/"
 )

@@ -68,8 +71,9 @@ plan() {
        235b)    UP=(235b)    ; DOWN=(ollama llama kimi qwable comfyui) ;;
        kimi)    UP=(kimi)    ; DOWN=(235b comfyui) ;;
        qwable)  UP=(qwable)  ; DOWN=(235b comfyui) ;;
-        comfyui) UP=(comfyui) ; DOWN=(235b kimi qwable) ;;
-        none)    UP=()        ; DOWN=(ollama llama kimi 235b qwable comfyui) ;;
+        ornith)  UP=(ornith)  ; DOWN=(235b comfyui) ;;
+        comfyui) UP=(comfyui) ; DOWN=(235b kimi qwable ornith) ;;
+        none)    UP=()        ; DOWN=(ollama llama kimi 235b qwable ornith comfyui) ;;
        *)       return 1 ;;
    esac
 }
@@ -127,7 +131,7 @@ up_svc() {

 show_status() {
    echo "Inference services:"
-    for svc in ollama llama kimi 235b qwable comfyui; do
+    for svc in ollama llama kimi 235b qwable ornith comfyui; do
        local container="${SVC_DIR[$svc]}" state="down" health=""
        if is_running "$container"; then
            state="up"
@@ -150,6 +154,7 @@ Usage:
  swap-model 235b         # Qwen3-235B-A22B  (llama.cpp, long-task, ~5-10 tok/s)
  swap-model kimi         # Kimi-Linear-48B  (vLLM, long-context chat)
  swap-model qwable       # Qwable-3.6-27B   (llama.cpp, Fable-style, ~10-15 tok/s)
+  swap-model ornith       # Ornith-1.0-35B   (llama.cpp, agentic coding MoE, ~80-100 tok/s)
  swap-model comfyui      # ComfyUI          (image generation)
  swap-model none         # everything down  (free the GPU arena)
  swap-model status       # show current state
@@ -158,16 +163,16 @@ Behaviour: stops conflicting services (frees the 110 GB GPU arena),
 starts the target, polls its /health until it returns 200. Wait timeout
 defaults to ${WAIT_TIMEOUT}s; override with SWAP_WAIT_TIMEOUT.

-Coexistence: ollama(30B), kimi, and qwable(27B, 16.5 GB) coexist with
-each other. 235B and comfyui coexist with nothing. See
-compose/qwen3-235b/README.md for arena math.
+Coexistence: ollama(30B), kimi, qwable(27B, 16.5 GB), and ornith(35B-A3B,
+21 GB) coexist with each other. 235B and comfyui coexist with nothing.
+See compose/qwen3-235b/README.md for arena math.
 EOF
 }

 # --- Main --------------------------------------------------------------------
 TARGET="${1:-}"
 case "$TARGET" in
-    coder|235b|kimi|qwable|comfyui|none) ;;
+    coder|235b|kimi|qwable|ornith|comfyui|none) ;;
    status)         show_status ; exit 0 ;;
    -h|--help|help|"") usage ; exit 0 ;;
    *)