progress 235b
This commit is contained in:
65
pyinfra/framework/compose/litellm.yml
Normal file
65
pyinfra/framework/compose/litellm.yml
Normal file
@@ -0,0 +1,65 @@
|
||||
# LiteLLM proxy — single OpenAI-compatible endpoint in front of all the
|
||||
# local model backends on this box (Ollama 11434, llama.cpp 30B 8080,
|
||||
# vLLM Kimi-Linear 8000, llama.cpp Qwen3-235B 8081).
|
||||
#
|
||||
# Why this exists. With ≥3 backends running and ≥2 client harnesses
|
||||
# (opencode on Mac, OpenHands on the box, future orchestrator on another
|
||||
# server), each client otherwise carries its own per-backend config.
|
||||
# LiteLLM centralizes: model_name → backend_url mapping lives here once,
|
||||
# clients just speak "model: qwen3-235b" to a single URL.
|
||||
#
|
||||
# Routing model is documented in compose/litellm/README.md — opencode
|
||||
# stays direct-wired for now (fewer hops, simpler debug); OpenHands +
|
||||
# the future orchestrator will point here.
|
||||
#
|
||||
# Backend reachability. `extra_hosts: host.docker.internal:host-gateway`
|
||||
# resolves to the host's docker0 IP from inside this container, which
|
||||
# is how it reaches the other compose services published on host ports.
|
||||
# Don't use container_name-based DNS — those containers live on separate
|
||||
# bridge networks (each compose stack has its own).
|
||||
services:
  # Single LiteLLM proxy container: one OpenAI-compatible endpoint on
  # host port 4000 that fans out to the model backends listed in the
  # mounted config.yaml.
  litellm:
    image: ghcr.io/berriai/litellm:main-stable
    container_name: litellm
    restart: unless-stopped
    extra_hosts:
      # On Linux, `host-gateway` is Docker's magic alias for the host's
      # bridge gateway (docker0) IP — equivalent to host.docker.internal
      # on Mac/Windows. Lets LiteLLM dial backends published on host
      # ports as http://host.docker.internal:<port>.
      # Caveat: a backend bound only to 127.0.0.1 on the host is NOT
      # reachable this way — it must listen on 0.0.0.0 or the bridge IP.
      - "host.docker.internal:host-gateway"
    environment:
      # Master key. LiteLLM requires one for admin endpoints + serves
      # as the default Bearer for client requests. Sibling .env file
      # holds the value (created by pyinfra as a placeholder; you fill
      # it in on first deploy). Same pattern as compose/beszel.yml.
      - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY}
      # Optional: salt for hashing virtual keys at rest. Unused in the
      # single-user setup but LiteLLM logs a warning without it.
      - LITELLM_SALT_KEY=${LITELLM_SALT_KEY:-sk-localgenai-salt}
    volumes:
      # Source-of-truth config lives in the repo; pyinfra syncs it to
      # /srv/docker/litellm/config.yaml on every `./run.sh`. Don't edit
      # on the box — drift gets overwritten. Mounted read-only.
      - /srv/docker/litellm/config.yaml:/app/config.yaml:ro
    ports:
      # Quoted so YAML never reinterprets the host:container pair.
      - "4000:4000"
    command:
      - --config
      - /app/config.yaml
      - --port
      - "4000"
      # --num_workers 1 keeps memory minimal; LiteLLM is I/O-bound here,
      # not CPU-bound. Bump if you see queueing.
      - --num_workers
      - "1"
    healthcheck:
      # LiteLLM exposes both /health (verifies all backends are reachable
      # — heavy) and /health/readiness (just the proxy itself — cheap).
      # Use readiness for the compose healthcheck so a stopped backend
      # doesn't mark LiteLLM unhealthy.
      #
      # Probe with the image's own Python interpreter rather than curl:
      # the upstream LiteLLM image does not reliably ship curl, and a
      # missing binary leaves the container permanently `unhealthy`.
      # urlopen raises (non-zero exit) on connection errors and on any
      # HTTP status >= 400; its 4s timeout stays under the 5s compose
      # timeout below so the probe fails cleanly instead of being killed.
      test:
        - CMD
        - python3
        - "-c"
        - "import urllib.request; urllib.request.urlopen('http://127.0.0.1:4000/health/readiness', timeout=4)"
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 30s
|
||||
Reference in New Issue
Block a user