# localgenai/pyinfra/framework/compose/homepage/services.yaml

# Service tiles for the localgenai stack. Edit in place — pyinfra
# ships this once and never overwrites.
#
# Widget reference: https://gethomepage.dev/widgets/

- Inference:
    # The installed Homepage version lacks the built-in `type: ollama`
    # widget, so this tile polls /api/ps through customapi instead.
    # That is the more informative signal anyway: it reports the model
    # that is actually loaded and its VRAM footprint. With no model
    # loaded the `models` array is empty and both fields render as
    # N/A, which is itself useful state.
    - Ollama:
        description: 'Local model server (Qwen3-Coder-30B and friends)'
        href: 'http://framework:11434'
        icon: ollama.svg
        container: ollama
        server: localhost-docker
        widget:
          type: customapi
          url: 'http://host.docker.internal:11434/api/ps'
          refreshInterval: 30000
          mappings:
            - label: 'Loaded'
              field: 'models.0.name'
            - label: 'VRAM'
              field: 'models.0.size_vram'
              format: bytes

    # There is no native llama.cpp widget, so liveness is read from the
    # server's /health endpoint via customapi, surfacing its `status`
    # field.
    - llama.cpp:
        description: 'Vulkan-backed llama.cpp server (gfx1151)'
        href: 'http://framework:8080'
        icon: si-llama
        container: llama
        server: localhost-docker
        widget:
          type: customapi
          url: 'http://host.docker.internal:8080/health'
          refreshInterval: 30000
          mappings:
            - label: 'Status'
              field: 'status'

    # The vLLM instance in use is the `kimi-linear` container
    # (compose/kimi-linear.yml). The legacy `vllm` container defined in
    # compose/vllm.yml is an unused stub, so it is not referenced here.
    - vLLM (Kimi-Linear):
        # Folded scalar: the two lines below join into one line of text.
        description: >-
          Batched OpenAI-compatible serving — Kimi-Linear-48B-A3B
          (long-context)
        href: 'http://framework:8000'
        icon: mdi-server-network
        container: kimi-linear
        server: localhost-docker
        widget:
          type: customapi
          url: 'http://host.docker.internal:8000/v1/models'
          refreshInterval: 30000
          mappings:
            - label: 'Served'
              field: 'data.0.id'
            - label: 'Context'
              field: 'data.0.max_model_len'
              format: number

    # ComfyUI's /system_stats responds with a nested
    # {system, devices[0]} object. Showing the version alongside free
    # VRAM gives an at-a-glance "is it healthy, and does it have
    # memory" reading.
    - ComfyUI:
        description: 'Image generation (Flux.1-Dev via kyuz0 gfx1151 toolbox)'
        href: 'http://framework:8188'
        icon: mdi-image-edit
        container: comfyui
        server: localhost-docker
        widget:
          type: customapi
          url: 'http://host.docker.internal:8188/system_stats'
          refreshInterval: 30000
          mappings:
            - label: 'Version'
              field: 'system.comfyui_version'
            - label: 'VRAM Free'
              field: 'devices.0.vram_free'
              format: bytes

- Agent UIs:
    # Neither tile in this group configures a widget; each is a link
    # plus the `server`/`container` pair naming its Docker container.
    - OpenWebUI:
        description: 'Chat UI in front of Ollama, with SearXNG search'
        href: 'http://framework:3000'
        icon: open-webui.svg
        container: openwebui
        server: localhost-docker

    - OpenHands:
        description: 'Autonomous coding agent in a Docker sandbox'
        href: 'http://framework:3030'
        icon: mdi-robot
        container: openhands
        server: localhost-docker

- Observability:
    # Monitoring dashboards. No widgets are configured here; every
    # tile is a link plus its `server`/`container` pair.
    - Beszel:
        description: 'Host + container + AMD GPU dashboard'
        href: 'http://framework:8090'
        icon: beszel.svg
        container: beszel
        server: localhost-docker

    - OpenLIT:
        description: 'LLM fleet metrics (cost, tokens, latency)'
        href: 'http://framework:3001'
        icon: mdi-chart-line-variant
        container: openlit
        server: localhost-docker

    - Phoenix:
        description: 'Per-trace agent waterfall / flamegraph'
        href: 'http://framework:6006'
        icon: arize-phoenix.svg
        container: phoenix
        server: localhost-docker

- Voice:
    # The Wyoming-protocol services expose no web UI, so their tiles
    # carry no `href` and are purely informational. The
    # OpenAI-compatible servers (faster-whisper, Kokoro) do have UIs /
    # APIs that can be reached directly, hence their links.
    - Whisper (Wyoming):
        description: 'STT for Home Assistant Assist (Wyoming :10300)'
        icon: mdi-microphone-message
        container: wyoming-whisper
        server: localhost-docker

    - Piper (Wyoming):
        description: 'TTS for Home Assistant Assist (Wyoming :10200)'
        icon: mdi-account-voice
        container: wyoming-piper
        server: localhost-docker

    - faster-whisper:
        # Folded scalar: the two lines below join into one line of text.
        description: >-
          STT (OpenAI API) — large-v3-turbo, used by
          OpenWebUI/Conduit
        href: 'http://framework:8001'
        icon: mdi-microphone
        container: faster-whisper
        server: localhost-docker

    - Kokoro:
        # Folded scalar: the two lines below join into one line of text.
        description: >-
          TTS (OpenAI API) — Kokoro-82M, used by
          OpenWebUI/Conduit
        href: 'http://framework:8880/web'
        icon: mdi-account-music
        container: kokoro
        server: localhost-docker

- External:
    # This tile sets no `server`/`container` keys; it is only a link
    # to the HTTPS address below.
    - SearXNG:
        description: 'Self-hosted metasearch (used by OpenWebUI + OpenCode)'
        href: 'https://searxng.n0n.io'
        icon: searxng.svg