Files
localgenai/pyinfra/framework/compose/kokoro.yml
noisedestroyers 6db46d8f6a Add OpenAI-compatible voice servers (faster-whisper + Kokoro)
Path B from VoiceModels.md — adds two new compose stacks alongside the
Wyoming pair so OpenWebUI/Conduit get voice without a Wyoming shim:

- compose/faster-whisper.yml — fedirz/faster-whisper-server CPU image,
  large-v3-turbo by default, OpenAI /v1/audio/transcriptions on :8001.
  Built-in web UI for ad-hoc transcription (see the sketch after this list).
- compose/kokoro.yml — ghcr.io/remsky/kokoro-fastapi-cpu, Kokoro-82M,
  OpenAI /v1/audio/speech on :8880.

Both run alongside (not instead of) Wyoming Whisper + Piper — Wyoming
keeps serving HA Assist, while the OpenAI-compatible pair serves
OpenWebUI / Conduit. The memory budget on Strix Halo accommodates
everything, plus Qwen3-Coder loaded concurrently, with plenty of
headroom.

Homepage gets dedicated tiles for both. README documents the
OpenWebUI Audio configuration that wires the new endpoints. Conduit
inherits voice via OpenWebUI without app-side setup.
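
The same wiring can be pinned from the OpenWebUI side as environment
variables; a hedged sketch, not the committed config. The AUDIO_STT_* /
AUDIO_TTS_* names follow OpenWebUI's documented env settings, the URLs
assume the published host ports above are reachable from the OpenWebUI
container (swap in the real hostname), and the kokoro model / af_bella
voice values are assumptions to check against Kokoro-FastAPI.

services:
  openwebui:
    environment:
      - AUDIO_STT_ENGINE=openai
      - AUDIO_STT_OPENAI_API_BASE_URL=http://<docker-host>:8001/v1
      - AUDIO_STT_OPENAI_API_KEY=none   # dummy; typically not checked
      - AUDIO_TTS_ENGINE=openai
      - AUDIO_TTS_OPENAI_API_BASE_URL=http://<docker-host>:8880/v1
      - AUDIO_TTS_OPENAI_API_KEY=none
      - AUDIO_TTS_MODEL=kokoro          # assumed model id
      - AUDIO_TTS_VOICE=af_bella        # assumed voice name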

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-08 14:42:45 -04:00

# Kokoro-FastAPI — OpenAI-compatible TTS in front of Kokoro-82M.
# https://github.com/remsky/Kokoro-FastAPI
#
# Speaks `/v1/audio/speech`. Pair with faster-whisper-server for a full
# OpenAI-compatible voice loop driving OpenWebUI / Conduit.
#
# Kokoro-82M (hexgrad, Jan 2025) is small (~340 MB) but produces
# noticeably more natural prosody than Piper. Apache 2.0 licence.
# CPU image is plenty fast for this model size on Strix Halo.
services:
  kokoro:
    image: ghcr.io/remsky/kokoro-fastapi-cpu:latest
    container_name: kokoro
    restart: unless-stopped
    ports:
      # 8880 is Kokoro-FastAPI's own default — keeping the same on the
      # host side so docs/tutorials line up.
      - "8880:8880"
    volumes:
      - /srv/docker/kokoro/models:/app/api/src/models