# localgenai/pyinfra/framework/compose/faster-whisper.yml

# faster-whisper-server — OpenAI-compatible STT.
# https://github.com/fedirz/faster-whisper-server
#
# Speaks `/v1/audio/transcriptions` (and `/v1/audio/translations`) so any
# client that talks to OpenAI's audio API works without changes —
# OpenWebUI, Conduit (via OpenWebUI), arbitrary scripts.
#
# Runs alongside (not instead of) Wyoming Whisper. Wyoming stays for
# Home Assistant Assist; this server is for OpenAI-API consumers.
#
# CPU mode: Strix Halo's 16 Zen 5 cores are comfortably real-time even
# on large-v3-turbo. CTranslate2's ROCm support for gfx1151 is
# unreliable; CPU sidesteps that.
services:
  faster-whisper:
    container_name: faster-whisper
    image: fedirz/faster-whisper-server:latest-cpu
    restart: unless-stopped
    environment:
      # Turn on the bundled web UI, served at /.
      ENABLE_UI: 'true'
      # Run inference on the CPU with int8 compute.
      WHISPER__INFERENCE_DEVICE: cpu
      WHISPER__COMPUTE_TYPE: int8
      # Model used by default: loaded on the first request and
      # downloaded automatically when first used.
      WHISPER__MODEL: Systran/faster-whisper-large-v3-turbo
    ports:
      # Host port 8001 -> container port 8000.
      - '8001:8000'
    volumes:
      # Keep downloaded models on the host so they survive the
      # container being recreated.
      - /srv/docker/faster-whisper/cache:/root/.cache/huggingface