# localgenai/pyinfra/framework/compose/faster-whisper.yml
# faster-whisper-server — OpenAI-compatible STT.
# https://github.com/fedirz/faster-whisper-server
#
# Speaks `/v1/audio/transcriptions` (and `/v1/audio/translations`) so any
# client that talks to OpenAI's audio API works without changes —
# OpenWebUI, Conduit (via OpenWebUI), arbitrary scripts.
#
# Runs alongside (not instead of) Wyoming Whisper. Wyoming stays for
# Home Assistant Assist; this server is for OpenAI-API consumers.
#
# CPU mode: Strix Halo's 16 Zen 5 cores comfortably real-time even on
# large-v3-turbo. CTranslate2's ROCm support for gfx1151 is unreliable;
# CPU sidesteps that.
services:
  faster-whisper:
    image: fedirz/faster-whisper-server:latest-cpu
    container_name: faster-whisper
    restart: unless-stopped
    ports:
      # Host 8001 → container 8000. Quoted so YAML doesn't misparse the
      # colon-separated pair (sexagesimal trap for low port numbers).
      - "8001:8000"
    environment:
      # Default model loaded on first request. Auto-downloads on use.
      WHISPER__MODEL: Systran/faster-whisper-large-v3-turbo
      WHISPER__INFERENCE_DEVICE: cpu
      WHISPER__COMPUTE_TYPE: int8
      # Built-in web UI at /. Quoted string: env values are strings, not booleans.
      ENABLE_UI: "true"
    volumes:
      # Persist model downloads across container recreates.
      - /srv/docker/faster-whisper/cache:/root/.cache/huggingface