mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-28 21:49:40 +02:00
69 lines
2.2 KiB
Python
69 lines
2.2 KiB
Python
"""LiteLLM adapter: hosted TTS (OpenAI, Azure, Vertex AI) via one ``aspeech`` call.

LiteLLM normalises every hosted provider behind the same ``aspeech`` surface,
so a single adapter covers them all. The provider is encoded in the model
string (e.g. ``openai/tts-1``, ``vertex_ai/...``) and the voice reference is
whatever that provider expects, which the catalog already supplies.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from ..audio import SynthesizedAudio
|
|
from ..errors import TextToSpeechError
|
|
from ..port import TextToSpeech
|
|
from ..request import SynthesisRequest
|
|
|
|
# ``aspeech`` hands back MP3-encoded bytes for the hosted providers, so that is
# the container this adapter reports.
_CONTAINER = "mp3"

# Retries are there to absorb transient upstream failures.
_MAX_RETRIES = 2

# Ten minutes, matching the legacy podcaster timeouts; even a long single
# segment finishes well inside this window.
_TIMEOUT_SECONDS = 10 * 60
|
|
|
|
|
|
class LiteLlmTextToSpeech(TextToSpeech):
    """Text-to-speech adapter that delegates to LiteLLM's ``aspeech`` coroutine.

    The configured model string is passed to LiteLLM unchanged, together with
    the voice and text carried by each synthesis request.
    """

    def __init__(
        self,
        *,
        model: str,
        api_base: str | None = None,
        api_key: str | None = None,
    ) -> None:
        """Remember the call settings; nothing is contacted at construction.

        Args:
            model: Model identifier forwarded to ``aspeech`` as ``model``.
            api_base: Optional endpoint override; forwarded only when truthy.
            api_key: Optional credential; forwarded only when truthy.
        """
        self._model, self._api_base, self._api_key = model, api_base, api_key

    @property
    def container(self) -> str:
        """Container format of the audio this adapter returns."""
        return _CONTAINER

    def _call_arguments(self, request: SynthesisRequest) -> dict[str, object]:
        """Assemble the keyword arguments for a single ``aspeech`` call."""
        arguments: dict[str, object] = {
            "model": self._model,
            "voice": request.voice,
            "input": request.text,
            "max_retries": _MAX_RETRIES,
            "timeout": _TIMEOUT_SECONDS,
        }
        # Unset or empty overrides are omitted rather than sent as None/"",
        # so the call carries only settings that were actually configured.
        overrides = (("api_base", self._api_base), ("api_key", self._api_key))
        arguments.update((key, value) for key, value in overrides if value)
        return arguments

    async def synthesize(self, request: SynthesisRequest) -> SynthesizedAudio:
        """Turn ``request.text`` into audio using ``request.voice``.

        Args:
            request: Supplies the ``voice`` and ``text`` sent to the model.

        Returns:
            The ``content`` of the ``aspeech`` response wrapped in a
            ``SynthesizedAudio`` tagged with this adapter's container.

        Raises:
            TextToSpeechError: If ``aspeech`` raises any ``Exception`` (the
                original is chained as the cause), or if the response has no
                non-empty ``content``.
        """
        # Imported on first use rather than at module load.
        from litellm import aspeech

        call_arguments = self._call_arguments(request)

        try:
            result = await aspeech(**call_arguments)
        except Exception as exc:  # noqa: BLE001 - every provider failure becomes one error type
            failure = f"{self._model} synthesis failed: {exc}"
            raise TextToSpeechError(failure) from exc

        # A response without ``content`` and one with an empty payload are
        # both treated as "no audio".
        audio = getattr(result, "content", None)
        if audio:
            return SynthesizedAudio(data=audio, container=_CONTAINER)

        raise TextToSpeechError(f"{self._model} returned no audio")
|