feat: Added Speech to Text support.

- Supports audio & video files.
- Will be useful for YouTube videos which don't have transcripts.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2025-05-13 21:13:53 -07:00
parent 57987ecc76
commit a8080d2dc7
8 changed files with 172 additions and 73 deletions

View file

@ -135,14 +135,23 @@ async def create_merged_podcast_audio(state: State, config: RunnableConfig) -> D
filename = f"{temp_dir}/{session_id}_{index}.mp3"
try:
# Generate speech using litellm
response = await aspeech(
model=app_config.TTS_SERVICE,
voice=voice,
input=dialog,
max_retries=2,
timeout=600,
)
if app_config.TTS_SERVICE_API_BASE:
response = await aspeech(
model=app_config.TTS_SERVICE,
api_base=app_config.TTS_SERVICE_API_BASE,
voice=voice,
input=dialog,
max_retries=2,
timeout=600,
)
else:
response = await aspeech(
model=app_config.TTS_SERVICE,
voice=voice,
input=dialog,
max_retries=2,
timeout=600,
)
# Save the audio to a file - use proper streaming method
with open(filename, 'wb') as f: