feat: Added Speech to Text support.

- Supports audio & video files. - Will be useful for YouTube videos that don't have transcripts.
2026-04-29 19:06:24 +02:00 · 2025-05-13 21:13:53 -07:00 · 2025-05-13 21:13:53 -07:00 · a8080d2dc7
commit a8080d2dc7
parent 57987ecc76
8 changed files with 172 additions and 73 deletions
--- a/surfsense_backend/app/agents/podcaster/nodes.py
+++ b/surfsense_backend/app/agents/podcaster/nodes.py
@ -135,14 +135,23 @@ async def create_merged_podcast_audio(state: State, config: RunnableConfig) -> D
        filename = f"{temp_dir}/{session_id}_{index}.mp3"
        
        try:
-            # Generate speech using litellm
-            response = await aspeech(
-                model=app_config.TTS_SERVICE,
-                voice=voice,
-                input=dialog,
-                max_retries=2,
-                timeout=600,
-            )
+            if app_config.TTS_SERVICE_API_BASE:
+                response = await aspeech(
+                    model=app_config.TTS_SERVICE,
+                    api_base=app_config.TTS_SERVICE_API_BASE,
+                    voice=voice,
+                    input=dialog,
+                    max_retries=2,
+                    timeout=600,
+                )
+            else:
+                response = await aspeech(
+                    model=app_config.TTS_SERVICE,
+                    voice=voice,
+                    input=dialog,
+                    max_retries=2,
+                    timeout=600,
+                )
            
            # Save the audio to a file - use proper streaming method
            with open(filename, 'wb') as f: