# Module: surfsense_backend/app/etl_pipeline/parsers/audio.py
# Origin: patch 02fc6f1d, "feat: add audio transcription functionality to ETL pipeline".
from litellm import atranscription

from app.config import config as app_config


async def transcribe_audio(file_path: str, filename: str) -> str:
    """Transcribe an audio file and return the text as a Markdown document.

    The backend is selected from ``app_config.STT_SERVICE``: a value that
    starts with ``"local/"`` is handled by the in-process ``stt_service``;
    any other value (including an unset one) is sent to
    ``litellm.atranscription``.

    Args:
        file_path: Path of the audio file to transcribe.
        filename: Name shown in the heading of the returned document.

    Returns:
        The string ``"# Transcription of {filename}"``, a blank line, and
        then the transcribed text.

    Raises:
        ValueError: If the backend returns no text, or text that consists
            only of whitespace.
    """
    stt_model = app_config.STT_SERVICE
    use_local = bool(stt_model) and stt_model.startswith("local/")

    if use_local:
        # Imported only on this branch, so the local STT service module is
        # not loaded when an external provider is configured.
        from app.services.stt_service import stt_service

        # NOTE(review): transcribe_file is called without ``await``, so it
        # runs synchronously inside this coroutine and holds the event loop
        # until it returns. Consider ``asyncio.to_thread`` once the service
        # is confirmed safe to call from a worker thread.
        result = stt_service.transcribe_file(file_path)
        text = result.get("text", "")
    else:
        with open(file_path, "rb") as audio_file:
            kwargs: dict = {
                "model": stt_model,
                "file": audio_file,
                "api_key": app_config.STT_SERVICE_API_KEY,
            }
            # Only override the provider endpoint when one is configured.
            if app_config.STT_SERVICE_API_BASE:
                kwargs["api_base"] = app_config.STT_SERVICE_API_BASE
            response = await atranscription(**kwargs)
            text = response.get("text", "")

    # Single guard for both backends. A whitespace-only transcript is treated
    # as empty too; otherwise the caller would receive a document containing
    # nothing but the heading.
    if not text or (isinstance(text, str) and not text.strip()):
        raise ValueError("Transcription returned empty text")

    return f"# Transcription of {filename}\n\n{text}"