From c08508c0c48e2f814409dfe615cfcd2e6800d9c8 Mon Sep 17 00:00:00 2001 From: sandeeppainuly Date: Sun, 28 Sep 2025 12:03:36 +0200 Subject: [PATCH 1/2] Fix YouTube transcript API: replace deprecated get_transcript with fetch method --- .../app/tasks/document_processors/youtube_processor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/tasks/document_processors/youtube_processor.py b/surfsense_backend/app/tasks/document_processors/youtube_processor.py index e918204de..3f69d74c7 100644 --- a/surfsense_backend/app/tasks/document_processors/youtube_processor.py +++ b/surfsense_backend/app/tasks/document_processors/youtube_processor.py @@ -136,7 +136,8 @@ async def add_youtube_video_document( ) try: - captions = YouTubeTranscriptApi.get_transcript(video_id) + ytt_api = YouTubeTranscriptApi() + captions = ytt_api.fetch(video_id) # Include complete caption information with timestamps transcript_segments = [] for line in captions: From 7bb8e77ee1fc31179a034cbf2cf6b6dce55c8172 Mon Sep 17 00:00:00 2001 From: sandeeppainuly Date: Sun, 28 Sep 2025 12:09:17 +0200 Subject: [PATCH 2/2] Update transcript processing to use new API object attributes --- .../app/tasks/document_processors/youtube_processor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/surfsense_backend/app/tasks/document_processors/youtube_processor.py b/surfsense_backend/app/tasks/document_processors/youtube_processor.py index 3f69d74c7..37981f6ae 100644 --- a/surfsense_backend/app/tasks/document_processors/youtube_processor.py +++ b/surfsense_backend/app/tasks/document_processors/youtube_processor.py @@ -141,9 +141,9 @@ async def add_youtube_video_document( # Include complete caption information with timestamps transcript_segments = [] for line in captions: - start_time = line.get("start", 0) - duration = line.get("duration", 0) - text = line.get("text", "") + start_time = line.start + duration = line.duration + text = line.text timestamp = f"[{start_time:.2f}s-{start_time + duration:.2f}s]" transcript_segments.append(f"{timestamp} {text}") transcript_text = "\n".join(transcript_segments)