Merge pull request #336 from sandeeppainuly/fix-294-youtube-transcript-source-not-working-youtubetranscriptapi-has-no-attribute-get-transcript

Fix YouTube transcript API: replace deprecated get_transcript with fetch method
This commit is contained in:
Rohan Verma 2025-09-28 20:51:52 -07:00 committed by GitHub
commit 4e012845c4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -136,13 +136,14 @@ async def add_youtube_video_document(
  )
  try:
- captions = YouTubeTranscriptApi.get_transcript(video_id)
+ ytt_api = YouTubeTranscriptApi()
+ captions = ytt_api.fetch(video_id)
  # Include complete caption information with timestamps
  transcript_segments = []
  for line in captions:
- start_time = line.get("start", 0)
- duration = line.get("duration", 0)
- text = line.get("text", "")
+ start_time = line.start
+ duration = line.duration
+ text = line.text
  timestamp = f"[{start_time:.2f}s-{start_time + duration:.2f}s]"
  transcript_segments.append(f"{timestamp} {text}")
  transcript_text = "\n".join(transcript_segments)