mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-08 20:25:19 +02:00
Merge pull request #31 from MODSetter/dev
feat(youtube): integrate YouTube video processing connector
This commit is contained in:
commit
1b7fef2629
13 changed files with 608 additions and 18 deletions
|
|
@ -6,7 +6,7 @@ from app.db import get_async_session, User, SearchSpace, Document, DocumentType
|
||||||
from app.schemas import DocumentsCreate, DocumentUpdate, DocumentRead
|
from app.schemas import DocumentsCreate, DocumentUpdate, DocumentRead
|
||||||
from app.users import current_active_user
|
from app.users import current_active_user
|
||||||
from app.utils.check_ownership import check_ownership
|
from app.utils.check_ownership import check_ownership
|
||||||
from app.tasks.background_tasks import add_extension_received_document, add_received_file_document, add_crawled_url_document
|
from app.tasks.background_tasks import add_extension_received_document, add_received_file_document, add_crawled_url_document, add_youtube_video_document
|
||||||
# Force asyncio to use standard event loop before unstructured imports
|
# Force asyncio to use standard event loop before unstructured imports
|
||||||
import asyncio
|
import asyncio
|
||||||
try:
|
try:
|
||||||
|
|
@ -368,8 +368,7 @@ async def process_youtube_video_with_new_session(
|
||||||
|
|
||||||
async with async_session_maker() as session:
|
async with async_session_maker() as session:
|
||||||
try:
|
try:
|
||||||
# TODO: Implement YouTube video processing
|
await add_youtube_video_document(session, url, search_space_id)
|
||||||
print("Processing YouTube video with new session")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
import logging
|
import logging
|
||||||
logging.error(f"Error processing YouTube video: {str(e)}")
|
logging.error(f"Error processing YouTube video: {str(e)}")
|
||||||
|
|
|
||||||
|
|
@ -244,3 +244,142 @@ async def add_received_file_document(
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
await session.rollback()
|
await session.rollback()
|
||||||
raise RuntimeError(f"Failed to process file document: {str(e)}")
|
raise RuntimeError(f"Failed to process file document: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
async def add_youtube_video_document(
    session: AsyncSession,
    url: str,
    search_space_id: int
):
    """
    Process a YouTube video URL, extract transcripts, and add as document.

    The video ID is parsed from ``url``, the title/author/thumbnail are read
    from YouTube's oEmbed endpoint, and the captions are fetched through
    ``youtube_transcript_api``. A summary of the assembled document is
    generated and embedded, the transcript is chunked, and a ``Document`` of
    type ``YOUTUBE_VIDEO`` is added to ``session`` and committed.

    Args:
        session: Database session used to persist the document.
        url: YouTube video URL (watch, youtu.be, embed, /v/, shorts or live
            form; a missing scheme is tolerated).
        search_space_id: ID of the search space the document belongs to.

    Returns:
        The persisted and refreshed ``Document``.

    Raises:
        ValueError: If no video ID can be extracted from ``url``.
        SQLAlchemyError: Re-raised after rolling the session back.
        Exception: Any other failure is logged and re-raised after rollback.
    """
    try:
        import asyncio

        from youtube_transcript_api import YouTubeTranscriptApi

        video_id = _extract_youtube_video_id(url)
        if not video_id:
            raise ValueError(f"Could not extract video ID from URL: {url}")

        # Both lookups below are blocking network calls; run them in the
        # default executor so the event loop is not stalled while they wait.
        loop = asyncio.get_running_loop()

        # Get video metadata
        video_data = await loop.run_in_executor(None, _fetch_youtube_oembed, video_id)

        # Get video transcript. A transcript failure is deliberately not
        # fatal: the document is still stored, with the error as its text.
        try:
            # NOTE(review): get_transcript is the legacy static entry point of
            # youtube-transcript-api - confirm it still exists in the
            # installed version before bumping the dependency.
            captions = await loop.run_in_executor(
                None, YouTubeTranscriptApi.get_transcript, video_id
            )
            transcript_text = _format_youtube_transcript(captions)
        except Exception as e:
            transcript_text = f"No captions available for this video. Error: {str(e)}"

        title = video_data.get("title", "YouTube Video")
        author = video_data.get("author_name", "Unknown")
        thumbnail = video_data.get("thumbnail_url", "")

        # Format document metadata in a more maintainable way
        metadata_sections = [
            ("METADATA", [
                f"TITLE: {title}",
                f"URL: {url}",
                f"VIDEO_ID: {video_id}",
                f"AUTHOR: {author}",
                f"THUMBNAIL: {thumbnail}"
            ]),
            ("CONTENT", [
                "FORMAT: transcript",
                "TEXT_START",
                transcript_text,
                "TEXT_END"
            ])
        ]

        # Build the document string
        document_parts = ["<DOCUMENT>"]
        for section_title, section_content in metadata_sections:
            document_parts.append(f"<{section_title}>")
            document_parts.extend(section_content)
            document_parts.append(f"</{section_title}>")
        document_parts.append("</DOCUMENT>")
        combined_document_string = '\n'.join(document_parts)

        # Generate summary
        summary_chain = SUMMARY_PROMPT_TEMPLATE | config.long_context_llm_instance
        summary_result = await summary_chain.ainvoke({"document": combined_document_string})
        summary_content = summary_result.content
        summary_embedding = config.embedding_model_instance.embed(summary_content)

        # Process chunks (only the transcript is chunked, not the metadata header)
        chunks = [
            Chunk(content=chunk.text, embedding=chunk.embedding)
            for chunk in config.chunker_instance.chunk(transcript_text)
        ]

        # Create document
        from app.db import Document, DocumentType

        document = Document(
            title=title,
            document_type=DocumentType.YOUTUBE_VIDEO,
            document_metadata={
                "url": url,
                "video_id": video_id,
                "video_title": title,
                "author": author,
                "thumbnail": thumbnail
            },
            content=summary_content,
            embedding=summary_embedding,
            chunks=chunks,
            search_space_id=search_space_id
        )

        session.add(document)
        await session.commit()
        await session.refresh(document)

        return document
    except SQLAlchemyError:
        await session.rollback()
        raise
    except Exception as e:
        await session.rollback()
        import logging
        logging.error(f"Failed to process YouTube video: {str(e)}")
        raise


def _extract_youtube_video_id(url: str):
    """Return the video ID contained in a YouTube URL, or None if there is none."""
    from urllib.parse import urlparse, parse_qs

    candidate = url.strip()
    # urlparse only fills in the hostname when a scheme is present, so add one
    # for inputs such as "youtube.com/watch?v=...".
    if "://" not in candidate:
        candidate = "https://" + candidate

    parsed_url = urlparse(candidate)
    hostname = (parsed_url.hostname or "").lower()

    if hostname in ("youtu.be", "www.youtu.be"):
        # Keep only the first path segment so "/ID/" or "/ID/extra" yield "ID".
        return parsed_url.path.lstrip("/").split("/")[0] or None

    if hostname in ("www.youtube.com", "youtube.com", "m.youtube.com", "music.youtube.com"):
        if parsed_url.path == "/watch":
            return parse_qs(parsed_url.query).get("v", [None])[0]
        for prefix in ("/embed/", "/v/", "/shorts/", "/live/"):
            if parsed_url.path.startswith(prefix):
                return parsed_url.path[len(prefix):].split("/")[0] or None

    return None


def _fetch_youtube_oembed(video_id: str, timeout: float = 10.0) -> dict:
    """Fetch the oEmbed JSON for a video (blocking; bounded by ``timeout`` seconds)."""
    import json
    from urllib.parse import urlencode
    from urllib.request import urlopen

    params = {"format": "json", "url": f"https://www.youtube.com/watch?v={video_id}"}
    full_url = "https://www.youtube.com/oembed" + "?" + urlencode(params)

    with urlopen(full_url, timeout=timeout) as response:
        return json.loads(response.read().decode())


def _format_youtube_transcript(captions) -> str:
    """Render caption entries as one "[start-end] text" line per entry."""
    transcript_segments = []
    for line in captions:
        start_time = line.get("start", 0)
        duration = line.get("duration", 0)
        text = line.get("text", "")
        timestamp = f"[{start_time:.2f}s-{start_time + duration:.2f}s]"
        transcript_segments.append(f"{timestamp} {text}")
    return "\n".join(transcript_segments)
|
||||||
|
|
|
||||||
|
|
@ -59,6 +59,32 @@ async def stream_connector_search_results(
|
||||||
|
|
||||||
# Process each selected connector
|
# Process each selected connector
|
||||||
for connector in selected_connectors:
|
for connector in selected_connectors:
|
||||||
|
if connector == "YOUTUBE_VIDEO":
|
||||||
|
# Send terminal message about starting search
|
||||||
|
yield streaming_service.add_terminal_message("Starting to search for youtube videos...")
|
||||||
|
|
||||||
|
# Search for YouTube videos using reformulated query
|
||||||
|
result_object, youtube_chunks = await connector_service.search_youtube(
|
||||||
|
user_query=reformulated_query,
|
||||||
|
user_id=user_id,
|
||||||
|
search_space_id=search_space_id,
|
||||||
|
top_k=TOP_K
|
||||||
|
)
|
||||||
|
|
||||||
|
# Send terminal message about search results
|
||||||
|
yield streaming_service.add_terminal_message(
|
||||||
|
f"Found {len(result_object['sources'])} relevant YouTube videos",
|
||||||
|
"success"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Update sources
|
||||||
|
all_sources.append(result_object)
|
||||||
|
yield streaming_service.update_sources(all_sources)
|
||||||
|
|
||||||
|
# Add documents to collection
|
||||||
|
all_raw_documents.extend(youtube_chunks)
|
||||||
|
|
||||||
|
|
||||||
# Extension Docs
|
# Extension Docs
|
||||||
if connector == "EXTENSION":
|
if connector == "EXTENSION":
|
||||||
# Send terminal message about starting search
|
# Send terminal message about starting search
|
||||||
|
|
|
||||||
|
|
@ -479,4 +479,83 @@ class ConnectorService:
|
||||||
"sources": sources_list,
|
"sources": sources_list,
|
||||||
}
|
}
|
||||||
|
|
||||||
return result_object, extension_chunks
|
return result_object, extension_chunks
|
||||||
|
|
||||||
|
async def search_youtube(self, user_query: str, user_id: int, search_space_id: int, top_k: int = 20) -> tuple:
    """
    Search for YouTube videos and return both the source information and langchain documents

    Args:
        user_query: The user's query
        user_id: The user's ID
        search_space_id: The search space ID to search in
        top_k: Maximum number of results to return

    Returns:
        tuple: (sources_info, langchain_documents)
    """
    youtube_chunks = await self.retriever.hybrid_search(
        query_text=user_query,
        top_k=top_k,
        user_id=user_id,
        search_space_id=search_space_id,
        document_type="YOUTUBE_VIDEO"
    )

    # Maximum number of characters of chunk text shown as a source description
    preview_length = 100

    # Map youtube_chunks to the required format
    mapped_sources = {}
    for i, chunk in enumerate(youtube_chunks):
        # Fix for UI: every chunk gets a fresh id from the shared counter.
        # NOTE(review): chunks of an already-listed video receive ids that do
        # not appear in `sources` - confirm the UI tolerates this.
        chunk['document']['id'] = self.source_id_counter

        # Extract document metadata
        document = chunk.get('document', {})
        metadata = document.get('metadata', {})

        # Extract YouTube-specific metadata. The ingestion side stores the
        # channel under "author", so fall back to it when "channel_name" is absent.
        video_title = metadata.get('video_title', 'Untitled Video')
        video_id = metadata.get('video_id', '')
        channel_name = metadata.get('channel_name') or metadata.get('author', '')

        # Create a more descriptive title for YouTube videos
        title = video_title
        if channel_name:
            title += f" - {channel_name}"

        # Prefer an explicit description; otherwise preview the chunk text and
        # mark it with an ellipsis only when text was actually cut off.
        description = metadata.get('description')
        if description is None:
            content = chunk.get('content', '') or ''
            description = content[:preview_length]
            if len(content) > preview_length:
                description += "..."

        # For URL, construct a URL to the YouTube video
        url = f"https://www.youtube.com/watch?v={video_id}" if video_id else ""

        source = {
            "id": self.source_id_counter,
            "title": title,
            "description": description,
            "url": url,
            "video_id": video_id,  # Additional field for YouTube videos
            "channel_name": channel_name  # Additional field for YouTube videos
        }

        self.source_id_counter += 1

        # Use video_id as a unique identifier for tracking unique sources;
        # only the first chunk of each video contributes a source entry.
        source_key = video_id or f"youtube_{i}"
        if source_key not in mapped_sources:
            mapped_sources[source_key] = source

    # Convert to list of sources
    sources_list = list(mapped_sources.values())

    # Create result object
    result_object = {
        "id": 6,  # Assign a unique ID for the YouTube connector
        "name": "YouTube Videos",
        "type": "YOUTUBE_VIDEO",
        "sources": sources_list,
    }

    return result_object, youtube_chunks
|
||||||
|
|
@ -26,4 +26,5 @@ dependencies = [
|
||||||
"unstructured[all-docs]>=0.16.25",
|
"unstructured[all-docs]>=0.16.25",
|
||||||
"uvicorn[standard]>=0.34.0",
|
"uvicorn[standard]>=0.34.0",
|
||||||
"validators>=0.34.0",
|
"validators>=0.34.0",
|
||||||
|
"youtube-transcript-api>=1.0.3",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
24
surfsense_backend/uv.lock
generated
24
surfsense_backend/uv.lock
generated
|
|
@ -580,6 +580,15 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686 },
|
{ url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686 },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "defusedxml"
|
||||||
|
version = "0.7.1"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604 },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "deprecated"
|
name = "deprecated"
|
||||||
version = "1.2.18"
|
version = "1.2.18"
|
||||||
|
|
@ -3240,6 +3249,7 @@ dependencies = [
|
||||||
{ name = "unstructured-client" },
|
{ name = "unstructured-client" },
|
||||||
{ name = "uvicorn", extra = ["standard"] },
|
{ name = "uvicorn", extra = ["standard"] },
|
||||||
{ name = "validators" },
|
{ name = "validators" },
|
||||||
|
{ name = "youtube-transcript-api" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.metadata]
|
[package.metadata]
|
||||||
|
|
@ -3265,6 +3275,7 @@ requires-dist = [
|
||||||
{ name = "unstructured-client", specifier = ">=0.30.0" },
|
{ name = "unstructured-client", specifier = ">=0.30.0" },
|
||||||
{ name = "uvicorn", extras = ["standard"], specifier = ">=0.34.0" },
|
{ name = "uvicorn", extras = ["standard"], specifier = ">=0.34.0" },
|
||||||
{ name = "validators", specifier = ">=0.34.0" },
|
{ name = "validators", specifier = ">=0.34.0" },
|
||||||
|
{ name = "youtube-transcript-api", specifier = ">=1.0.3" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -3919,6 +3930,19 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/f5/4b/a06e0ec3d155924f77835ed2d167ebd3b211a7b0853da1cf8d8414d784ef/yarl-1.18.3-py3-none-any.whl", hash = "sha256:b57f4f58099328dfb26c6a771d09fb20dbbae81d20cfb66141251ea063bd101b", size = 45109 },
|
{ url = "https://files.pythonhosted.org/packages/f5/4b/a06e0ec3d155924f77835ed2d167ebd3b211a7b0853da1cf8d8414d784ef/yarl-1.18.3-py3-none-any.whl", hash = "sha256:b57f4f58099328dfb26c6a771d09fb20dbbae81d20cfb66141251ea063bd101b", size = 45109 },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "youtube-transcript-api"
|
||||||
|
version = "1.0.3"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "defusedxml" },
|
||||||
|
{ name = "requests" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/b0/32/f60d87a99c05a53604c58f20f670c7ea6262b55e0bbeb836ffe4550b248b/youtube_transcript_api-1.0.3.tar.gz", hash = "sha256:902baf90e7840a42e1e148335e09fe5575dbff64c81414957aea7038e8a4db46", size = 2153252 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f0/44/40c03bb0f8bddfb9d2beff2ed31641f52d96c287ba881d20e0c074784ac2/youtube_transcript_api-1.0.3-py3-none-any.whl", hash = "sha256:d1874e57de65cf14c9d7d09b2b37c814d6287fa0e770d4922c4cd32a5b3f6c47", size = 2169911 },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zipp"
|
name = "zipp"
|
||||||
version = "3.21.0"
|
version = "3.21.0"
|
||||||
|
|
|
||||||
|
|
@ -94,7 +94,7 @@ import rehypeSanitize from "rehype-sanitize";
|
||||||
import remarkGfm from "remark-gfm";
|
import remarkGfm from "remark-gfm";
|
||||||
import { DocumentViewer } from "@/components/document-viewer";
|
import { DocumentViewer } from "@/components/document-viewer";
|
||||||
import { JsonMetadataViewer } from "@/components/json-metadata-viewer";
|
import { JsonMetadataViewer } from "@/components/json-metadata-viewer";
|
||||||
import { IconBrandNotion, IconBrandSlack } from "@tabler/icons-react";
|
import { IconBrandNotion, IconBrandSlack, IconBrandYoutube } from "@tabler/icons-react";
|
||||||
|
|
||||||
// Define animation variants for reuse
|
// Define animation variants for reuse
|
||||||
const fadeInScale = {
|
const fadeInScale = {
|
||||||
|
|
@ -114,7 +114,7 @@ const fadeInScale = {
|
||||||
type Document = {
|
type Document = {
|
||||||
id: number;
|
id: number;
|
||||||
title: string;
|
title: string;
|
||||||
document_type: "EXTENSION" | "CRAWLED_URL" | "SLACK_CONNECTOR" | "NOTION_CONNECTOR" | "FILE";
|
document_type: "EXTENSION" | "CRAWLED_URL" | "SLACK_CONNECTOR" | "NOTION_CONNECTOR" | "FILE" | "YOUTUBE_VIDEO";
|
||||||
document_metadata: any;
|
document_metadata: any;
|
||||||
content: string;
|
content: string;
|
||||||
created_at: string;
|
created_at: string;
|
||||||
|
|
@ -141,6 +141,7 @@ const documentTypeIcons = {
|
||||||
SLACK_CONNECTOR: IconBrandSlack,
|
SLACK_CONNECTOR: IconBrandSlack,
|
||||||
NOTION_CONNECTOR: IconBrandNotion,
|
NOTION_CONNECTOR: IconBrandNotion,
|
||||||
FILE: File,
|
FILE: File,
|
||||||
|
YOUTUBE_VIDEO: IconBrandYoutube,
|
||||||
} as const;
|
} as const;
|
||||||
|
|
||||||
const columns: ColumnDef<Document>[] = [
|
const columns: ColumnDef<Document>[] = [
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,302 @@
|
||||||
|
"use client";
|
||||||
|
|
||||||
|
import { useState } from 'react';
|
||||||
|
import { useParams, useRouter } from 'next/navigation';
|
||||||
|
import { Tag, TagInput } from "emblor";
|
||||||
|
import { Button } from "@/components/ui/button";
|
||||||
|
import { Label } from "@/components/ui/label";
|
||||||
|
import { Card, CardContent, CardDescription, CardFooter, CardHeader, CardTitle } from "@/components/ui/card";
|
||||||
|
import { toast } from "sonner";
|
||||||
|
import { Youtube, Loader2 } from "lucide-react";
|
||||||
|
import { motion } from "framer-motion";
|
||||||
|
|
||||||
|
// YouTube video ID validation regex
|
||||||
|
const youtubeRegex = /^(https:\/\/)?(www\.)?(youtube\.com\/watch\?v=|youtu\.be\/)([a-zA-Z0-9_-]{11})$/;
|
||||||
|
|
||||||
|
export default function YouTubeVideoAdder() {
|
||||||
|
const params = useParams();
|
||||||
|
const router = useRouter();
|
||||||
|
const search_space_id = params.search_space_id as string;
|
||||||
|
|
||||||
|
const [videoTags, setVideoTags] = useState<Tag[]>([]);
|
||||||
|
const [activeTagIndex, setActiveTagIndex] = useState<number | null>(null);
|
||||||
|
const [isSubmitting, setIsSubmitting] = useState(false);
|
||||||
|
const [error, setError] = useState<string | null>(null);
|
||||||
|
|
||||||
|
// Function to validate a YouTube URL
|
||||||
|
const isValidYoutubeUrl = (url: string): boolean => {
|
||||||
|
return youtubeRegex.test(url);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Function to extract video ID from URL
|
||||||
|
const extractVideoId = (url: string): string | null => {
|
||||||
|
const match = url.match(/(?:youtube\.com\/watch\?v=|youtu\.be\/)([a-zA-Z0-9_-]{11})/);
|
||||||
|
return match ? match[1] : null;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Function to handle video URL submission
|
||||||
|
const handleSubmit = async () => {
    // Validate that we have at least one video URL
    if (videoTags.length === 0) {
        setError("Please add at least one YouTube video URL");
        return;
    }

    // Validate all URLs
    const invalidUrls = videoTags.filter(tag => !isValidYoutubeUrl(tag.text));
    if (invalidUrls.length > 0) {
        setError(`Invalid YouTube URLs detected: ${invalidUrls.map(tag => tag.text).join(', ')}`);
        return;
    }

    setError(null);
    setIsSubmitting(true);

    try {
        toast("YouTube Video Processing", {
            description: "Starting YouTube video processing...",
        });

        // youtubeRegex treats the "https://" prefix as optional, but the
        // backend derives the video ID from the parsed hostname, which is
        // empty for a scheme-less URL. Always send a fully qualified URL.
        const videoUrls = videoTags.map(tag =>
            tag.text.startsWith("https://") ? tag.text : `https://${tag.text}`
        );

        // Make API call to backend
        const response = await fetch(`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/documents/`, {
            method: "POST",
            headers: {
                'Content-Type': 'application/json',
                'Authorization': `Bearer ${localStorage.getItem("surfsense_bearer_token")}`
            },
            body: JSON.stringify({
                "document_type": "YOUTUBE_VIDEO",
                "content": videoUrls,
                // Explicit radix so the route parameter is always read as decimal
                "search_space_id": parseInt(search_space_id, 10)
            }),
        });

        if (!response.ok) {
            throw new Error("Failed to process YouTube videos");
        }

        await response.json();

        toast("Processing Successful", {
            description: "YouTube videos have been submitted for processing",
        });

        // Redirect to documents page
        router.push(`/dashboard/${search_space_id}/documents`);
    } catch (error: unknown) {
        // A rejected promise is not guaranteed to carry an Error instance
        const message = error instanceof Error && error.message
            ? error.message
            : "An error occurred while processing YouTube videos";
        setError(message);
        toast("Processing Error", {
            description: `Error processing YouTube videos: ${message}`,
        });
    } finally {
        setIsSubmitting(false);
    }
};
|
||||||
|
|
||||||
|
// Function to add a new video URL tag
|
||||||
|
const handleAddTag = (text: string) => {
|
||||||
|
// Basic URL validation
|
||||||
|
if (!isValidYoutubeUrl(text)) {
|
||||||
|
toast("Invalid YouTube URL", {
|
||||||
|
description: "Please enter a valid YouTube video URL",
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for duplicates
|
||||||
|
if (videoTags.some(tag => tag.text === text)) {
|
||||||
|
toast("Duplicate URL", {
|
||||||
|
description: "This YouTube video has already been added",
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add the new tag
|
||||||
|
const newTag: Tag = {
|
||||||
|
id: Date.now().toString(),
|
||||||
|
text: text,
|
||||||
|
};
|
||||||
|
|
||||||
|
setVideoTags([...videoTags, newTag]);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Animation variants
|
||||||
|
const containerVariants = {
|
||||||
|
hidden: { opacity: 0 },
|
||||||
|
visible: {
|
||||||
|
opacity: 1,
|
||||||
|
transition: {
|
||||||
|
staggerChildren: 0.1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const itemVariants = {
|
||||||
|
hidden: { y: 20, opacity: 0 },
|
||||||
|
visible: {
|
||||||
|
y: 0,
|
||||||
|
opacity: 1,
|
||||||
|
transition: {
|
||||||
|
type: "spring",
|
||||||
|
stiffness: 300,
|
||||||
|
damping: 24
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="container mx-auto py-8">
|
||||||
|
<motion.div
|
||||||
|
initial="hidden"
|
||||||
|
animate="visible"
|
||||||
|
variants={containerVariants}
|
||||||
|
>
|
||||||
|
<Card className="max-w-2xl mx-auto">
|
||||||
|
<motion.div variants={itemVariants}>
|
||||||
|
<CardHeader>
|
||||||
|
<CardTitle className="flex items-center gap-2">
|
||||||
|
<Youtube className="h-5 w-5" />
|
||||||
|
Add YouTube Videos
|
||||||
|
</CardTitle>
|
||||||
|
<CardDescription>
|
||||||
|
Enter YouTube video URLs to add to your document collection
|
||||||
|
</CardDescription>
|
||||||
|
</CardHeader>
|
||||||
|
</motion.div>
|
||||||
|
|
||||||
|
<motion.div variants={itemVariants}>
|
||||||
|
<CardContent>
|
||||||
|
<div className="space-y-4">
|
||||||
|
<div className="space-y-2">
|
||||||
|
<Label htmlFor="video-input">Enter YouTube Video URLs</Label>
|
||||||
|
<TagInput
|
||||||
|
id="video-input"
|
||||||
|
tags={videoTags}
|
||||||
|
setTags={setVideoTags}
|
||||||
|
placeholder="Enter a YouTube URL and press Enter"
|
||||||
|
onAddTag={handleAddTag}
|
||||||
|
styleClasses={{
|
||||||
|
inlineTagsContainer:
|
||||||
|
"border-input rounded-lg bg-background shadow-sm shadow-black/5 transition-shadow focus-within:border-ring focus-within:outline-none focus-within:ring-[3px] focus-within:ring-ring/20 p-1 gap-1",
|
||||||
|
input: "w-full min-w-[80px] focus-visible:outline-none shadow-none px-2 h-7",
|
||||||
|
tag: {
|
||||||
|
body: "h-7 relative bg-background border border-input hover:bg-background rounded-md font-medium text-xs ps-2 pe-7 flex",
|
||||||
|
closeButton:
|
||||||
|
"absolute -inset-y-px -end-px p-0 rounded-e-lg flex size-7 transition-colors outline-0 focus-visible:outline focus-visible:outline-2 focus-visible:outline-ring/70 text-muted-foreground/80 hover:text-foreground",
|
||||||
|
},
|
||||||
|
}}
|
||||||
|
activeTagIndex={activeTagIndex}
|
||||||
|
setActiveTagIndex={setActiveTagIndex}
|
||||||
|
/>
|
||||||
|
<p className="text-xs text-muted-foreground mt-1">
|
||||||
|
Add multiple YouTube URLs by pressing Enter after each one
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{error && (
|
||||||
|
<motion.div
|
||||||
|
className="text-sm text-red-500 mt-2"
|
||||||
|
initial={{ opacity: 0, scale: 0.9 }}
|
||||||
|
animate={{ opacity: 1, scale: 1 }}
|
||||||
|
transition={{ type: "spring", stiffness: 500, damping: 30 }}
|
||||||
|
>
|
||||||
|
{error}
|
||||||
|
</motion.div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
<motion.div
|
||||||
|
variants={itemVariants}
|
||||||
|
className="bg-muted/50 rounded-lg p-4 text-sm"
|
||||||
|
>
|
||||||
|
<h4 className="font-medium mb-2">Tips for adding YouTube videos:</h4>
|
||||||
|
<ul className="list-disc pl-5 space-y-1 text-muted-foreground">
|
||||||
|
<li>Use standard YouTube URLs (youtube.com/watch?v= or youtu.be/)</li>
|
||||||
|
<li>Make sure videos are publicly accessible</li>
|
||||||
|
<li>Supported formats: youtube.com/watch?v=VIDEO_ID or youtu.be/VIDEO_ID</li>
|
||||||
|
<li>Processing may take some time depending on video length</li>
|
||||||
|
</ul>
|
||||||
|
</motion.div>
|
||||||
|
|
||||||
|
{videoTags.length > 0 && (
|
||||||
|
<motion.div
|
||||||
|
variants={itemVariants}
|
||||||
|
className="mt-4 space-y-2"
|
||||||
|
>
|
||||||
|
<h4 className="font-medium">Preview:</h4>
|
||||||
|
<div className="grid grid-cols-1 gap-3">
|
||||||
|
{videoTags.map((tag, index) => {
|
||||||
|
const videoId = extractVideoId(tag.text);
|
||||||
|
return videoId ? (
|
||||||
|
<motion.div
|
||||||
|
key={tag.id}
|
||||||
|
initial={{ opacity: 0, y: 10 }}
|
||||||
|
animate={{ opacity: 1, y: 0 }}
|
||||||
|
transition={{ delay: index * 0.1 }}
|
||||||
|
className="relative aspect-video rounded-lg overflow-hidden border"
|
||||||
|
>
|
||||||
|
<iframe
|
||||||
|
width="100%"
|
||||||
|
height="100%"
|
||||||
|
src={`https://www.youtube.com/embed/${videoId}`}
|
||||||
|
title="YouTube video player"
|
||||||
|
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
|
||||||
|
allowFullScreen
|
||||||
|
></iframe>
|
||||||
|
</motion.div>
|
||||||
|
) : null;
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
</motion.div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</CardContent>
|
||||||
|
</motion.div>
|
||||||
|
|
||||||
|
<motion.div variants={itemVariants}>
|
||||||
|
<CardFooter className="flex justify-between">
|
||||||
|
<Button
|
||||||
|
variant="outline"
|
||||||
|
onClick={() => router.push(`/dashboard/${search_space_id}/documents`)}
|
||||||
|
>
|
||||||
|
Cancel
|
||||||
|
</Button>
|
||||||
|
<Button
|
||||||
|
onClick={handleSubmit}
|
||||||
|
disabled={isSubmitting || videoTags.length === 0}
|
||||||
|
className="relative overflow-hidden"
|
||||||
|
>
|
||||||
|
{isSubmitting ? (
|
||||||
|
<>
|
||||||
|
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
|
||||||
|
Processing...
|
||||||
|
</>
|
||||||
|
) : (
|
||||||
|
<>
|
||||||
|
<motion.span
|
||||||
|
initial={{ x: -5, opacity: 0 }}
|
||||||
|
animate={{ x: 0, opacity: 1 }}
|
||||||
|
transition={{ delay: 0.2 }}
|
||||||
|
className="mr-2"
|
||||||
|
>
|
||||||
|
<Youtube className="h-4 w-4" />
|
||||||
|
</motion.span>
|
||||||
|
Submit YouTube Videos
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
<motion.div
|
||||||
|
className="absolute inset-0 bg-primary/10"
|
||||||
|
initial={{ x: "-100%" }}
|
||||||
|
animate={isSubmitting ? { x: "0%" } : { x: "-100%" }}
|
||||||
|
transition={{ duration: 0.5, ease: "easeInOut" }}
|
||||||
|
/>
|
||||||
|
</Button>
|
||||||
|
</CardFooter>
|
||||||
|
</motion.div>
|
||||||
|
</Card>
|
||||||
|
</motion.div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
@ -48,6 +48,10 @@ export default function DashboardLayout({
|
||||||
title: "Add Webpages",
|
title: "Add Webpages",
|
||||||
url: `/dashboard/${search_space_id}/documents/webpage`,
|
url: `/dashboard/${search_space_id}/documents/webpage`,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
title: "Add Youtube Videos",
|
||||||
|
url: `/dashboard/${search_space_id}/documents/youtube`,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
title: "Manage Documents",
|
title: "Manage Documents",
|
||||||
url: `/dashboard/${search_space_id}/documents`,
|
url: `/dashboard/${search_space_id}/documents`,
|
||||||
|
|
|
||||||
|
|
@ -239,12 +239,10 @@ const SourcesDialogContent = ({
|
||||||
|
|
||||||
const ChatPage = () => {
|
const ChatPage = () => {
|
||||||
const [token, setToken] = React.useState<string | null>(null);
|
const [token, setToken] = React.useState<string | null>(null);
|
||||||
const [showAnswer, setShowAnswer] = useState(true);
|
|
||||||
const [activeTab, setActiveTab] = useState("");
|
const [activeTab, setActiveTab] = useState("");
|
||||||
const [dialogOpen, setDialogOpen] = useState(false);
|
const [dialogOpen, setDialogOpen] = useState(false);
|
||||||
const [sourcesPage, setSourcesPage] = useState(1);
|
const [sourcesPage, setSourcesPage] = useState(1);
|
||||||
const [expandedSources, setExpandedSources] = useState(false);
|
const [expandedSources, setExpandedSources] = useState(false);
|
||||||
const [isLoadingMore, setIsLoadingMore] = useState(false);
|
|
||||||
const [canScrollLeft, setCanScrollLeft] = useState(false);
|
const [canScrollLeft, setCanScrollLeft] = useState(false);
|
||||||
const [canScrollRight, setCanScrollRight] = useState(true);
|
const [canScrollRight, setCanScrollRight] = useState(true);
|
||||||
const [sourceFilter, setSourceFilter] = useState("");
|
const [sourceFilter, setSourceFilter] = useState("");
|
||||||
|
|
@ -258,7 +256,6 @@ const ChatPage = () => {
|
||||||
const terminalMessagesRef = useRef<HTMLDivElement>(null);
|
const terminalMessagesRef = useRef<HTMLDivElement>(null);
|
||||||
const { connectorSourceItems, isLoading: isLoadingConnectors } = useSearchSourceConnectors();
|
const { connectorSourceItems, isLoading: isLoadingConnectors } = useSearchSourceConnectors();
|
||||||
|
|
||||||
const SOURCES_PER_PAGE = 5;
|
|
||||||
const INITIAL_SOURCES_DISPLAY = 3;
|
const INITIAL_SOURCES_DISPLAY = 3;
|
||||||
|
|
||||||
const { search_space_id, chat_id } = useParams();
|
const { search_space_id, chat_id } = useParams();
|
||||||
|
|
@ -836,7 +833,7 @@ const ChatPage = () => {
|
||||||
{connectorSources.map(connector => (
|
{connectorSources.map(connector => (
|
||||||
<TabsContent key={connector.id} value={connector.type} className="mt-0">
|
<TabsContent key={connector.id} value={connector.type} className="mt-0">
|
||||||
<div className="space-y-3">
|
<div className="space-y-3">
|
||||||
{getMainViewSources(connector).map((source: any) => (
|
{getMainViewSources(connector)?.map((source: any) => (
|
||||||
<Card key={source.id} className="p-3 hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer">
|
<Card key={source.id} className="p-3 hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer">
|
||||||
<div className="flex items-start gap-3">
|
<div className="flex items-start gap-3">
|
||||||
<div className="flex-shrink-0 w-6 h-6 flex items-center justify-center">
|
<div className="flex-shrink-0 w-6 h-6 flex items-center justify-center">
|
||||||
|
|
@ -874,7 +871,7 @@ const ChatPage = () => {
|
||||||
setSourcesPage={setSourcesPage}
|
setSourcesPage={setSourcesPage}
|
||||||
setSourceFilter={setSourceFilter}
|
setSourceFilter={setSourceFilter}
|
||||||
setExpandedSources={setExpandedSources}
|
setExpandedSources={setExpandedSources}
|
||||||
isLoadingMore={isLoadingMore}
|
isLoadingMore={false}
|
||||||
/>
|
/>
|
||||||
</DialogContent>
|
</DialogContent>
|
||||||
</Dialog>
|
</Dialog>
|
||||||
|
|
@ -887,7 +884,7 @@ const ChatPage = () => {
|
||||||
|
|
||||||
{/* Answer Section */}
|
{/* Answer Section */}
|
||||||
<div className="mb-6">
|
<div className="mb-6">
|
||||||
{showAnswer && (
|
{
|
||||||
<div className="prose dark:prose-invert max-w-none">
|
<div className="prose dark:prose-invert max-w-none">
|
||||||
{message.annotations && (() => {
|
{message.annotations && (() => {
|
||||||
// Get all ANSWER annotations
|
// Get all ANSWER annotations
|
||||||
|
|
@ -913,7 +910,7 @@ const ChatPage = () => {
|
||||||
return <MarkdownViewer content={message.content} getCitationSource={getCitationSource} />;
|
return <MarkdownViewer content={message.content} getCitationSource={getCitationSource} />;
|
||||||
})()}
|
})()}
|
||||||
</div>
|
</div>
|
||||||
)}
|
}
|
||||||
</div>
|
</div>
|
||||||
{/* Scroll to bottom button */}
|
{/* Scroll to bottom button */}
|
||||||
<div className="fixed bottom-8 right-8">
|
<div className="fixed bottom-8 right-8">
|
||||||
|
|
|
||||||
|
|
@ -4,15 +4,14 @@ import {
|
||||||
Plus,
|
Plus,
|
||||||
Search,
|
Search,
|
||||||
Globe,
|
Globe,
|
||||||
BookOpen,
|
|
||||||
Sparkles,
|
Sparkles,
|
||||||
Microscope,
|
Microscope,
|
||||||
Telescope,
|
Telescope,
|
||||||
File,
|
File,
|
||||||
Link,
|
Link,
|
||||||
Slack,
|
Webhook,
|
||||||
Webhook
|
|
||||||
} from 'lucide-react';
|
} from 'lucide-react';
|
||||||
|
import { IconBrandNotion, IconBrandSlack, IconBrandYoutube } from "@tabler/icons-react";
|
||||||
import { Button } from '@/components/ui/button';
|
import { Button } from '@/components/ui/button';
|
||||||
import { Connector, ResearchMode } from './types';
|
import { Connector, ResearchMode } from './types';
|
||||||
|
|
||||||
|
|
@ -21,6 +20,8 @@ export const getConnectorIcon = (connectorType: string) => {
|
||||||
const iconProps = { className: "h-4 w-4" };
|
const iconProps = { className: "h-4 w-4" };
|
||||||
|
|
||||||
switch(connectorType) {
|
switch(connectorType) {
|
||||||
|
case 'YOUTUBE_VIDEO':
|
||||||
|
return <IconBrandYoutube {...iconProps} />;
|
||||||
case 'CRAWLED_URL':
|
case 'CRAWLED_URL':
|
||||||
return <Globe {...iconProps} />;
|
return <Globe {...iconProps} />;
|
||||||
case 'FILE':
|
case 'FILE':
|
||||||
|
|
@ -31,9 +32,9 @@ export const getConnectorIcon = (connectorType: string) => {
|
||||||
case 'TAVILY_API':
|
case 'TAVILY_API':
|
||||||
return <Link {...iconProps} />;
|
return <Link {...iconProps} />;
|
||||||
case 'SLACK_CONNECTOR':
|
case 'SLACK_CONNECTOR':
|
||||||
return <Slack {...iconProps} />;
|
return <IconBrandSlack {...iconProps} />;
|
||||||
case 'NOTION_CONNECTOR':
|
case 'NOTION_CONNECTOR':
|
||||||
return <BookOpen {...iconProps} />;
|
return <IconBrandNotion {...iconProps} />;
|
||||||
case 'DEEP':
|
case 'DEEP':
|
||||||
return <Sparkles {...iconProps} />;
|
return <Sparkles {...iconProps} />;
|
||||||
case 'DEEPER':
|
case 'DEEPER':
|
||||||
|
|
|
||||||
|
|
@ -15,4 +15,9 @@ export const connectorSourcesMenu = [
|
||||||
name: "Extension",
|
name: "Extension",
|
||||||
type: "EXTENSION",
|
type: "EXTENSION",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
id: 4,
|
||||||
|
name: "Youtube Video",
|
||||||
|
type: "YOUTUBE_VIDEO",
|
||||||
|
}
|
||||||
];
|
];
|
||||||
|
|
@ -43,6 +43,12 @@ export const useSearchSourceConnectors = () => {
|
||||||
name: "Extension",
|
name: "Extension",
|
||||||
type: "EXTENSION",
|
type: "EXTENSION",
|
||||||
sources: [],
|
sources: [],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 4,
|
||||||
|
name: "Youtube Video",
|
||||||
|
type: "YOUTUBE_VIDEO",
|
||||||
|
sources: [],
|
||||||
}
|
}
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
|
@ -108,6 +114,12 @@ export const useSearchSourceConnectors = () => {
|
||||||
name: "Extension",
|
name: "Extension",
|
||||||
type: "EXTENSION",
|
type: "EXTENSION",
|
||||||
sources: [],
|
sources: [],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 4,
|
||||||
|
name: "Youtube Video",
|
||||||
|
type: "YOUTUBE_VIDEO",
|
||||||
|
sources: [],
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue