diff --git a/surfsense_backend/app/agents/researcher/configuration.py b/surfsense_backend/app/agents/researcher/configuration.py index 84cb897c6..c89592c65 100644 --- a/surfsense_backend/app/agents/researcher/configuration.py +++ b/surfsense_backend/app/agents/researcher/configuration.py @@ -3,18 +3,10 @@ from __future__ import annotations from dataclasses import dataclass, fields -from enum import Enum from langchain_core.runnables import RunnableConfig -class SearchMode(Enum): - """Enum defining the type of search mode.""" - - CHUNKS = "CHUNKS" - DOCUMENTS = "DOCUMENTS" - - @dataclass(kw_only=True) class Configuration: """The configuration for the agent.""" @@ -24,7 +16,6 @@ class Configuration: connectors_to_search: list[str] user_id: str search_space_id: int - search_mode: SearchMode document_ids_to_add_in_context: list[int] language: str | None = None top_k: int = 10 diff --git a/surfsense_backend/app/agents/researcher/nodes.py b/surfsense_backend/app/agents/researcher/nodes.py index b8ad33c4f..7ecdaa213 100644 --- a/surfsense_backend/app/agents/researcher/nodes.py +++ b/surfsense_backend/app/agents/researcher/nodes.py @@ -16,7 +16,7 @@ from app.db import Document from app.services.connector_service import ConnectorService from app.services.query_service import QueryService -from .configuration import Configuration, SearchMode +from .configuration import Configuration from .prompts import get_further_questions_system_prompt from .qna_agent.graph import graph as qna_agent_graph from .state import State @@ -102,7 +102,7 @@ async def fetch_documents_by_ids( Fetch documents by their IDs within a search space. This function ensures that only documents belonging to the search space are fetched. - Similar to SearchMode.DOCUMENTS, it fetches full documents and concatenates their chunks. + It fetches full documents and returns their chunks individually. Also creates source objects for UI display, grouped by document type. Args: @@ -526,7 +526,6 @@ async def fetch_relevant_documents( state: State = None, top_k: int = 10, connector_service: ConnectorService = None, - search_mode: SearchMode = SearchMode.CHUNKS, user_selected_sources: list[dict[str, Any]] | None = None, start_date: datetime | None = None, end_date: datetime | None = None, @@ -539,6 +538,8 @@ async def fetch_relevant_documents( displaying connector names (like "Web Search" instead of "TAVILY_API") and adding relevant emojis to indicate the type of source being searched. + Uses combined chunk-level and document-level hybrid search with RRF fusion. + Args: research_questions: List of research questions to find documents for search_space_id: The search space ID @@ -548,7 +549,6 @@ async def fetch_relevant_documents( state: The current state containing the streaming service top_k: Number of top results to retrieve per connector per question connector_service: An initialized connector service to use for searching - search_mode: Search mode (CHUNKS or DOCUMENTS) user_selected_sources: Optional list of user-selected source objects start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at @@ -629,7 +629,6 @@ async def fetch_relevant_documents( user_query=reformulated_query, search_space_id=search_space_id, top_k=top_k, - search_mode=search_mode, start_date=start_date, end_date=end_date, ) @@ -657,7 +656,6 @@ async def fetch_relevant_documents( user_query=reformulated_query, search_space_id=search_space_id, top_k=top_k, - search_mode=search_mode, start_date=start_date, end_date=end_date, ) @@ -685,7 +683,6 @@ async def fetch_relevant_documents( user_query=reformulated_query, search_space_id=search_space_id, top_k=top_k, - search_mode=search_mode, start_date=start_date, end_date=end_date, ) @@ -710,7 +707,6 @@ async def fetch_relevant_documents( user_query=reformulated_query, search_space_id=search_space_id, top_k=top_k, - search_mode=search_mode, start_date=start_date, end_date=end_date, ) @@ -735,7 +731,6 @@ async def fetch_relevant_documents( user_query=reformulated_query, search_space_id=search_space_id, top_k=top_k, - search_mode=search_mode, start_date=start_date, end_date=end_date, ) @@ -763,7 +758,6 @@ async def fetch_relevant_documents( user_query=reformulated_query, search_space_id=search_space_id, top_k=top_k, - search_mode=search_mode, start_date=start_date, end_date=end_date, ) @@ -791,7 +785,6 @@ async def fetch_relevant_documents( user_query=reformulated_query, search_space_id=search_space_id, top_k=top_k, - search_mode=search_mode, start_date=start_date, end_date=end_date, ) @@ -819,7 +812,6 @@ async def fetch_relevant_documents( user_query=reformulated_query, search_space_id=search_space_id, top_k=top_k, - search_mode=search_mode, start_date=start_date, end_date=end_date, ) @@ -947,7 +939,6 @@ async def fetch_relevant_documents( user_query=reformulated_query, search_space_id=search_space_id, top_k=top_k, - search_mode=search_mode, start_date=start_date, end_date=end_date, ) @@ -970,7 +961,6 @@ async def fetch_relevant_documents( user_query=reformulated_query, search_space_id=search_space_id, top_k=top_k, - search_mode=search_mode, start_date=start_date, end_date=end_date, ) @@ -997,7 +987,6 @@ async def fetch_relevant_documents( user_query=reformulated_query, search_space_id=search_space_id, top_k=top_k, - search_mode=search_mode, start_date=start_date, end_date=end_date, ) @@ -1024,7 +1013,6 @@ async def fetch_relevant_documents( user_query=reformulated_query, search_space_id=search_space_id, top_k=top_k, - search_mode=search_mode, start_date=start_date, end_date=end_date, ) @@ -1051,7 +1039,6 @@ async def fetch_relevant_documents( user_query=reformulated_query, search_space_id=search_space_id, top_k=top_k, - search_mode=search_mode, start_date=start_date, end_date=end_date, ) @@ -1078,7 +1065,6 @@ async def fetch_relevant_documents( user_query=reformulated_query, search_space_id=search_space_id, top_k=top_k, - search_mode=search_mode, start_date=start_date, end_date=end_date, ) @@ -1105,7 +1091,6 @@ async def fetch_relevant_documents( user_query=reformulated_query, search_space_id=search_space_id, top_k=top_k, - search_mode=search_mode, start_date=start_date, end_date=end_date, ) @@ -1133,7 +1118,6 @@ async def fetch_relevant_documents( user_query=reformulated_query, search_space_id=search_space_id, top_k=top_k, - search_mode=search_mode, start_date=start_date, end_date=end_date, ) @@ -1161,7 +1145,6 @@ async def fetch_relevant_documents( user_query=reformulated_query, search_space_id=search_space_id, top_k=top_k, - search_mode=search_mode, start_date=start_date, end_date=end_date, ) @@ -1444,7 +1427,6 @@ async def handle_qna_workflow( state=state, top_k=top_k, connector_service=connector_service, - search_mode=configuration.search_mode, user_selected_sources=user_selected_sources, start_date=start_date, end_date=end_date, diff --git a/surfsense_backend/app/agents/researcher/qna_agent/nodes.py b/surfsense_backend/app/agents/researcher/qna_agent/nodes.py index 35f01146b..1debf0cdf 100644 --- a/surfsense_backend/app/agents/researcher/qna_agent/nodes.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/nodes.py @@ -89,10 +89,13 @@ async def rerank_documents(state: State, config: RunnableConfig) -> dict[str, An # Get reranker service from app config reranker_service = RerankerService.get_reranker_instance() - # If reranking is not enabled, return original documents without processing + # If reranking is not enabled, sort by existing score and return if not reranker_service: - print("Reranking is disabled. Using original document order.") - return {"reranked_documents": documents} + print("Reranking is disabled. Sorting documents by existing score.") + sorted_documents = sorted( + documents, key=lambda x: x.get("score", 0), reverse=True + ) + return {"reranked_documents": sorted_documents} # Perform reranking try: diff --git a/surfsense_backend/app/routes/chats_routes.py b/surfsense_backend/app/routes/chats_routes.py index d7aff102b..94e2855ba 100644 --- a/surfsense_backend/app/routes/chats_routes.py +++ b/surfsense_backend/app/routes/chats_routes.py @@ -29,7 +29,6 @@ from app.utils.validators import ( validate_document_ids, validate_messages, validate_research_mode, - validate_search_mode, validate_search_space_id, validate_top_k, ) @@ -61,7 +60,6 @@ async def handle_chat_data( document_ids_to_add_in_context = validate_document_ids( request_data.get("document_ids_to_add_in_context") ) - search_mode_str = validate_search_mode(request_data.get("search_mode")) top_k = validate_top_k(request_data.get("top_k")) # print("RESQUEST DATA:", request_data) # print("SELECTED CONNECTORS:", selected_connectors) @@ -144,7 +142,6 @@ async def handle_chat_data( research_mode, selected_connectors, langchain_chat_history, - search_mode_str, document_ids_to_add_in_context, language, top_k, diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py index 2dc8e14dc..7c6f5acb4 100644 --- a/surfsense_backend/app/services/connector_service.py +++ b/surfsense_backend/app/services/connector_service.py @@ -10,7 +10,6 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from tavily import TavilyClient -from app.agents.researcher.configuration import SearchMode from app.db import ( Chunk, Document, @@ -63,44 +62,32 @@ class ConnectorService: user_query: str, search_space_id: int, top_k: int = 20, - search_mode: SearchMode = SearchMode.CHUNKS, start_date: datetime | None = None, end_date: datetime | None = None, ) -> tuple: """ - Search for crawled URLs and return both the source information and langchain documents + Search for crawled URLs and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. Args: user_query: The user's query search_space_id: The search space ID to search in top_k: Maximum number of results to return - search_mode: Search mode (CHUNKS or DOCUMENTS) start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at Returns: tuple: (sources_info, langchain_documents) """ - if search_mode == SearchMode.CHUNKS: - crawled_urls_chunks = await self.chunk_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="CRAWLED_URL", - start_date=start_date, - end_date=end_date, - ) - elif search_mode == SearchMode.DOCUMENTS: - crawled_urls_chunks = await self.document_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="CRAWLED_URL", - start_date=start_date, - end_date=end_date, - ) - # Transform document retriever results to match expected format - crawled_urls_chunks = self._transform_document_results(crawled_urls_chunks) + crawled_urls_chunks = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="CRAWLED_URL", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) # Early return if no results if not crawled_urls_chunks: @@ -176,44 +163,32 @@ class ConnectorService: user_query: str, search_space_id: int, top_k: int = 20, - search_mode: SearchMode = SearchMode.CHUNKS, start_date: datetime | None = None, end_date: datetime | None = None, ) -> tuple: """ - Search for files and return both the source information and langchain documents + Search for files and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. Args: user_query: The user's query search_space_id: The search space ID to search in top_k: Maximum number of results to return - search_mode: Search mode (CHUNKS or DOCUMENTS) start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at Returns: tuple: (sources_info, langchain_documents) """ - if search_mode == SearchMode.CHUNKS: - files_chunks = await self.chunk_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="FILE", - start_date=start_date, - end_date=end_date, - ) - elif search_mode == SearchMode.DOCUMENTS: - files_chunks = await self.document_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="FILE", - start_date=start_date, - end_date=end_date, - ) - # Transform document retriever results to match expected format - files_chunks = self._transform_document_results(files_chunks) + files_chunks = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="FILE", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) # Early return if no results if not files_chunks: @@ -286,6 +261,125 @@ class ConnectorService: ) return transformed_results + async def _combined_rrf_search( + self, + query_text: str, + search_space_id: int, + document_type: str, + top_k: int = 20, + start_date: datetime | None = None, + end_date: datetime | None = None, + ) -> list[dict[str, Any]]: + """ + Perform combined search using both chunk-level and document-level hybrid search, + then merge results using Reciprocal Rank Fusion (RRF). + + This method: + 1. Runs chunk-level hybrid search (vector + keyword on chunks) + 2. Runs document-level hybrid search (vector + keyword on documents, returns chunks) + 3. Combines results using RRF based on their ranks in each result set + 4. Returns top-k deduplicated results + + Args: + query_text: The search query text + search_space_id: The search space ID to search within + document_type: Document type to filter (e.g., "FILE", "CRAWLED_URL") + top_k: Number of results to return + start_date: Optional start date for filtering documents by updated_at + end_date: Optional end date for filtering documents by updated_at + + Returns: + List of combined and deduplicated chunk results + """ + # RRF constant + k = 60 + + # Get more results from each retriever for better fusion + retriever_top_k = top_k * 2 + + # Run both searches in parallel + chunk_results, doc_results = await asyncio.gather( + self.chunk_retriever.hybrid_search( + query_text=query_text, + top_k=retriever_top_k, + search_space_id=search_space_id, + document_type=document_type, + start_date=start_date, + end_date=end_date, + ), + self.document_retriever.hybrid_search( + query_text=query_text, + top_k=retriever_top_k, + search_space_id=search_space_id, + document_type=document_type, + start_date=start_date, + end_date=end_date, + ), + ) + + # Transform document results to chunk format + doc_results_transformed = self._transform_document_results(doc_results) + + # Build rank maps for RRF calculation + # chunk_id -> rank in chunk results (1-indexed) + chunk_ranks: dict[int, int] = {} + for rank, result in enumerate(chunk_results, start=1): + chunk_id = result.get("chunk_id") + if chunk_id is not None: + chunk_ranks[chunk_id] = rank + + # chunk_id -> rank in document results (1-indexed) + doc_ranks: dict[int, int] = {} + for rank, result in enumerate(doc_results_transformed, start=1): + chunk_id = result.get("chunk_id") + if chunk_id is not None: + doc_ranks[chunk_id] = rank + + # Collect all unique chunk_ids + all_chunk_ids = set(chunk_ranks.keys()) | set(doc_ranks.keys()) + + # Calculate RRF scores for each chunk + rrf_scores: dict[int, float] = {} + for chunk_id in all_chunk_ids: + chunk_rank = chunk_ranks.get(chunk_id) + doc_rank = doc_ranks.get(chunk_id) + + rrf_score = 0.0 + if chunk_rank is not None: + rrf_score += 1.0 / (k + chunk_rank) + if doc_rank is not None: + rrf_score += 1.0 / (k + doc_rank) + + rrf_scores[chunk_id] = rrf_score + + # Create a map of chunk_id -> result data (prefer chunk results for data) + chunk_data: dict[int, dict[str, Any]] = {} + for result in chunk_results: + chunk_id = result.get("chunk_id") + if chunk_id is not None and chunk_id not in chunk_data: + chunk_data[chunk_id] = result + + # Fill in any missing chunks from document results + for result in doc_results_transformed: + chunk_id = result.get("chunk_id") + if chunk_id is not None and chunk_id not in chunk_data: + chunk_data[chunk_id] = result + + # Sort by RRF score and take top-k + sorted_chunk_ids = sorted( + all_chunk_ids, key=lambda cid: rrf_scores[cid], reverse=True + )[:top_k] + + # Build final results with updated scores + combined_results = [] + for chunk_id in sorted_chunk_ids: + if chunk_id in chunk_data: + result = chunk_data[chunk_id].copy() + result["score"] = rrf_scores[chunk_id] + combined_results.append(result) + + return combined_results + async def get_connector_by_type( self, connector_type: SearchSourceConnectorType, @@ -829,44 +923,32 @@ class ConnectorService: user_query: str, search_space_id: int, top_k: int = 20, - search_mode: SearchMode = SearchMode.CHUNKS, start_date: datetime | None = None, end_date: datetime | None = None, ) -> tuple: """ - Search for slack and return both the source information and langchain documents + Search for slack and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. Args: user_query: The user's query search_space_id: The search space ID to search in top_k: Maximum number of results to return - search_mode: Search mode (CHUNKS or DOCUMENTS) start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at Returns: tuple: (sources_info, langchain_documents) """ - if search_mode == SearchMode.CHUNKS: - slack_chunks = await self.chunk_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="SLACK_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - elif search_mode == SearchMode.DOCUMENTS: - slack_chunks = await self.document_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="SLACK_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - # Transform document retriever results to match expected format - slack_chunks = self._transform_document_results(slack_chunks) + slack_chunks = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="SLACK_CONNECTOR", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) # Early return if no results if not slack_chunks: @@ -928,44 +1010,32 @@ class ConnectorService: user_query: str, search_space_id: int, top_k: int = 20, - search_mode: SearchMode = SearchMode.CHUNKS, start_date: datetime | None = None, end_date: datetime | None = None, ) -> tuple: """ - Search for Notion pages and return both the source information and langchain documents + Search for Notion pages and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. Args: user_query: The user's query search_space_id: The search space ID to search in top_k: Maximum number of results to return - search_mode: Search mode (CHUNKS or DOCUMENTS) start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at Returns: tuple: (sources_info, langchain_documents) """ - if search_mode == SearchMode.CHUNKS: - notion_chunks = await self.chunk_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="NOTION_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - elif search_mode == SearchMode.DOCUMENTS: - notion_chunks = await self.document_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="NOTION_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - # Transform document retriever results to match expected format - notion_chunks = self._transform_document_results(notion_chunks) + notion_chunks = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="NOTION_CONNECTOR", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) # Early return if no results if not notion_chunks: @@ -1030,44 +1100,32 @@ class ConnectorService: user_query: str, search_space_id: int, top_k: int = 20, - search_mode: SearchMode = SearchMode.CHUNKS, start_date: datetime | None = None, end_date: datetime | None = None, ) -> tuple: """ - Search for extension data and return both the source information and langchain documents + Search for extension data and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. Args: user_query: The user's query search_space_id: The search space ID to search in top_k: Maximum number of results to return - search_mode: Search mode (CHUNKS or DOCUMENTS) start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at Returns: tuple: (sources_info, langchain_documents) """ - if search_mode == SearchMode.CHUNKS: - extension_chunks = await self.chunk_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="EXTENSION", - start_date=start_date, - end_date=end_date, - ) - elif search_mode == SearchMode.DOCUMENTS: - extension_chunks = await self.document_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="EXTENSION", - start_date=start_date, - end_date=end_date, - ) - # Transform document retriever results to match expected format - extension_chunks = self._transform_document_results(extension_chunks) + extension_chunks = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="EXTENSION", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) # Early return if no results if not extension_chunks: @@ -1156,44 +1214,32 @@ class ConnectorService: user_query: str, search_space_id: int, top_k: int = 20, - search_mode: SearchMode = SearchMode.CHUNKS, start_date: datetime | None = None, end_date: datetime | None = None, ) -> tuple: """ - Search for YouTube videos and return both the source information and langchain documents + Search for YouTube videos and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. Args: user_query: The user's query search_space_id: The search space ID to search in top_k: Maximum number of results to return - search_mode: Search mode (CHUNKS or DOCUMENTS) start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at Returns: tuple: (sources_info, langchain_documents) """ - if search_mode == SearchMode.CHUNKS: - youtube_chunks = await self.chunk_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="YOUTUBE_VIDEO", - start_date=start_date, - end_date=end_date, - ) - elif search_mode == SearchMode.DOCUMENTS: - youtube_chunks = await self.document_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="YOUTUBE_VIDEO", - start_date=start_date, - end_date=end_date, - ) - # Transform document retriever results to match expected format - youtube_chunks = self._transform_document_results(youtube_chunks) + youtube_chunks = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="YOUTUBE_VIDEO", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) # Early return if no results if not youtube_chunks: @@ -1258,44 +1304,32 @@ class ConnectorService: user_query: str, search_space_id: int, top_k: int = 20, - search_mode: SearchMode = SearchMode.CHUNKS, start_date: datetime | None = None, end_date: datetime | None = None, ) -> tuple: """ - Search for GitHub documents and return both the source information and langchain documents + Search for GitHub documents and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. Args: user_query: The user's query search_space_id: The search space ID to search in top_k: Maximum number of results to return - search_mode: Search mode (CHUNKS or DOCUMENTS) start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at Returns: tuple: (sources_info, langchain_documents) """ - if search_mode == SearchMode.CHUNKS: - github_chunks = await self.chunk_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="GITHUB_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - elif search_mode == SearchMode.DOCUMENTS: - github_chunks = await self.document_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="GITHUB_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - # Transform document retriever results to match expected format - github_chunks = self._transform_document_results(github_chunks) + github_chunks = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="GITHUB_CONNECTOR", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) # Early return if no results if not github_chunks: @@ -1344,44 +1378,32 @@ class ConnectorService: user_query: str, search_space_id: int, top_k: int = 20, - search_mode: SearchMode = SearchMode.CHUNKS, start_date: datetime | None = None, end_date: datetime | None = None, ) -> tuple: """ - Search for Linear issues and comments and return both the source information and langchain documents + Search for Linear issues and comments and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. Args: user_query: The user's query search_space_id: The search space ID to search in top_k: Maximum number of results to return - search_mode: Search mode (CHUNKS or DOCUMENTS) start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at Returns: tuple: (sources_info, langchain_documents) """ - if search_mode == SearchMode.CHUNKS: - linear_chunks = await self.chunk_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="LINEAR_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - elif search_mode == SearchMode.DOCUMENTS: - linear_chunks = await self.document_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="LINEAR_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - # Transform document retriever results to match expected format - linear_chunks = self._transform_document_results(linear_chunks) + linear_chunks = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="LINEAR_CONNECTOR", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) # Early return if no results if not linear_chunks: @@ -1458,44 +1480,32 @@ class ConnectorService: user_query: str, search_space_id: int, top_k: int = 20, - search_mode: SearchMode = SearchMode.CHUNKS, start_date: datetime | None = None, end_date: datetime | None = None, ) -> tuple: """ - Search for Jira issues and comments and return both the source information and langchain documents + Search for Jira issues and comments and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. Args: user_query: The user's query search_space_id: The search space ID to search in top_k: Maximum number of results to return - search_mode: Search mode (CHUNKS or DOCUMENTS) start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at Returns: tuple: (sources_info, langchain_documents) """ - if search_mode == SearchMode.CHUNKS: - jira_chunks = await self.chunk_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="JIRA_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - elif search_mode == SearchMode.DOCUMENTS: - jira_chunks = await self.document_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="JIRA_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - # Transform document retriever results to match expected format - jira_chunks = self._transform_document_results(jira_chunks) + jira_chunks = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="JIRA_CONNECTOR", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) # Early return if no results if not jira_chunks: @@ -1583,44 +1593,32 @@ class ConnectorService: user_query: str, search_space_id: int, top_k: int = 20, - search_mode: SearchMode = SearchMode.CHUNKS, start_date: datetime | None = None, end_date: datetime | None = None, ) -> tuple: """ - Search for Google Calendar events and return both the source information and langchain documents + Search for Google Calendar events and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. Args: user_query: The user's query search_space_id: The search space ID to search in top_k: Maximum number of results to return - search_mode: Search mode (CHUNKS or DOCUMENTS) start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at Returns: tuple: (sources_info, langchain_documents) """ - if search_mode == SearchMode.CHUNKS: - calendar_chunks = await self.chunk_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="GOOGLE_CALENDAR_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - elif search_mode == SearchMode.DOCUMENTS: - calendar_chunks = await self.document_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="GOOGLE_CALENDAR_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - # Transform document retriever results to match expected format - calendar_chunks = self._transform_document_results(calendar_chunks) + calendar_chunks = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="GOOGLE_CALENDAR_CONNECTOR", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) # Early return if no results if not calendar_chunks: @@ -1720,44 +1718,32 @@ class ConnectorService: user_query: str, search_space_id: int, top_k: int = 20, - search_mode: SearchMode = SearchMode.CHUNKS, start_date: datetime | None = None, end_date: datetime | None = None, ) -> tuple: """ - Search for Airtable records and return both the source information and langchain documents + Search for Airtable records and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. Args: user_query: The user's query search_space_id: The search space ID to search in top_k: Maximum number of results to return - search_mode: Search mode (CHUNKS or DOCUMENTS) start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at Returns: tuple: (sources_info, langchain_documents) """ - if search_mode == SearchMode.CHUNKS: - airtable_chunks = await self.chunk_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="AIRTABLE_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - elif search_mode == SearchMode.DOCUMENTS: - airtable_chunks = await self.document_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="AIRTABLE_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - # Transform document retriever results to match expected format - airtable_chunks = self._transform_document_results(airtable_chunks) + airtable_chunks = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="AIRTABLE_CONNECTOR", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) # Early return if no results if not airtable_chunks: @@ -1812,44 +1798,32 @@ class ConnectorService: user_query: str, search_space_id: int, top_k: int = 20, - search_mode: SearchMode = SearchMode.CHUNKS, start_date: datetime | None = None, end_date: datetime | None = None, ) -> tuple: """ - Search for Gmail messages and return both the source information and langchain documents + Search for Gmail messages and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. Args: user_query: The user's query search_space_id: The search space ID to search in top_k: Maximum number of results to return - search_mode: Search mode (CHUNKS or DOCUMENTS) start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at Returns: tuple: (sources_info, langchain_documents) """ - if search_mode == SearchMode.CHUNKS: - gmail_chunks = await self.chunk_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="GOOGLE_GMAIL_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - elif search_mode == SearchMode.DOCUMENTS: - gmail_chunks = await self.document_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="GOOGLE_GMAIL_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - # Transform document retriever results to match expected format - gmail_chunks = self._transform_document_results(gmail_chunks) + gmail_chunks = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="GOOGLE_GMAIL_CONNECTOR", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) # Early return if no results if not gmail_chunks: @@ -1940,44 +1914,32 @@ class ConnectorService: user_query: str, search_space_id: int, top_k: int = 20, - search_mode: SearchMode = SearchMode.CHUNKS, start_date: datetime | None = None, end_date: datetime | None = None, ) -> tuple: """ - Search for Confluence pages and return both the source information and langchain documents + Search for Confluence pages and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. Args: user_query: The user's query search_space_id: The search space ID to search in top_k: Maximum number of results to return - search_mode: Search mode (CHUNKS or DOCUMENTS) start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at Returns: tuple: (sources_info, langchain_documents) """ - if search_mode == SearchMode.CHUNKS: - confluence_chunks = await self.chunk_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="CONFLUENCE_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - elif search_mode == SearchMode.DOCUMENTS: - confluence_chunks = await self.document_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="CONFLUENCE_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - # Transform document retriever results to match expected format - confluence_chunks = self._transform_document_results(confluence_chunks) + confluence_chunks = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="CONFLUENCE_CONNECTOR", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) # Early return if no results if not confluence_chunks: @@ -2039,44 +2001,32 @@ class ConnectorService: user_query: str, search_space_id: int, top_k: int = 20, - search_mode: SearchMode = SearchMode.CHUNKS, start_date: datetime | None = None, end_date: datetime | None = None, ) -> tuple: """ - Search for ClickUp tasks and return both the source information and langchain documents + Search for ClickUp tasks and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. Args: user_query: The user's query search_space_id: The search space ID to search in top_k: Maximum number of results to return - search_mode: Search mode (CHUNKS or DOCUMENTS) start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at Returns: tuple: (sources_info, langchain_documents) """ - if search_mode == SearchMode.CHUNKS: - clickup_chunks = await self.chunk_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="CLICKUP_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - elif search_mode == SearchMode.DOCUMENTS: - clickup_chunks = await self.document_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="CLICKUP_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - # Transform document retriever results to match expected format - clickup_chunks = self._transform_document_results(clickup_chunks) + clickup_chunks = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="CLICKUP_CONNECTOR", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) # Early return if no results if not clickup_chunks: @@ -2280,44 +2230,32 @@ class ConnectorService: user_query: str, search_space_id: int, top_k: int = 20, - search_mode: SearchMode = SearchMode.CHUNKS, start_date: datetime | None = None, end_date: datetime | None = None, ) -> tuple: """ - Search for Discord messages and return both the source information and langchain documents + Search for Discord messages and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. Args: user_query: The user's query search_space_id: The search space ID to search in top_k: Maximum number of results to return - search_mode: Search mode (CHUNKS or DOCUMENTS) start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at Returns: tuple: (sources_info, langchain_documents) """ - if search_mode == SearchMode.CHUNKS: - discord_chunks = await self.chunk_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="DISCORD_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - elif search_mode == SearchMode.DOCUMENTS: - discord_chunks = await self.document_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="DISCORD_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - # Transform document retriever results to match expected format - discord_chunks = self._transform_document_results(discord_chunks) + discord_chunks = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="DISCORD_CONNECTOR", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) # Early return if no results if not discord_chunks: @@ -2382,44 +2320,32 @@ class ConnectorService: user_query: str, search_space_id: int, top_k: int = 20, - search_mode: SearchMode = SearchMode.CHUNKS, start_date: datetime | None = None, end_date: datetime | None = None, ) -> tuple: """ - Search for Luma events and return both the source information and langchain documents + Search for Luma events and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. Args: user_query: The user's query search_space_id: The search space ID to search in top_k: Maximum number of results to return - search_mode: Search mode (CHUNKS or DOCUMENTS) start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at Returns: tuple: (sources_info, langchain_documents) """ - if search_mode == SearchMode.CHUNKS: - luma_chunks = await self.chunk_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="LUMA_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - elif search_mode == SearchMode.DOCUMENTS: - luma_chunks = await self.document_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="LUMA_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - # Transform document retriever results to match expected format - luma_chunks = self._transform_document_results(luma_chunks) + luma_chunks = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="LUMA_CONNECTOR", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) # Early return if no results if not luma_chunks: @@ -2544,46 +2470,32 @@ class ConnectorService: user_query: str, search_space_id: int, top_k: int = 20, - search_mode: SearchMode = SearchMode.CHUNKS, start_date: datetime | None = None, end_date: datetime | None = None, ) -> tuple: """ - Search for Elasticsearch documents and return both the source information and langchain documents + Search for Elasticsearch documents and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. Args: user_query: The user's query search_space_id: The search space ID to search in top_k: Maximum number of results to return - search_mode: Search mode (CHUNKS or DOCUMENTS) start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at Returns: tuple: (sources_info, langchain_documents) """ - if search_mode == SearchMode.CHUNKS: - elasticsearch_chunks = await self.chunk_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="ELASTICSEARCH_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - elif search_mode == SearchMode.DOCUMENTS: - elasticsearch_chunks = await self.document_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="ELASTICSEARCH_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - # Transform document retriever results to match expected format - elasticsearch_chunks = self._transform_document_results( - elasticsearch_chunks - ) + elasticsearch_chunks = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="ELASTICSEARCH_CONNECTOR", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) # Early return if no results if not elasticsearch_chunks: @@ -2663,45 +2575,33 @@ class ConnectorService: user_id: str, search_space_id: int, top_k: int = 20, - search_mode: SearchMode = SearchMode.CHUNKS, start_date: datetime | None = None, end_date: datetime | None = None, ) -> tuple: """ - Search for BookStack pages and return both the source information and langchain documents + Search for BookStack pages and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. Args: user_query: The user's query user_id: The user's ID search_space_id: The search space ID to search in top_k: Maximum number of results to return - search_mode: Search mode (CHUNKS or DOCUMENTS) start_date: Optional start date for filtering documents by updated_at end_date: Optional end date for filtering documents by updated_at Returns: tuple: (sources_info, langchain_documents) """ - if search_mode == SearchMode.CHUNKS: - bookstack_chunks = await self.chunk_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="BOOKSTACK_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - elif search_mode == SearchMode.DOCUMENTS: - bookstack_chunks = await self.document_retriever.hybrid_search( - query_text=user_query, - top_k=top_k, - search_space_id=search_space_id, - document_type="BOOKSTACK_CONNECTOR", - start_date=start_date, - end_date=end_date, - ) - # Transform document retriever results to match expected format - bookstack_chunks = self._transform_document_results(bookstack_chunks) + bookstack_chunks = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="BOOKSTACK_CONNECTOR", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) # Early return if no results if not bookstack_chunks: diff --git a/surfsense_backend/app/tasks/stream_connector_search_results.py b/surfsense_backend/app/tasks/stream_connector_search_results.py index a944b1cfd..a4b9b6665 100644 --- a/surfsense_backend/app/tasks/stream_connector_search_results.py +++ b/surfsense_backend/app/tasks/stream_connector_search_results.py @@ -4,7 +4,6 @@ from uuid import UUID from sqlalchemy.ext.asyncio import AsyncSession -from app.agents.researcher.configuration import SearchMode from app.agents.researcher.graph import graph as researcher_graph from app.agents.researcher.state import State from app.services.streaming_service import StreamingService @@ -18,7 +17,6 @@ async def stream_connector_search_results( research_mode: str, selected_connectors: list[str], langchain_chat_history: list[Any], - search_mode_str: str, document_ids_to_add_in_context: list[int], language: str | None = None, top_k: int = 10, @@ -42,11 +40,6 @@ async def stream_connector_search_results( # Convert UUID to string if needed user_id_str = str(user_id) if isinstance(user_id, UUID) else user_id - if search_mode_str == "CHUNKS": - search_mode = SearchMode.CHUNKS - elif search_mode_str == "DOCUMENTS": - search_mode = SearchMode.DOCUMENTS - # Sample configuration config = { "configurable": { @@ -54,7 +47,6 @@ async def stream_connector_search_results( "connectors_to_search": selected_connectors, "user_id": user_id_str, "search_space_id": search_space_id, - "search_mode": search_mode, "document_ids_to_add_in_context": document_ids_to_add_in_context, "language": language, # Add language to the configuration "top_k": top_k, # Add top_k to the configuration diff --git a/surfsense_web/app/dashboard/[search_space_id]/researcher/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/researcher/[[...chat_id]]/page.tsx index a9d8b9649..196f565a9 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/researcher/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/researcher/[[...chat_id]]/page.tsx @@ -9,8 +9,8 @@ import { activeChatAtom } from "@/atoms/chats/chat-query.atoms"; import { activeChatIdAtom } from "@/atoms/chats/ui.atoms"; import { documentTypeCountsAtom } from "@/atoms/documents/document-query.atoms"; import ChatInterface from "@/components/chat/ChatInterface"; +import type { Document } from "@/contracts/types/document.types"; import { useChatState } from "@/hooks/use-chat"; -import type { Document } from "@/hooks/use-documents"; import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors"; export default function ResearcherPage() { @@ -31,8 +31,6 @@ export default function ResearcherPage() { const { token, - searchMode, - setSearchMode, researchMode, selectedConnectors, setSelectedConnectors, @@ -84,7 +82,6 @@ export default function ResearcherPage() { interface ChatState { selectedDocuments: Document[]; selectedConnectors: string[]; - searchMode: "DOCUMENTS" | "CHUNKS"; researchMode: "QNA"; // Always QNA mode topK: number; } @@ -124,7 +121,6 @@ export default function ResearcherPage() { search_space_id: search_space_id, selected_connectors: connectorTypes, research_mode: researchMode, - search_mode: searchMode, document_ids_to_add_in_context: documentIds, top_k: topK, }, @@ -155,7 +151,6 @@ export default function ResearcherPage() { storeChatState(search_space_id as string, String(newChat.id), { selectedDocuments, selectedConnectors, - searchMode, researchMode, topK, }); @@ -203,7 +198,6 @@ export default function ResearcherPage() { if (restoredState) { setSelectedDocuments(restoredState.selectedDocuments); setSelectedConnectors(restoredState.selectedConnectors); - setSearchMode(restoredState.searchMode); setTopK(restoredState.topK); // researchMode is always "QNA", no need to restore } @@ -214,7 +208,6 @@ export default function ResearcherPage() { search_space_id, setSelectedDocuments, setSelectedConnectors, - setSearchMode, setTopK, ]); @@ -287,8 +280,6 @@ export default function ResearcherPage() { selectedDocuments={selectedDocuments} onConnectorSelectionChange={setSelectedConnectors} selectedConnectors={selectedConnectors} - searchMode={searchMode} - onSearchModeChange={setSearchMode} topK={topK} onTopKChange={setTopK} /> diff --git a/surfsense_web/components/chat/ChatInputGroup.tsx b/surfsense_web/components/chat/ChatInputGroup.tsx index 97473891c..1e2e12837 100644 --- a/surfsense_web/components/chat/ChatInputGroup.tsx +++ b/surfsense_web/components/chat/ChatInputGroup.tsx @@ -27,7 +27,7 @@ import { } from "@/components/ui/select"; import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip"; import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; -import type { Document } from "@/hooks/use-documents"; +import type { Document } from "@/contracts/types/document.types"; import { useGlobalLLMConfigs, useLLMConfigs, useLLMPreferences } from "@/hooks/use-llm-configs"; import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors"; @@ -422,58 +422,6 @@ const ConnectorSelector = React.memo( ConnectorSelector.displayName = "ConnectorSelector"; -const SearchModeSelector = React.memo( - ({ - searchMode, - onSearchModeChange, - }: { - searchMode?: "DOCUMENTS" | "CHUNKS"; - onSearchModeChange?: (mode: "DOCUMENTS" | "CHUNKS") => void; - }) => { - const handleDocumentsClick = React.useCallback(() => { - onSearchModeChange?.("DOCUMENTS"); - }, [onSearchModeChange]); - - const handleChunksClick = React.useCallback(() => { - onSearchModeChange?.("CHUNKS"); - }, [onSearchModeChange]); - - return ( -
-
- - -
-
- ); - } -); - -SearchModeSelector.displayName = "SearchModeSelector"; - const TopKSelector = React.memo( ({ topK = 10, onTopKChange }: { topK?: number; onTopKChange?: (topK: number) => void }) => { const MIN_VALUE = 1; @@ -808,8 +756,6 @@ const CustomChatInputOptions = React.memo( selectedDocuments, onConnectorSelectionChange, selectedConnectors, - searchMode, - onSearchModeChange, topK, onTopKChange, }: { @@ -817,8 +763,6 @@ const CustomChatInputOptions = React.memo( selectedDocuments?: Document[]; onConnectorSelectionChange?: (connectorTypes: string[]) => void; selectedConnectors?: string[]; - searchMode?: "DOCUMENTS" | "CHUNKS"; - onSearchModeChange?: (mode: "DOCUMENTS" | "CHUNKS") => void; topK?: number; onTopKChange?: (topK: number) => void; }) => { @@ -845,8 +789,6 @@ const CustomChatInputOptions = React.memo(
- -
@@ -863,8 +805,6 @@ export const ChatInputUI = React.memo( selectedDocuments, onConnectorSelectionChange, selectedConnectors, - searchMode, - onSearchModeChange, topK, onTopKChange, }: { @@ -872,8 +812,6 @@ export const ChatInputUI = React.memo( selectedDocuments?: Document[]; onConnectorSelectionChange?: (connectorTypes: string[]) => void; selectedConnectors?: string[]; - searchMode?: "DOCUMENTS" | "CHUNKS"; - onSearchModeChange?: (mode: "DOCUMENTS" | "CHUNKS") => void; topK?: number; onTopKChange?: (topK: number) => void; }) => { @@ -888,8 +826,6 @@ export const ChatInputUI = React.memo( selectedDocuments={selectedDocuments} onConnectorSelectionChange={onConnectorSelectionChange} selectedConnectors={selectedConnectors} - searchMode={searchMode} - onSearchModeChange={onSearchModeChange} topK={topK} onTopKChange={onTopKChange} /> diff --git a/surfsense_web/components/chat/ChatInterface.tsx b/surfsense_web/components/chat/ChatInterface.tsx index 799e45ef6..64852fa15 100644 --- a/surfsense_web/components/chat/ChatInterface.tsx +++ b/surfsense_web/components/chat/ChatInterface.tsx @@ -4,7 +4,7 @@ import { type ChatHandler, ChatSection as LlamaIndexChatSection } from "@llamain import { useParams } from "next/navigation"; import { ChatInputUI } from "@/components/chat/ChatInputGroup"; import { ChatMessagesUI } from "@/components/chat/ChatMessages"; -import type { Document } from "@/hooks/use-documents"; +import type { Document } from "@/contracts/types/document.types"; interface ChatInterfaceProps { handler: ChatHandler; @@ -12,8 +12,6 @@ interface ChatInterfaceProps { selectedDocuments?: Document[]; onConnectorSelectionChange?: (connectorTypes: string[]) => void; selectedConnectors?: string[]; - searchMode?: "DOCUMENTS" | "CHUNKS"; - onSearchModeChange?: (mode: "DOCUMENTS" | "CHUNKS") => void; topK?: number; onTopKChange?: (topK: number) => void; } @@ -24,8 +22,6 @@ export default function ChatInterface({ selectedDocuments = [], onConnectorSelectionChange, selectedConnectors = [], - searchMode, - onSearchModeChange, topK = 10, onTopKChange, }: ChatInterfaceProps) { @@ -41,8 +37,6 @@ export default function ChatInterface({ selectedDocuments={selectedDocuments} onConnectorSelectionChange={onConnectorSelectionChange} selectedConnectors={selectedConnectors} - searchMode={searchMode} - onSearchModeChange={onSearchModeChange} topK={topK} onTopKChange={onTopKChange} /> diff --git a/surfsense_web/hooks/use-chat.ts b/surfsense_web/hooks/use-chat.ts index b006401d1..108165bbf 100644 --- a/surfsense_web/hooks/use-chat.ts +++ b/surfsense_web/hooks/use-chat.ts @@ -2,7 +2,7 @@ import type { Message } from "@ai-sdk/react"; import { useCallback, useEffect, useState } from "react"; import type { ChatDetails } from "@/app/dashboard/[search_space_id]/chats/chats-client"; import type { ResearchMode } from "@/components/chat"; -import type { Document } from "@/hooks/use-documents"; +import type { Document } from "@/contracts/types/document.types"; import { getBearerToken } from "@/lib/auth-utils"; interface UseChatStateProps { @@ -16,7 +16,6 @@ export function useChatState({ chat_id }: UseChatStateProps) { const [currentChatId, setCurrentChatId] = useState(chat_id || null); // Chat configuration state - const [searchMode, setSearchMode] = useState<"DOCUMENTS" | "CHUNKS">("DOCUMENTS"); const [researchMode, setResearchMode] = useState("QNA"); const [selectedConnectors, setSelectedConnectors] = useState([]); const [selectedDocuments, setSelectedDocuments] = useState([]); @@ -34,8 +33,6 @@ export function useChatState({ chat_id }: UseChatStateProps) { setIsLoading, currentChatId, setCurrentChatId, - searchMode, - setSearchMode, researchMode, setResearchMode, selectedConnectors,