mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-22 21:28:12 +02:00
feat(roadmap-1.2): removed SearchMode enum and combined hybrid search
This commit is contained in:
parent
dc92f313e7
commit
08fb488995
10 changed files with 331 additions and 548 deletions
|
|
@ -3,18 +3,10 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, fields
|
||||
from enum import Enum
|
||||
|
||||
from langchain_core.runnables import RunnableConfig
|
||||
|
||||
|
||||
class SearchMode(Enum):
|
||||
"""Enum defining the type of search mode."""
|
||||
|
||||
CHUNKS = "CHUNKS"
|
||||
DOCUMENTS = "DOCUMENTS"
|
||||
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class Configuration:
|
||||
"""The configuration for the agent."""
|
||||
|
|
@ -24,7 +16,6 @@ class Configuration:
|
|||
connectors_to_search: list[str]
|
||||
user_id: str
|
||||
search_space_id: int
|
||||
search_mode: SearchMode
|
||||
document_ids_to_add_in_context: list[int]
|
||||
language: str | None = None
|
||||
top_k: int = 10
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ from app.db import Document
|
|||
from app.services.connector_service import ConnectorService
|
||||
from app.services.query_service import QueryService
|
||||
|
||||
from .configuration import Configuration, SearchMode
|
||||
from .configuration import Configuration
|
||||
from .prompts import get_further_questions_system_prompt
|
||||
from .qna_agent.graph import graph as qna_agent_graph
|
||||
from .state import State
|
||||
|
|
@ -102,7 +102,7 @@ async def fetch_documents_by_ids(
|
|||
Fetch documents by their IDs within a search space.
|
||||
|
||||
This function ensures that only documents belonging to the search space are fetched.
|
||||
Similar to SearchMode.DOCUMENTS, it fetches full documents and concatenates their chunks.
|
||||
It fetches full documents and returns their chunks individually.
|
||||
Also creates source objects for UI display, grouped by document type.
|
||||
|
||||
Args:
|
||||
|
|
@ -526,7 +526,6 @@ async def fetch_relevant_documents(
|
|||
state: State = None,
|
||||
top_k: int = 10,
|
||||
connector_service: ConnectorService = None,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
user_selected_sources: list[dict[str, Any]] | None = None,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
|
|
@ -539,6 +538,8 @@ async def fetch_relevant_documents(
|
|||
displaying connector names (like "Web Search" instead of "TAVILY_API") and adding
|
||||
relevant emojis to indicate the type of source being searched.
|
||||
|
||||
Uses combined chunk-level and document-level hybrid search with RRF fusion.
|
||||
|
||||
Args:
|
||||
research_questions: List of research questions to find documents for
|
||||
search_space_id: The search space ID
|
||||
|
|
@ -548,7 +549,6 @@ async def fetch_relevant_documents(
|
|||
state: The current state containing the streaming service
|
||||
top_k: Number of top results to retrieve per connector per question
|
||||
connector_service: An initialized connector service to use for searching
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
user_selected_sources: Optional list of user-selected source objects
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
|
@ -629,7 +629,6 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
|
@ -657,7 +656,6 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
|
@ -685,7 +683,6 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
|
@ -710,7 +707,6 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
|
@ -735,7 +731,6 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
|
@ -763,7 +758,6 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
|
@ -791,7 +785,6 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
|
@ -819,7 +812,6 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
|
@ -947,7 +939,6 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
|
@ -970,7 +961,6 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
|
@ -997,7 +987,6 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
|
@ -1024,7 +1013,6 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
|
@ -1051,7 +1039,6 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
|
@ -1078,7 +1065,6 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
|
@ -1105,7 +1091,6 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
|
@ -1133,7 +1118,6 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
|
@ -1161,7 +1145,6 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
|
@ -1444,7 +1427,6 @@ async def handle_qna_workflow(
|
|||
state=state,
|
||||
top_k=top_k,
|
||||
connector_service=connector_service,
|
||||
search_mode=configuration.search_mode,
|
||||
user_selected_sources=user_selected_sources,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
|
|
|
|||
|
|
@ -89,10 +89,13 @@ async def rerank_documents(state: State, config: RunnableConfig) -> dict[str, An
|
|||
# Get reranker service from app config
|
||||
reranker_service = RerankerService.get_reranker_instance()
|
||||
|
||||
# If reranking is not enabled, return original documents without processing
|
||||
# If reranking is not enabled, sort by existing score and return
|
||||
if not reranker_service:
|
||||
print("Reranking is disabled. Using original document order.")
|
||||
return {"reranked_documents": documents}
|
||||
print("Reranking is disabled. Sorting documents by existing score.")
|
||||
sorted_documents = sorted(
|
||||
documents, key=lambda x: x.get("score", 0), reverse=True
|
||||
)
|
||||
return {"reranked_documents": sorted_documents}
|
||||
|
||||
# Perform reranking
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -29,7 +29,6 @@ from app.utils.validators import (
|
|||
validate_document_ids,
|
||||
validate_messages,
|
||||
validate_research_mode,
|
||||
validate_search_mode,
|
||||
validate_search_space_id,
|
||||
validate_top_k,
|
||||
)
|
||||
|
|
@ -61,7 +60,6 @@ async def handle_chat_data(
|
|||
document_ids_to_add_in_context = validate_document_ids(
|
||||
request_data.get("document_ids_to_add_in_context")
|
||||
)
|
||||
search_mode_str = validate_search_mode(request_data.get("search_mode"))
|
||||
top_k = validate_top_k(request_data.get("top_k"))
|
||||
# print("RESQUEST DATA:", request_data)
|
||||
# print("SELECTED CONNECTORS:", selected_connectors)
|
||||
|
|
@ -144,7 +142,6 @@ async def handle_chat_data(
|
|||
research_mode,
|
||||
selected_connectors,
|
||||
langchain_chat_history,
|
||||
search_mode_str,
|
||||
document_ids_to_add_in_context,
|
||||
language,
|
||||
top_k,
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -4,7 +4,6 @@ from uuid import UUID
|
|||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.agents.researcher.configuration import SearchMode
|
||||
from app.agents.researcher.graph import graph as researcher_graph
|
||||
from app.agents.researcher.state import State
|
||||
from app.services.streaming_service import StreamingService
|
||||
|
|
@ -18,7 +17,6 @@ async def stream_connector_search_results(
|
|||
research_mode: str,
|
||||
selected_connectors: list[str],
|
||||
langchain_chat_history: list[Any],
|
||||
search_mode_str: str,
|
||||
document_ids_to_add_in_context: list[int],
|
||||
language: str | None = None,
|
||||
top_k: int = 10,
|
||||
|
|
@ -42,11 +40,6 @@ async def stream_connector_search_results(
|
|||
# Convert UUID to string if needed
|
||||
user_id_str = str(user_id) if isinstance(user_id, UUID) else user_id
|
||||
|
||||
if search_mode_str == "CHUNKS":
|
||||
search_mode = SearchMode.CHUNKS
|
||||
elif search_mode_str == "DOCUMENTS":
|
||||
search_mode = SearchMode.DOCUMENTS
|
||||
|
||||
# Sample configuration
|
||||
config = {
|
||||
"configurable": {
|
||||
|
|
@ -54,7 +47,6 @@ async def stream_connector_search_results(
|
|||
"connectors_to_search": selected_connectors,
|
||||
"user_id": user_id_str,
|
||||
"search_space_id": search_space_id,
|
||||
"search_mode": search_mode,
|
||||
"document_ids_to_add_in_context": document_ids_to_add_in_context,
|
||||
"language": language, # Add language to the configuration
|
||||
"top_k": top_k, # Add top_k to the configuration
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue