mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-06 20:15:17 +02:00
feat: Introduce the RAPTOR Search.
This commit is contained in:
parent
d3540d8cc5
commit
fbbb3294f4
11 changed files with 318 additions and 127 deletions
|
|
@ -3,10 +3,16 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, fields
|
||||
from enum import Enum
|
||||
from typing import Optional, List, Any
|
||||
|
||||
from langchain_core.runnables import RunnableConfig
|
||||
|
||||
class SearchMode(Enum):
    """Enum defining the type of search mode.

    Each member's value is the upper-case string of its own name, so a
    member can be recovered from its string form with ``SearchMode("CHUNKS")``.

    Members:
        CHUNKS: presumably selects chunk-level retrieval -- confirm against
            the connector service that consumes ``search_mode``.
        DOCUMENTS: presumably selects document-level retrieval -- confirm
            against the connector service that consumes ``search_mode``.
    """

    CHUNKS = "CHUNKS"
    DOCUMENTS = "DOCUMENTS"
|
||||
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class Configuration:
|
||||
|
|
@ -18,6 +24,7 @@ class Configuration:
|
|||
connectors_to_search: List[str]
|
||||
user_id: str
|
||||
search_space_id: int
|
||||
search_mode: SearchMode
|
||||
|
||||
|
||||
@classmethod
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ from langchain_core.runnables import RunnableConfig
|
|||
from pydantic import BaseModel, Field
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from .configuration import Configuration
|
||||
from .configuration import Configuration, SearchMode
|
||||
from .prompts import get_answer_outline_system_prompt
|
||||
from .state import State
|
||||
from .sub_section_writer.graph import graph as sub_section_writer_graph
|
||||
|
|
@ -149,7 +149,8 @@ async def fetch_relevant_documents(
|
|||
writer: StreamWriter = None,
|
||||
state: State = None,
|
||||
top_k: int = 10,
|
||||
connector_service: ConnectorService = None
|
||||
connector_service: ConnectorService = None,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Fetch relevant documents for research questions using the provided connectors.
|
||||
|
|
@ -213,7 +214,8 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k
|
||||
top_k=top_k,
|
||||
search_mode=search_mode
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -231,7 +233,8 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k
|
||||
top_k=top_k,
|
||||
search_mode=search_mode
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -249,7 +252,8 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k
|
||||
top_k=top_k,
|
||||
search_mode=search_mode
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -267,7 +271,8 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k
|
||||
top_k=top_k,
|
||||
search_mode=search_mode
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -286,7 +291,8 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k
|
||||
top_k=top_k,
|
||||
search_mode=search_mode
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -304,7 +310,8 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k
|
||||
top_k=top_k,
|
||||
search_mode=search_mode
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -322,7 +329,8 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k
|
||||
top_k=top_k,
|
||||
search_mode=search_mode
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -340,7 +348,8 @@ async def fetch_relevant_documents(
|
|||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k
|
||||
top_k=top_k,
|
||||
search_mode=search_mode
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -558,7 +567,8 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
|
|||
writer=writer,
|
||||
state=state,
|
||||
top_k=TOP_K,
|
||||
connector_service=connector_service
|
||||
connector_service=connector_service,
|
||||
search_mode=configuration.search_mode
|
||||
)
|
||||
except Exception as e:
|
||||
error_message = f"Error fetching relevant documents: {str(e)}"
|
||||
|
|
|
|||
|
|
@ -141,6 +141,11 @@ async def write_sub_section(state: State, config: RunnableConfig) -> Dict[str, A
|
|||
|
||||
# Construct a clear, structured query for the LLM
|
||||
human_message_content = f"""
|
||||
Source material:
|
||||
<documents>
|
||||
{documents_text}
|
||||
</documents>
|
||||
|
||||
Now user's query is:
|
||||
<user_query>
|
||||
{user_query}
|
||||
|
|
@ -158,11 +163,6 @@ async def write_sub_section(state: State, config: RunnableConfig) -> Dict[str, A
|
|||
<guiding_questions>
|
||||
{questions_text}
|
||||
</guiding_questions>
|
||||
|
||||
Use the provided documents as your source material and cite them properly using the IEEE citation format [X] where X is the source_id.
|
||||
<documents>
|
||||
{documents_text}
|
||||
</documents>
|
||||
"""
|
||||
|
||||
# Create messages for the LLM
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@ You are a research assistant tasked with analyzing documents and providing compr
|
|||
16. CRITICAL: Citations must ONLY appear as [X] or [X], [Y], [Z] format - never with parentheses, hyperlinks, or other formatting.
|
||||
17. CRITICAL: Never make up citation numbers. Only use source_id values that are explicitly provided in the document metadata.
|
||||
18. CRITICAL: If you are unsure about a source_id, do not include a citation rather than guessing or making one up.
|
||||
19. CRITICAL: Focus only on answering the user's query. Any guiding questions provided are for your thinking process only and should not be mentioned in your response.
|
||||
20. CRITICAL: Ensure your response aligns with the provided sub-section title and section position.
|
||||
</instructions>
|
||||
|
||||
<format>
|
||||
|
|
@ -37,6 +39,8 @@ You are a research assistant tasked with analyzing documents and providing compr
|
|||
- NEVER create your own citation numbering system - use the exact source_id values from the documents.
|
||||
- NEVER format citations as clickable links or as markdown links like "([1](https://example.com))". Always use plain square brackets only.
|
||||
- NEVER make up citation numbers if you are unsure about the source_id. It is better to omit the citation than to guess.
|
||||
- NEVER include or mention the guiding questions in your response. They are only to help guide your thinking.
|
||||
- ALWAYS focus on answering the user's query directly from the information in the documents.
|
||||
</format>
|
||||
|
||||
<input_example>
|
||||
|
|
@ -84,4 +88,21 @@ ONLY use plain square brackets [1] or multiple citations [1], [2], [3]
|
|||
</incorrect_citation_formats>
|
||||
|
||||
Note that the citation numbers match exactly with the source_id values (1, 13, and 21) and are not renumbered sequentially. Citations follow IEEE style with square brackets and appear at the end of sentences.
|
||||
|
||||
<user_query_instructions>
|
||||
When you see a user query like:
|
||||
<user_query>
|
||||
Give all linear issues.
|
||||
</user_query>
|
||||
|
||||
Focus exclusively on answering this query using information from the provided documents.
|
||||
|
||||
If guiding questions are provided in a <guiding_questions> section, use them only to guide your thinking process. Do not mention or list these questions in your response.
|
||||
|
||||
Make sure your response:
|
||||
1. Directly answers the user's query
|
||||
2. Fits the provided sub-section title and section position
|
||||
3. Uses proper citations for all information from documents
|
||||
4. Is well-structured and professional in tone
|
||||
</user_query_instructions>
|
||||
"""
|
||||
Loading…
Add table
Add a link
Reference in a new issue