Mirror of https://github.com/MODSetter/SurfSense.git, synced 2026-05-09 07:42:39 +02:00
Fixed all ruff lint and formatting errors

parent 0a03c42cc5
commit d359a59f6d
85 changed files with 5520 additions and 3870 deletions
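The changes below are mechanical autofixes of the kind ruff applies: import blocks re-sorted and regrouped (standard library, then third-party, then local), `typing.Dict` annotations replaced with the built-in `dict` generic, long calls and conditional expressions wrapped with trailing commas, and `str(e)` inside f-strings rewritten as `{e!s}`. The exact invocation is not recorded in the commit; fixes like these typically come from running `ruff check --fix` followed by `ruff format`.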
@@ -1,24 +1,28 @@
-from app.services.reranker_service import RerankerService
-from .configuration import Configuration
-from langchain_core.runnables import RunnableConfig
-from .state import State
-from typing import Any, Dict
-from .prompts import get_qna_citation_system_prompt, get_qna_no_documents_system_prompt
-from langchain_core.messages import HumanMessage, SystemMessage
-from ..utils import (
-    optimize_documents_for_token_limit,
-    calculate_token_count,
-    format_documents_section
-)
+from typing import Any
+
+from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_core.runnables import RunnableConfig
+
+from app.services.reranker_service import RerankerService
+
+from ..utils import (
+    calculate_token_count,
+    format_documents_section,
+    optimize_documents_for_token_limit,
+)
+from .configuration import Configuration
+from .prompts import get_qna_citation_system_prompt, get_qna_no_documents_system_prompt
+from .state import State
+
 
-async def rerank_documents(state: State, config: RunnableConfig) -> Dict[str, Any]:
+async def rerank_documents(state: State, config: RunnableConfig) -> dict[str, Any]:
     """
     Rerank the documents based on relevance to the user's question.
 
     This node takes the relevant documents provided in the configuration,
     reranks them using the reranker service based on the user's query,
     and updates the state with the reranked documents.
 
     Returns:
         Dict containing the reranked documents.
     """
@@ -30,16 +34,14 @@ async def rerank_documents(state: State, config: RunnableConfig) -> Dict[str, Any]:
 
     # If no documents were provided, return empty list
     if not documents or len(documents) == 0:
-        return {
-            "reranked_documents": []
-        }
+        return {"reranked_documents": []}
 
     # Get reranker service from app config
     reranker_service = RerankerService.get_reranker_instance()
 
     # Use documents as is if no reranker service is available
     reranked_docs = documents
 
     if reranker_service:
         try:
             # Convert documents to format expected by reranker if needed
@@ -51,58 +53,64 @@ async def rerank_documents(state: State, config: RunnableConfig) -> Dict[str, Any]:
                     "document": {
                         "id": doc.get("document", {}).get("id", ""),
                         "title": doc.get("document", {}).get("title", ""),
-                        "document_type": doc.get("document", {}).get("document_type", ""),
-                        "metadata": doc.get("document", {}).get("metadata", {})
-                    }
-                } for i, doc in enumerate(documents)
+                        "document_type": doc.get("document", {}).get(
+                            "document_type", ""
+                        ),
+                        "metadata": doc.get("document", {}).get("metadata", {}),
+                    },
+                }
+                for i, doc in enumerate(documents)
             ]
 
             # Rerank documents using the user's query
-            reranked_docs = reranker_service.rerank_documents(user_query + "\n" + reformulated_query, reranker_input_docs)
+            reranked_docs = reranker_service.rerank_documents(
+                user_query + "\n" + reformulated_query, reranker_input_docs
+            )
 
             # Sort by score in descending order
             reranked_docs.sort(key=lambda x: x.get("score", 0), reverse=True)
 
-            print(f"Reranked {len(reranked_docs)} documents for Q&A query: {user_query}")
+            print(
+                f"Reranked {len(reranked_docs)} documents for Q&A query: {user_query}"
+            )
         except Exception as e:
-            print(f"Error during reranking: {str(e)}")
+            print(f"Error during reranking: {e!s}")
             # Use original docs if reranking fails
 
-    return {
-        "reranked_documents": reranked_docs
-    }
+    return {"reranked_documents": reranked_docs}
 
-async def answer_question(state: State, config: RunnableConfig) -> Dict[str, Any]:
+
+async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any]:
     """
     Answer the user's question using the provided documents.
 
     This node takes the relevant documents provided in the configuration and uses
     an LLM to generate a comprehensive answer to the user's question with
     proper citations. The citations follow IEEE format using source IDs from the
     documents. If no documents are provided, it will use chat history to generate
     an answer.
 
     Returns:
         Dict containing the final answer in the "final_answer" key.
     """
     from app.services.llm_service import get_user_fast_llm
 
     # Get configuration and relevant documents from configuration
     configuration = Configuration.from_runnable_config(config)
     documents = state.reranked_documents
     user_query = configuration.user_query
     user_id = configuration.user_id
 
     # Get user's fast LLM
     llm = await get_user_fast_llm(state.db_session, user_id)
     if not llm:
         error_message = f"No fast LLM configured for user {user_id}"
         print(error_message)
         raise RuntimeError(error_message)
 
     # Determine if we have documents and optimize for token limits
     has_documents_initially = documents and len(documents) > 0
 
     if has_documents_initially:
         # Create base message template for token calculation (without documents)
         base_human_message_template = f"""
@@ -114,41 +122,49 @@ async def answer_question(state: State, config: RunnableConfig) -> Dict[str, Any]:
 
 Please provide a detailed, comprehensive answer to the user's question using the information from their personal knowledge sources. Make sure to cite all information appropriately and engage in a conversational manner.
 """
 
         # Use initial system prompt for token calculation
         initial_system_prompt = get_qna_citation_system_prompt()
-        base_messages = state.chat_history + [
+        base_messages = [
+            *state.chat_history,
             SystemMessage(content=initial_system_prompt),
-            HumanMessage(content=base_human_message_template)
+            HumanMessage(content=base_human_message_template),
         ]
 
         # Optimize documents to fit within token limits
-        optimized_documents, has_optimized_documents = optimize_documents_for_token_limit(
-            documents, base_messages, llm.model
+        optimized_documents, has_optimized_documents = (
+            optimize_documents_for_token_limit(documents, base_messages, llm.model)
         )
 
         # Update state based on optimization result
         documents = optimized_documents
         has_documents = has_optimized_documents
     else:
         has_documents = False
 
     # Choose system prompt based on final document availability
-    system_prompt = get_qna_citation_system_prompt() if has_documents else get_qna_no_documents_system_prompt()
+    system_prompt = (
+        get_qna_citation_system_prompt()
+        if has_documents
+        else get_qna_no_documents_system_prompt()
+    )
 
     # Generate documents section
-    documents_text = format_documents_section(
-        documents,
-        "Source material from your personal knowledge base"
-    ) if has_documents else ""
+    documents_text = (
+        format_documents_section(
+            documents, "Source material from your personal knowledge base"
+        )
+        if has_documents
+        else ""
+    )
 
     # Create final human message content
     instruction_text = (
         "Please provide a detailed, comprehensive answer to the user's question using the information from their personal knowledge sources. Make sure to cite all information appropriately and engage in a conversational manner."
-        if has_documents else
-        "Please provide a helpful answer to the user's question based on our conversation history and your general knowledge. Engage in a conversational manner."
+        if has_documents
+        else "Please provide a helpful answer to the user's question based on our conversation history and your general knowledge. Engage in a conversational manner."
    )
 
     human_message_content = f"""
 {documents_text}
 
@@ -159,22 +175,20 @@ async def answer_question(state: State, config: RunnableConfig) -> Dict[str, Any]:
 
 {instruction_text}
 """
 
     # Create final messages for the LLM
-    messages_with_chat_history = state.chat_history + [
+    messages_with_chat_history = [
+        *state.chat_history,
         SystemMessage(content=system_prompt),
-        HumanMessage(content=human_message_content)
+        HumanMessage(content=human_message_content),
     ]
 
     # Log final token count
     total_tokens = calculate_token_count(messages_with_chat_history, llm.model)
     print(f"Final token count: {total_tokens}")
 
     # Call the LLM and get the response
     response = await llm.ainvoke(messages_with_chat_history)
     final_answer = response.content
 
-    return {
-        "final_answer": final_answer
-    }
-
+    return {"final_answer": final_answer}
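Three ruff rewrites recur throughout this diff: built-in generics instead of typing aliases, iterable unpacking instead of list concatenation, and the `!s` conversion flag instead of `str()` in f-strings. A minimal before/after sketch of each pair follows; the function and parameter names are hypothetical, and the rule codes (UP006, RUF005, RUF010) are the usual ruff rules behind such fixes, though the commit itself does not name them.

from typing import Any, Dict


def before_style(history: list, msg: str) -> Dict[str, Any]:
    # Pre-fix style seen on the removed side of the diff: a typing.Dict
    # annotation, list concatenation with "+", and str(e) in an f-string.
    try:
        messages = history + [msg]
        return {"messages": messages}
    except Exception as e:
        print(f"Error: {str(e)}")
        raise


def after_style(history: list, msg: str) -> dict[str, Any]:
    # Post-fix style seen on the added side: the built-in dict generic
    # (UP006), iterable unpacking in the list literal (RUF005), and the
    # !s conversion flag instead of str() (RUF010).
    try:
        messages = [*history, msg]
        return {"messages": messages}
    except Exception as e:
        print(f"Error: {e!s}")
        raise

Both functions behave identically; the rewrites are purely stylistic, which is how a commit like this can touch 85 files without changing behavior.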