Mirror of https://github.com/MODSetter/SurfSense.git, synced 2026-05-09 07:42:39 +02:00
Fixed all ruff lint and formatting errors

parent 0a03c42cc5
commit d359a59f6d
85 changed files with 5520 additions and 3870 deletions
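The changes below are mechanical autofixes of the kind ruff applies: import blocks re-sorted and regrouped (standard library, then third-party, then local), `typing.Dict` annotations replaced with the built-in `dict` generic, long calls and conditional expressions wrapped with trailing commas, and `str(e)` inside f-strings rewritten as `{e!s}`. The exact invocation is not recorded in the commit; fixes like these typically come from running `ruff check --fix` followed by `ruff format`.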
@@ -1,24 +1,28 @@
-from app.services.reranker_service import RerankerService
-from .configuration import Configuration
-from langchain_core.runnables import RunnableConfig
-from .state import State
-from typing import Any, Dict
-from .prompts import get_qna_citation_system_prompt, get_qna_no_documents_system_prompt
-from langchain_core.messages import HumanMessage, SystemMessage
-from ..utils import (
-    optimize_documents_for_token_limit,
-    calculate_token_count,
-    format_documents_section
-)
+from typing import Any
+
+from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_core.runnables import RunnableConfig
+
+from app.services.reranker_service import RerankerService
+
+from ..utils import (
+    calculate_token_count,
+    format_documents_section,
+    optimize_documents_for_token_limit,
+)
+from .configuration import Configuration
+from .prompts import get_qna_citation_system_prompt, get_qna_no_documents_system_prompt
+from .state import State
+
 
-async def rerank_documents(state: State, config: RunnableConfig) -> Dict[str, Any]:
+async def rerank_documents(state: State, config: RunnableConfig) -> dict[str, Any]:
     """
     Rerank the documents based on relevance to the user's question.
 
     This node takes the relevant documents provided in the configuration,
     reranks them using the reranker service based on the user's query,
     and updates the state with the reranked documents.
 
     Returns:
         Dict containing the reranked documents.
     """
@@ -30,16 +34,14 @@ async def rerank_documents(state: State, config: RunnableConfig) -> Dict[str, Any]:
 
     # If no documents were provided, return empty list
     if not documents or len(documents) == 0:
-        return {
-            "reranked_documents": []
-        }
+        return {"reranked_documents": []}
 
     # Get reranker service from app config
     reranker_service = RerankerService.get_reranker_instance()
 
     # Use documents as is if no reranker service is available
     reranked_docs = documents
 
     if reranker_service:
         try:
             # Convert documents to format expected by reranker if needed
@@ -51,58 +53,64 @@ async def rerank_documents(state: State, config: RunnableConfig) -> Dict[str, Any]:
                     "document": {
                         "id": doc.get("document", {}).get("id", ""),
                         "title": doc.get("document", {}).get("title", ""),
-                        "document_type": doc.get("document", {}).get("document_type", ""),
-                        "metadata": doc.get("document", {}).get("metadata", {})
-                    }
-                } for i, doc in enumerate(documents)
+                        "document_type": doc.get("document", {}).get(
+                            "document_type", ""
+                        ),
+                        "metadata": doc.get("document", {}).get("metadata", {}),
+                    },
+                }
+                for i, doc in enumerate(documents)
             ]
 
             # Rerank documents using the user's query
-            reranked_docs = reranker_service.rerank_documents(user_query + "\n" + reformulated_query, reranker_input_docs)
+            reranked_docs = reranker_service.rerank_documents(
+                user_query + "\n" + reformulated_query, reranker_input_docs
+            )
 
             # Sort by score in descending order
             reranked_docs.sort(key=lambda x: x.get("score", 0), reverse=True)
 
-            print(f"Reranked {len(reranked_docs)} documents for Q&A query: {user_query}")
+            print(
+                f"Reranked {len(reranked_docs)} documents for Q&A query: {user_query}"
+            )
         except Exception as e:
-            print(f"Error during reranking: {str(e)}")
+            print(f"Error during reranking: {e!s}")
             # Use original docs if reranking fails
 
-    return {
-        "reranked_documents": reranked_docs
-    }
+    return {"reranked_documents": reranked_docs}
 
-async def answer_question(state: State, config: RunnableConfig) -> Dict[str, Any]:
+
+async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any]:
     """
     Answer the user's question using the provided documents.
 
     This node takes the relevant documents provided in the configuration and uses
     an LLM to generate a comprehensive answer to the user's question with
     proper citations. The citations follow IEEE format using source IDs from the
     documents. If no documents are provided, it will use chat history to generate
     an answer.
 
     Returns:
         Dict containing the final answer in the "final_answer" key.
     """
     from app.services.llm_service import get_user_fast_llm
 
     # Get configuration and relevant documents from configuration
     configuration = Configuration.from_runnable_config(config)
     documents = state.reranked_documents
     user_query = configuration.user_query
     user_id = configuration.user_id
 
     # Get user's fast LLM
     llm = await get_user_fast_llm(state.db_session, user_id)
     if not llm:
         error_message = f"No fast LLM configured for user {user_id}"
         print(error_message)
         raise RuntimeError(error_message)
 
     # Determine if we have documents and optimize for token limits
     has_documents_initially = documents and len(documents) > 0
 
     if has_documents_initially:
         # Create base message template for token calculation (without documents)
         base_human_message_template = f"""
@@ -114,41 +122,49 @@ async def answer_question(state: State, config: RunnableConfig) -> Dict[str, Any]:
 
 Please provide a detailed, comprehensive answer to the user's question using the information from their personal knowledge sources. Make sure to cite all information appropriately and engage in a conversational manner.
 """
 
         # Use initial system prompt for token calculation
         initial_system_prompt = get_qna_citation_system_prompt()
-        base_messages = state.chat_history + [
+        base_messages = [
+            *state.chat_history,
             SystemMessage(content=initial_system_prompt),
-            HumanMessage(content=base_human_message_template)
+            HumanMessage(content=base_human_message_template),
         ]
 
         # Optimize documents to fit within token limits
-        optimized_documents, has_optimized_documents = optimize_documents_for_token_limit(
-            documents, base_messages, llm.model
+        optimized_documents, has_optimized_documents = (
+            optimize_documents_for_token_limit(documents, base_messages, llm.model)
         )
 
         # Update state based on optimization result
         documents = optimized_documents
         has_documents = has_optimized_documents
     else:
         has_documents = False
 
     # Choose system prompt based on final document availability
-    system_prompt = get_qna_citation_system_prompt() if has_documents else get_qna_no_documents_system_prompt()
+    system_prompt = (
+        get_qna_citation_system_prompt()
+        if has_documents
+        else get_qna_no_documents_system_prompt()
+    )
 
     # Generate documents section
-    documents_text = format_documents_section(
-        documents,
-        "Source material from your personal knowledge base"
-    ) if has_documents else ""
+    documents_text = (
+        format_documents_section(
+            documents, "Source material from your personal knowledge base"
+        )
+        if has_documents
+        else ""
+    )
 
     # Create final human message content
     instruction_text = (
         "Please provide a detailed, comprehensive answer to the user's question using the information from their personal knowledge sources. Make sure to cite all information appropriately and engage in a conversational manner."
-        if has_documents else
-        "Please provide a helpful answer to the user's question based on our conversation history and your general knowledge. Engage in a conversational manner."
+        if has_documents
+        else "Please provide a helpful answer to the user's question based on our conversation history and your general knowledge. Engage in a conversational manner."
    )
 
     human_message_content = f"""
 {documents_text}
 
@@ -159,22 +175,20 @@ async def answer_question(state: State, config: RunnableConfig) -> Dict[str, Any]:
 
 {instruction_text}
 """
 
     # Create final messages for the LLM
-    messages_with_chat_history = state.chat_history + [
+    messages_with_chat_history = [
+        *state.chat_history,
         SystemMessage(content=system_prompt),
-        HumanMessage(content=human_message_content)
+        HumanMessage(content=human_message_content),
     ]
 
     # Log final token count
     total_tokens = calculate_token_count(messages_with_chat_history, llm.model)
     print(f"Final token count: {total_tokens}")
 
     # Call the LLM and get the response
     response = await llm.ainvoke(messages_with_chat_history)
     final_answer = response.content
 
-    return {
-        "final_answer": final_answer
-    }
-
+    return {"final_answer": final_answer}
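Three ruff rewrites recur throughout this diff: built-in generics instead of typing aliases, iterable unpacking instead of list concatenation, and the `!s` conversion flag instead of `str()` in f-strings. A minimal before/after sketch of each pair follows; the function and parameter names are hypothetical, and the rule codes (UP006, RUF005, RUF010) are the usual ruff rules behind such fixes, though the commit itself does not name them.

from typing import Any, Dict


def before_style(history: list, msg: str) -> Dict[str, Any]:
    # Pre-fix style seen on the removed side of the diff: a typing.Dict
    # annotation, list concatenation with "+", and str(e) in an f-string.
    try:
        messages = history + [msg]
        return {"messages": messages}
    except Exception as e:
        print(f"Error: {str(e)}")
        raise


def after_style(history: list, msg: str) -> dict[str, Any]:
    # Post-fix style seen on the added side: the built-in dict generic
    # (UP006), iterable unpacking in the list literal (RUF005), and the
    # !s conversion flag instead of str() (RUF010).
    try:
        messages = [*history, msg]
        return {"messages": messages}
    except Exception as e:
        print(f"Error: {e!s}")
        raise

Both functions behave identically; the rewrites are purely stylistic, which is how a commit like this can touch 85 files without changing behavior.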