refactor: Move utility services to a dedicated 'services' module

- Updated import paths for LLM, connector, query, and streaming services to reflect their new location in the 'services' module.
- Removed obsolete utility service files that have been migrated.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2025-07-06 17:51:24 -07:00
parent 59cab99737
commit 5045b7433a
14 changed files with 14 additions and 13 deletions

File diff suppressed because it is too large Load diff

View file

@@ -1,120 +0,0 @@
from typing import Optional
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from langchain_community.chat_models import ChatLiteLLM
import logging
from app.db import User, LLMConfig
logger = logging.getLogger(__name__)
class LLMRole:
    """Role identifiers used to pick which of a user's configured LLMs to load."""

    LONG_CONTEXT = "long_context"  # model configured for large context windows
    FAST = "fast"                  # model configured for low-latency responses
    STRATEGIC = "strategic"        # model configured for high-quality reasoning
async def get_user_llm_instance(
    session: AsyncSession,
    user_id: str,
    role: str
) -> Optional[ChatLiteLLM]:
    """
    Get a ChatLiteLLM instance for a specific user and role.

    Args:
        session: Database session
        user_id: User ID
        role: LLM role ('long_context', 'fast', or 'strategic')

    Returns:
        ChatLiteLLM instance or None if not found
    """
    try:
        # Load the user row that holds the per-role LLM config references.
        user_result = await session.execute(
            select(User).where(User.id == user_id)
        )
        user = user_result.scalars().first()
        if user is None:
            logger.error(f"User {user_id} not found")
            return None

        # Map each role to the User column holding its config id.
        role_to_attr = {
            LLMRole.LONG_CONTEXT: "long_context_llm_id",
            LLMRole.FAST: "fast_llm_id",
            LLMRole.STRATEGIC: "strategic_llm_id",
        }
        if role not in role_to_attr:
            logger.error(f"Invalid LLM role: {role}")
            return None
        llm_config_id = getattr(user, role_to_attr[role])
        if not llm_config_id:
            logger.error(f"No {role} LLM configured for user {user_id}")
            return None

        # Fetch the config, scoped to this user so one user cannot
        # reference another user's configuration.
        config_result = await session.execute(
            select(LLMConfig).where(
                LLMConfig.id == llm_config_id,
                LLMConfig.user_id == user_id
            )
        )
        llm_config = config_result.scalars().first()
        if llm_config is None:
            logger.error(f"LLM config {llm_config_id} not found for user {user_id}")
            return None

        # Build the "provider/model" string litellm expects.
        if llm_config.custom_provider:
            model_string = f"{llm_config.custom_provider}/{llm_config.model_name}"
        else:
            # Map provider enum to litellm format
            provider_map = {
                "OPENAI": "openai",
                "ANTHROPIC": "anthropic",
                "GROQ": "groq",
                "COHERE": "cohere",
                "GOOGLE": "gemini",
                "OLLAMA": "ollama",
                "MISTRAL": "mistral",
                # Add more mappings as needed
            }
            prefix = provider_map.get(
                llm_config.provider.value,
                llm_config.provider.value.lower()
            )
            model_string = f"{prefix}/{llm_config.model_name}"

        # Assemble constructor kwargs; optional settings only when present.
        litellm_kwargs = {
            "model": model_string,
            "api_key": llm_config.api_key,
        }
        if llm_config.api_base:
            litellm_kwargs["api_base"] = llm_config.api_base
        if llm_config.litellm_params:
            litellm_kwargs.update(llm_config.litellm_params)

        return ChatLiteLLM(**litellm_kwargs)
    except Exception as e:
        logger.error(f"Error getting LLM instance for user {user_id}, role {role}: {str(e)}")
        return None
async def get_user_long_context_llm(session: AsyncSession, user_id: str) -> Optional[ChatLiteLLM]:
    """Convenience wrapper: resolve the user's long-context LLM, or None."""
    return await get_user_llm_instance(session, user_id, LLMRole.LONG_CONTEXT)
async def get_user_fast_llm(session: AsyncSession, user_id: str) -> Optional[ChatLiteLLM]:
    """Convenience wrapper: resolve the user's fast (low-latency) LLM, or None."""
    return await get_user_llm_instance(session, user_id, LLMRole.FAST)
async def get_user_strategic_llm(session: AsyncSession, user_id: str) -> Optional[ChatLiteLLM]:
    """Convenience wrapper: resolve the user's strategic (reasoning) LLM, or None."""
    return await get_user_llm_instance(session, user_id, LLMRole.STRATEGIC)

View file

@@ -1,112 +0,0 @@
import datetime
from langchain.schema import HumanMessage, SystemMessage, AIMessage
from app.config import config
from app.utils.llm_service import get_user_strategic_llm
from sqlalchemy.ext.asyncio import AsyncSession
from typing import Any, List, Optional
class QueryService:
    """
    Service for query-related operations, including reformulation and processing.
    """

    @staticmethod
    async def reformulate_query_with_chat_history(
        user_query: str,
        session: AsyncSession,
        user_id: str,
        chat_history_str: Optional[str] = None
    ) -> str:
        """
        Reformulate the user query using the user's strategic LLM to make it more
        effective for information retrieval and research purposes.

        Args:
            user_query: The original user query
            session: Database session for accessing user LLM configs
            user_id: User ID to get their specific LLM configuration
            chat_history_str: Optional chat history string

        Returns:
            str: The reformulated query. Falls back to the original query when
                 the input is blank, no strategic LLM is configured, the LLM
                 returns an empty result, or any error occurs.
        """
        # Blank or whitespace-only input cannot be improved; return as-is.
        if not user_query or not user_query.strip():
            return user_query
        try:
            # Get the user's strategic LLM instance
            llm = await get_user_strategic_llm(session, user_id)
            if not llm:
                print(f"Warning: No strategic LLM configured for user {user_id}. Using original query.")
                return user_query
            # Create system message with instructions
            system_message = SystemMessage(
                content=f"""
Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")}
You are a highly skilled AI assistant specializing in query optimization for advanced research.
Your primary objective is to transform a user's initial query into a highly effective search query.
This reformulated query will be used to retrieve information from diverse data sources.
**Chat History Context:**
{chat_history_str if chat_history_str else "No prior conversation history is available."}
If chat history is provided, analyze it to understand the user's evolving information needs and the broader context of their request. Use this understanding to refine the current query, ensuring it builds upon or clarifies previous interactions.
**Query Reformulation Guidelines:**
Your reformulated query should:
1. **Enhance Specificity and Detail:** Add precision to narrow the search focus effectively, making the query less ambiguous and more targeted.
2. **Resolve Ambiguities:** Identify and clarify vague terms or phrases. If a term has multiple meanings, orient the query towards the most likely one given the context.
3. **Expand Key Concepts:** Incorporate relevant synonyms, related terms, and alternative phrasings for core concepts. This helps capture a wider range of relevant documents.
4. **Deconstruct Complex Questions:** If the original query is multifaceted, break it down into its core searchable components or rephrase it to address each aspect clearly. The final output must still be a single, coherent query string.
5. **Optimize for Comprehensiveness:** Ensure the query is structured to uncover all essential facets of the original request, aiming for thorough information retrieval suitable for research.
6. **Maintain User Intent:** The reformulated query must stay true to the original intent of the user's query. Do not introduce new topics or shift the focus significantly.
**Crucial Constraints:**
* **Conciseness and Effectiveness:** While aiming for comprehensiveness, the reformulated query MUST be as concise as possible. Eliminate all unnecessary verbosity. Focus on essential keywords, entities, and concepts that directly contribute to effective retrieval.
* **Single, Direct Output:** Return ONLY the reformulated query itself. Do NOT include any explanations, introductory phrases (e.g., "Reformulated query:", "Here is the optimized query:"), or any other surrounding text or markdown formatting.
Your output should be a single, optimized query string, ready for immediate use in a search system.
"""
            )
            # Create human message with the user query
            human_message = HumanMessage(
                content=f"Reformulate this query for better research results: {user_query}"
            )
            # Get the response from the LLM
            response = await llm.agenerate(messages=[[system_message, human_message]])
            # Extract the reformulated query from the response
            reformulated_query = response.generations[0][0].text.strip()
            # Return the original query if the reformulation is empty
            if not reformulated_query:
                return user_query
            return reformulated_query
        except Exception as e:
            # Log the error and return the original query
            print(f"Error reformulating query: {e}")
            return user_query

    @staticmethod
    async def langchain_chat_history_to_str(chat_history: List[Any]) -> str:
        """
        Convert a list of chat history messages to a string.

        Human, AI, and system messages are wrapped in <user>, <assistant>, and
        <system> tags respectively; any other message type is silently skipped.
        """
        # Collect fragments and join once — avoids quadratic += concatenation.
        parts: List[str] = ["<chat_history>\n"]
        for chat_message in chat_history:
            if isinstance(chat_message, HumanMessage):
                parts.append(f"<user>{chat_message.content}</user>\n")
            elif isinstance(chat_message, AIMessage):
                parts.append(f"<assistant>{chat_message.content}</assistant>\n")
            elif isinstance(chat_message, SystemMessage):
                parts.append(f"<system>{chat_message.content}</system>\n")
        parts.append("</chat_history>")
        return "".join(parts)

View file

@@ -1,95 +0,0 @@
import logging
from typing import List, Dict, Any, Optional
from rerankers import Document as RerankerDocument
class RerankerService:
    """
    Service for reranking documents using a configured reranker
    """

    def __init__(self, reranker_instance=None):
        """
        Initialize the reranker service

        Args:
            reranker_instance: The reranker instance to use for reranking
                (anything exposing a `rank(query=..., docs=...)` method);
                when None, rerank_documents is a no-op.
        """
        self.reranker_instance = reranker_instance

    def rerank_documents(self, query_text: str, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Rerank documents using the configured reranker

        Args:
            query_text: The query text to use for reranking
            documents: List of document dictionaries to rerank

        Returns:
            List[Dict[str, Any]]: Reranked documents (copies with updated
            "score" and added "rank"); the input list is returned unchanged
            when no reranker is configured, the input is empty, or an error
            occurs.
        """
        if not self.reranker_instance or not documents:
            return documents
        try:
            # Wrap each input dict in the rerankers library's Document type.
            reranker_docs = []
            for i, doc in enumerate(documents):
                chunk_id = doc.get("chunk_id", f"chunk_{i}")
                content = doc.get("content", "")
                score = doc.get("score", 0.0)
                document_info = doc.get("document", {})
                reranker_docs.append(
                    RerankerDocument(
                        text=content,
                        doc_id=chunk_id,
                        metadata={
                            'document_id': document_info.get("id", ""),
                            'document_title': document_info.get("title", ""),
                            'document_type': document_info.get("document_type", ""),
                            'rrf_score': score
                        }
                    )
                )
            # Rerank using the configured reranker
            reranking_results = self.reranker_instance.rank(
                query=query_text,
                docs=reranker_docs
            )
            # Index originals by chunk_id for O(1) lookup per result
            # (previously an O(n) linear scan for every reranked result).
            # First occurrence wins, matching the old next(...) behavior
            # when chunk_ids are duplicated.
            docs_by_chunk_id: Dict[Any, Dict[str, Any]] = {}
            for doc in documents:
                cid = doc.get("chunk_id")
                if cid not in docs_by_chunk_id:
                    docs_by_chunk_id[cid] = doc
            # Convert results back to serializable dictionaries.
            serialized_results = []
            for result in reranking_results.results:
                original_doc = docs_by_chunk_id.get(result.document.doc_id)
                if original_doc:
                    # Copy so the caller's input dicts are not mutated.
                    reranked_doc = original_doc.copy()
                    reranked_doc["score"] = float(result.score)
                    reranked_doc["rank"] = result.rank
                    serialized_results.append(reranked_doc)
            return serialized_results
        except Exception as e:
            # Log the error
            logging.error(f"Error during reranking: {str(e)}")
            # Fall back to original documents without reranking
            return documents

    @staticmethod
    def get_reranker_instance() -> Optional['RerankerService']:
        """
        Get a reranker service instance from the global configuration.

        Returns:
            Optional[RerankerService]: A reranker service instance if configured, None otherwise
        """
        # Imported lazily to avoid a hard dependency at module import time.
        from app.config import config
        if hasattr(config, 'reranker_instance') and config.reranker_instance:
            return RerankerService(config.reranker_instance)
        return None

View file

@@ -1,72 +0,0 @@
import json
from typing import Any, Dict, List
class StreamingService:
    """
    Accumulates streaming message annotations (terminal logs, sources, answer)
    and formats them as framed payload lines for the frontend stream protocol.
    """

    def __init__(self):
        # Monotonically increasing id assigned to each terminal message.
        self.terminal_idx = 1
        # Fixed layout relied on by the update methods:
        # index 0 = TERMINAL_INFO, 1 = SOURCES, 2 = ANSWER.
        self.message_annotations = [
            {
                "type": "TERMINAL_INFO",
                "content": []
            },
            {
                "type": "SOURCES",
                "content": []
            },
            {
                "type": "ANSWER",
                "content": []
            }
        ]

    # It is used to send annotations to the frontend
    def _format_annotations(self) -> str:
        """
        Format the annotations as a string

        Returns:
            str: The formatted annotations string ('8:'-framed JSON line)
        """
        return f'8:{json.dumps(self.message_annotations)}\n'

    # It is used to end Streaming
    def format_completion(self, prompt_tokens: int = 156, completion_tokens: int = 204) -> str:
        """
        Format a completion message

        Args:
            prompt_tokens: Number of prompt tokens
                (NOTE(review): 156 looks like an arbitrary placeholder default — confirm)
            completion_tokens: Number of completion tokens
                (NOTE(review): 204 likewise appears to be a placeholder default)

        Returns:
            str: The formatted completion string ('d:'-framed JSON line)
        """
        total_tokens = prompt_tokens + completion_tokens
        completion_data = {
            "finishReason": "stop",
            "usage": {
                "promptTokens": prompt_tokens,
                "completionTokens": completion_tokens,
                "totalTokens": total_tokens
            }
        }
        return f'd:{json.dumps(completion_data)}\n'

    def only_update_terminal(self, text: str, message_type: str = "info") -> List[Dict[str, Any]]:
        """
        Append a terminal message and return the full annotations list.

        BUG FIX: was annotated -> str but has always returned the annotations
        list; the annotation now matches the actual return value.
        """
        self.message_annotations[0]["content"].append({
            "id": self.terminal_idx,
            "text": text,
            "type": message_type
        })
        self.terminal_idx += 1
        return self.message_annotations

    def only_update_sources(self, sources: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Replace the SOURCES content and return the full annotations list
        (annotation fixed from -> str to match the actual return value)."""
        self.message_annotations[1]["content"] = sources
        return self.message_annotations

    def only_update_answer(self, answer: List[str]) -> List[Dict[str, Any]]:
        """Replace the ANSWER content and return the full annotations list
        (annotation fixed from -> str to match the actual return value)."""
        self.message_annotations[2]["content"] = answer
        return self.message_annotations