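chore: cleanup (remove deprecated backend helpers; rename Jira OAuth env vars to Atlassian and add Confluence redirect URI)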

2026-07-02 22:01:05 +02:00 · 2026-01-07 19:07:06 -08:00 · 2026-01-07 19:07:06 -08:00 · 48fc70a08b
commit 48fc70a08b
parent 33ab74f698
22 changed files with 8 additions and 1540 deletions
--- a/surfsense_backend/.env.example
+++ b/surfsense_backend/.env.example
@ -55,10 +55,11 @@ DISCORD_CLIENT_SECRET=your_discord_client_secret_here
 DISCORD_REDIRECT_URI=http://localhost:8000/api/v1/auth/discord/connector/callback
 DISCORD_BOT_TOKEN=your_bot_token_from_developer_portal

-# Jira OAuth Configuration
-JIRA_CLIENT_ID=your_jira_client_id_here
-JIRA_CLIENT_SECRET=your_jira_client_secret_here
+# Atlassian OAuth Configuration (used by the Jira and Confluence connectors)
+ATLASSIAN_CLIENT_ID=your_atlassian_client_id_here
+ATLASSIAN_CLIENT_SECRET=your_atlassian_client_secret_here
 JIRA_REDIRECT_URI=http://localhost:8000/api/v1/auth/jira/connector/callback
+CONFLUENCE_REDIRECT_URI=http://localhost:8000/api/v1/auth/confluence/connector/callback

 # Linear OAuth Configuration
 LINEAR_CLIENT_ID=your_linear_client_id_here
--- a/surfsense_backend/app/services/docling_service.py
+++ b/surfsense_backend/app/services/docling_service.py
@ -128,42 +128,6 @@ class DoclingService:
            logger.error(f"❌ Docling initialization failed: {e}")
            raise RuntimeError(f"Docling initialization failed: {e}") from e

-    def _configure_easyocr_local_models(self):
-        """Configure EasyOCR to use pre-downloaded local models."""
-        try:
-            import os
-
-            import easyocr
-
-            # Set SSL environment for EasyOCR downloads
-            os.environ["CURL_CA_BUNDLE"] = ""
-            os.environ["REQUESTS_CA_BUNDLE"] = ""
-
-            # Try to use local models first, fallback to download if needed
-            try:
-                reader = easyocr.Reader(
-                    ["en"],
-                    download_enabled=False,
-                    model_storage_directory="/root/.EasyOCR/model",
-                )
-                logger.info("✅ EasyOCR configured for local models")
-                return reader
-            except Exception:
-                # If local models fail, allow download with SSL bypass
-                logger.info(
-                    "🔄 Local models failed, attempting download with SSL bypass..."
-                )
-                reader = easyocr.Reader(
-                    ["en"],
-                    download_enabled=True,
-                    model_storage_directory="/root/.EasyOCR/model",
-                )
-                logger.info("✅ EasyOCR configured with downloaded models")
-                return reader
-        except Exception as e:
-            logger.warning(f"⚠️ EasyOCR configuration failed: {e}")
-            return None
-
    async def process_document(
        self, file_path: str, filename: str | None = None
    ) -> dict[str, Any]:
--- a/surfsense_backend/app/services/llm_service.py
+++ b/surfsense_backend/app/services/llm_service.py
@ -342,40 +342,7 @@ async def get_document_summary_llm(
    )


-# Backward-compatible aliases (deprecated - will be removed in future versions)
-async def get_user_llm_instance(
-    session: AsyncSession, user_id: str, search_space_id: int, role: str
-) -> ChatLiteLLM | None:
-    """
-    Deprecated: Use get_search_space_llm_instance instead.
-    LLM preferences are now stored at the search space level, not per-user.
-    """
-    return await get_search_space_llm_instance(session, search_space_id, role)
-
-
-# Legacy aliases for backward compatibility
-async def get_long_context_llm(
-    session: AsyncSession, search_space_id: int
-) -> ChatLiteLLM | None:
-    """Deprecated: Use get_document_summary_llm instead."""
-    return await get_document_summary_llm(session, search_space_id)
-
-
-async def get_fast_llm(
-    session: AsyncSession, search_space_id: int
-) -> ChatLiteLLM | None:
-    """Deprecated: Use get_agent_llm instead."""
-    return await get_agent_llm(session, search_space_id)
-
-
-async def get_strategic_llm(
-    session: AsyncSession, search_space_id: int
-) -> ChatLiteLLM | None:
-    """Deprecated: Use get_document_summary_llm instead."""
-    return await get_document_summary_llm(session, search_space_id)
-
-
-# User-based legacy aliases (LLM preferences are now per-search-space, not per-user)
+# Backward-compatible alias (LLM preferences are now per-search-space, not per-user)
 async def get_user_long_context_llm(
    session: AsyncSession, user_id: str, search_space_id: int
 ) -> ChatLiteLLM | None:
@ -384,23 +351,3 @@ async def get_user_long_context_llm(
    The user_id parameter is ignored as LLM preferences are now per-search-space.
    """
    return await get_document_summary_llm(session, search_space_id)
-
-
-async def get_user_fast_llm(
-    session: AsyncSession, user_id: str, search_space_id: int
-) -> ChatLiteLLM | None:
-    """
-    Deprecated: Use get_agent_llm instead.
-    The user_id parameter is ignored as LLM preferences are now per-search-space.
-    """
-    return await get_agent_llm(session, search_space_id)
-
-
-async def get_user_strategic_llm(
-    session: AsyncSession, user_id: str, search_space_id: int
-) -> ChatLiteLLM | None:
-    """
-    Deprecated: Use get_document_summary_llm instead.
-    The user_id parameter is ignored as LLM preferences are now per-search-space.
-    """
-    return await get_document_summary_llm(session, search_space_id)
--- a/surfsense_backend/app/services/query_service.py
+++ b/surfsense_backend/app/services/query_service.py
@ -1,114 +0,0 @@
-import datetime
-from typing import Any
-
-from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
-from sqlalchemy.ext.asyncio import AsyncSession
-
-from app.services.llm_service import get_document_summary_llm
-
-
-class QueryService:
-    """
-    Service for query-related operations, including reformulation and processing.
-    """
-
-    @staticmethod
-    async def reformulate_query_with_chat_history(
-        user_query: str,
-        session: AsyncSession,
-        search_space_id: int,
-        chat_history_str: str | None = None,
-    ) -> str:
-        """
-        Reformulate the user query using the search space's document summary LLM to make it more
-        effective for information retrieval and research purposes.
-
-        Args:
-            user_query: The original user query
-            session: Database session for accessing LLM configs
-            search_space_id: Search Space ID to get LLM preferences
-            chat_history_str: Optional chat history string
-
-        Returns:
-            str: The reformulated query
-        """
-        if not user_query or not user_query.strip():
-            return user_query
-
-        try:
-            # Get the search space's document summary LLM instance
-            llm = await get_document_summary_llm(session, search_space_id)
-            if not llm:
-                print(
-                    f"Warning: No document summary LLM configured for search space {search_space_id}. Using original query."
-                )
-                return user_query
-
-            # Create system message with instructions
-            system_message = SystemMessage(
-                content=f"""
-                Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")}
-                You are a highly skilled AI assistant specializing in query optimization for advanced research.
-                Your primary objective is to transform a user's initial query into a highly effective search query.
-                This reformulated query will be used to retrieve information from diverse data sources.
-
-                **Chat History Context:**
-                {chat_history_str if chat_history_str else "No prior conversation history is available."}
-                If chat history is provided, analyze it to understand the user's evolving information needs and the broader context of their request. Use this understanding to refine the current query, ensuring it builds upon or clarifies previous interactions.
-
-                **Query Reformulation Guidelines:**
-                Your reformulated query should:
-                1.  **Enhance Specificity and Detail:** Add precision to narrow the search focus effectively, making the query less ambiguous and more targeted.
-                2.  **Resolve Ambiguities:** Identify and clarify vague terms or phrases. If a term has multiple meanings, orient the query towards the most likely one given the context.
-                3.  **Expand Key Concepts:** Incorporate relevant synonyms, related terms, and alternative phrasings for core concepts. This helps capture a wider range of relevant documents.
-                4.  **Deconstruct Complex Questions:** If the original query is multifaceted, break it down into its core searchable components or rephrase it to address each aspect clearly. The final output must still be a single, coherent query string.
-                5.  **Optimize for Comprehensiveness:** Ensure the query is structured to uncover all essential facets of the original request, aiming for thorough information retrieval suitable for research.
-                6.  **Maintain User Intent:** The reformulated query must stay true to the original intent of the user's query. Do not introduce new topics or shift the focus significantly.
-
-                **Crucial Constraints:**
-                *   **Conciseness and Effectiveness:** While aiming for comprehensiveness, the reformulated query MUST be as concise as possible. Eliminate all unnecessary verbosity. Focus on essential keywords, entities, and concepts that directly contribute to effective retrieval.
-                *   **Single, Direct Output:** Return ONLY the reformulated query itself. Do NOT include any explanations, introductory phrases (e.g., "Reformulated query:", "Here is the optimized query:"), or any other surrounding text or markdown formatting.
-
-                Your output should be a single, optimized query string, ready for immediate use in a search system.
-                """
-            )
-
-            # Create human message with the user query
-            human_message = HumanMessage(
-                content=f"Reformulate this query for better research results: {user_query}"
-            )
-
-            # Get the response from the LLM
-            response = await llm.agenerate(messages=[[system_message, human_message]])
-
-            # Extract the reformulated query from the response
-            reformulated_query = response.generations[0][0].text.strip()
-
-            # Return the original query if the reformulation is empty
-            if not reformulated_query:
-                return user_query
-
-            return reformulated_query
-
-        except Exception as e:
-            # Log the error and return the original query
-            print(f"Error reformulating query: {e}")
-            return user_query
-
-    @staticmethod
-    async def langchain_chat_history_to_str(chat_history: list[Any]) -> str:
-        """
-        Convert a list of chat history messages to a string.
-        """
-        chat_history_str = "<chat_history>\n"
-
-        for chat_message in chat_history:
-            if isinstance(chat_message, HumanMessage):
-                chat_history_str += f"<user>{chat_message.content}</user>\n"
-            elif isinstance(chat_message, AIMessage):
-                chat_history_str += f"<assistant>{chat_message.content}</assistant>\n"
-            elif isinstance(chat_message, SystemMessage):
-                chat_history_str += f"<system>{chat_message.content}</system>\n"
-
-        chat_history_str += "</chat_history>"
-        return chat_history_str
--- a/surfsense_backend/app/utils/document_converters.py
+++ b/surfsense_backend/app/utils/document_converters.py
@ -222,88 +222,6 @@ async def convert_document_to_markdown(elements):
    return "".join(markdown_parts)


-def convert_chunks_to_langchain_documents(chunks):
-    """
-    Convert chunks from hybrid search results to LangChain Document objects.
-
-    Args:
-        chunks: List of chunk dictionaries from hybrid search results
-
-    Returns:
-        List of LangChain Document objects
-    """
-    try:
-        from langchain_core.documents import Document as LangChainDocument
-    except ImportError:
-        raise ImportError(
-            "LangChain is not installed. Please install it with `pip install langchain langchain-core`"
-        ) from None
-
-    langchain_docs = []
-
-    for chunk in chunks:
-        # Extract content from the chunk
-        content = chunk.get("content", "")
-
-        # Create metadata dictionary
-        metadata = {
-            "chunk_id": chunk.get("chunk_id"),
-            "score": chunk.get("score"),
-            "rank": chunk.get("rank") if "rank" in chunk else None,
-        }
-
-        # Add document information to metadata
-        if "document" in chunk:
-            doc = chunk["document"]
-            metadata.update(
-                {
-                    "document_id": doc.get("id"),
-                    "document_title": doc.get("title"),
-                    "document_type": doc.get("document_type"),
-                }
-            )
-
-            # Add document metadata if available
-            if "metadata" in doc:
-                # Prefix document metadata keys to avoid conflicts
-                doc_metadata = {
-                    f"doc_meta_{k}": v for k, v in doc.get("metadata", {}).items()
-                }
-                metadata.update(doc_metadata)
-
-                # Add source URL if available in metadata
-                if "url" in doc.get("metadata", {}):
-                    metadata["source"] = doc["metadata"]["url"]
-                elif "sourceURL" in doc.get("metadata", {}):
-                    metadata["source"] = doc["metadata"]["sourceURL"]
-
-        # Ensure source_id is set for citation purposes
-        # Use document_id as the source_id if available
-        if "document_id" in metadata:
-            metadata["source_id"] = metadata["document_id"]
-
-        # Update content for citation mode - format as XML with explicit source_id
-        new_content = f"""
-        <document>
-            <metadata>
-                <source_id>{metadata.get("source_id", metadata.get("document_id", "unknown"))}</source_id>
-            </metadata>
-            <content>
-                <text>
-                    {content}
-                </text>
-            </content>
-        </document>
-        """
-
-        # Create LangChain Document
-        langchain_doc = LangChainDocument(page_content=new_content, metadata=metadata)
-
-        langchain_docs.append(langchain_doc)
-
-    return langchain_docs
-
-
 def generate_content_hash(content: str, search_space_id: int) -> str:
    """Generate SHA-256 hash for the given content combined with search space ID."""
    combined_data = f"{search_space_id}:{content}"