chore: update configuration for rerankers

- Added RERANKERS_ENABLED option to control reranking functionality. - Updated rerank_documents function to handle cases when reranking is disabled. - Enhanced documentation for environment variables related to rerankers in installation guides.
2026-06-10 20:35:17 +02:00 · 2025-10-29 23:23:08 -07:00 · 2025-10-29 23:23:08 -07:00 · 0987440893
commit 0987440893
parent b71e1546b8
5 changed files with 62 additions and 51 deletions
--- a/surfsense_backend/.env.example
+++ b/surfsense_backend/.env.example
@ -3,6 +3,7 @@ DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense
 #Celery Config
 CELERY_BROKER_URL=redis://localhost:6379/0
 CELERY_RESULT_BACKEND=redis://localhost:6379/0
+
 # Periodic task interval
 # # Run every minute (default)
 # SCHEDULE_CHECKER_INTERVAL=1m
@ -18,7 +19,6 @@ CELERY_RESULT_BACKEND=redis://localhost:6379/0

 # # Run every 2 hours
 # SCHEDULE_CHECKER_INTERVAL=2h
-
 SCHEDULE_CHECKER_INTERVAL=5m

 SECRET_KEY=SECRET
@ -26,14 +26,16 @@ NEXT_FRONTEND_URL=http://localhost:3000

 # Auth
 AUTH_TYPE=GOOGLE or LOCAL
-REGISTRATION_ENABLED= TRUE or FALSE
+REGISTRATION_ENABLED=TRUE or FALSE
 # For Google Auth Only
 GOOGLE_OAUTH_CLIENT_ID=924507538m
 GOOGLE_OAUTH_CLIENT_SECRET=GOCSV
+
+# Connector Specific Configs
 GOOGLE_CALENDAR_REDIRECT_URI=http://localhost:8000/api/v1/auth/google/calendar/connector/callback
 GOOGLE_GMAIL_REDIRECT_URI=http://localhost:8000/api/v1/auth/google/gmail/connector/callback

-# Airtable OAuth
+# Airtable OAuth for Aitable Connector
 AIRTABLE_CLIENT_ID=your_airtable_client_id
 AIRTABLE_CLIENT_SECRET=your_airtable_client_secret
 AIRTABLE_REDIRECT_URI=http://localhost:8000/api/v1/auth/airtable/connector/callback
@ -51,20 +53,21 @@ AIRTABLE_REDIRECT_URI=http://localhost:8000/api/v1/auth/airtable/connector/callb

 #     # Get Cohere embeddings
 #     embeddings = AutoEmbeddings.get_embeddings("cohere://embed-english-light-v3.0", api_key="...")
-
 EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2

+# Rerankers Config
+RERANKERS_ENABLED=TRUE or FALSE(Default: FALSE)
 RERANKERS_MODEL_NAME=ms-marco-MiniLM-L-12-v2
 RERANKERS_MODEL_TYPE=flashrank


 # TTS_SERVICE=local/kokoro for local Kokoro TTS or
 # LiteLLM TTS Provider: https://docs.litellm.ai/docs/text_to_speech#supported-providers
-TTS_SERVICE=openai/tts-1
+TTS_SERVICE=local/kokoro
 # Respective TTS Service API
-TTS_SERVICE_API_KEY=
+# TTS_SERVICE_API_KEY=
 # OPTIONAL: TTS Provider API Base
-TTS_SERVICE_API_BASE=
+# TTS_SERVICE_API_BASE=

 # STT Service Configuration
 # For local Faster-Whisper: local/MODEL_SIZE (tiny, base, small, medium, large-v3)
--- a/surfsense_backend/app/agents/researcher/qna_agent/nodes.py
+++ b/surfsense_backend/app/agents/researcher/qna_agent/nodes.py
@ -24,6 +24,8 @@ async def rerank_documents(state: State, config: RunnableConfig) -> dict[str, An
    reranks them using the reranker service based on the user's query,
    and updates the state with the reranked documents.

+    If reranking is disabled, returns the original documents without processing.
+
    Returns:
        Dict containing the reranked documents.
    """
@ -40,45 +42,45 @@ async def rerank_documents(state: State, config: RunnableConfig) -> dict[str, An
    # Get reranker service from app config
    reranker_service = RerankerService.get_reranker_instance()

-    # Use documents as is if no reranker service is available
-    reranked_docs = documents
+    # If reranking is not enabled, return original documents without processing
+    if not reranker_service:
+        print("Reranking is disabled. Using original document order.")
+        return {"reranked_documents": documents}

-    if reranker_service:
-        try:
-            # Convert documents to format expected by reranker if needed
-            reranker_input_docs = [
-                {
-                    "chunk_id": doc.get("chunk_id", f"chunk_{i}"),
-                    "content": doc.get("content", ""),
-                    "score": doc.get("score", 0.0),
-                    "document": {
-                        "id": doc.get("document", {}).get("id", ""),
-                        "title": doc.get("document", {}).get("title", ""),
-                        "document_type": doc.get("document", {}).get(
-                            "document_type", ""
-                        ),
-                        "metadata": doc.get("document", {}).get("metadata", {}),
-                    },
-                }
-                for i, doc in enumerate(documents)
-            ]
+    # Perform reranking
+    try:
+        # Convert documents to format expected by reranker if needed
+        reranker_input_docs = [
+            {
+                "chunk_id": doc.get("chunk_id", f"chunk_{i}"),
+                "content": doc.get("content", ""),
+                "score": doc.get("score", 0.0),
+                "document": {
+                    "id": doc.get("document", {}).get("id", ""),
+                    "title": doc.get("document", {}).get("title", ""),
+                    "document_type": doc.get("document", {}).get("document_type", ""),
+                    "metadata": doc.get("document", {}).get("metadata", {}),
+                },
+            }
+            for i, doc in enumerate(documents)
+        ]

-            # Rerank documents using the user's query
-            reranked_docs = reranker_service.rerank_documents(
-                user_query + "\n" + reformulated_query, reranker_input_docs
-            )
+        # Rerank documents using the user's query
+        reranked_docs = reranker_service.rerank_documents(
+            user_query + "\n" + reformulated_query, reranker_input_docs
+        )

-            # Sort by score in descending order
-            reranked_docs.sort(key=lambda x: x.get("score", 0), reverse=True)
+        # Sort by score in descending order
+        reranked_docs.sort(key=lambda x: x.get("score", 0), reverse=True)

-            print(
-                f"Reranked {len(reranked_docs)} documents for Q&A query: {user_query}"
-            )
-        except Exception as e:
-            print(f"Error during reranking: {e!s}")
-            # Use original docs if reranking fails
+        print(f"Reranked {len(reranked_docs)} documents for Q&A query: {user_query}")

-    return {"reranked_documents": reranked_docs}
+        return {"reranked_documents": reranked_docs}
+
+    except Exception as e:
+        print(f"Error during reranking: {e!s}")
+        # Fall back to original documents if reranking fails
+        return {"reranked_documents": documents}


 async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any]:
--- a/surfsense_backend/app/config/init.py
+++ b/surfsense_backend/app/config/init.py
@ -92,12 +92,16 @@ class Config:
    )

    # Reranker's Configuration | Pinecode, Cohere etc. Read more at https://github.com/AnswerDotAI/rerankers?tab=readme-ov-file#usage
-    RERANKERS_MODEL_NAME = os.getenv("RERANKERS_MODEL_NAME")
-    RERANKERS_MODEL_TYPE = os.getenv("RERANKERS_MODEL_TYPE")
-    reranker_instance = Reranker(
-        model_name=RERANKERS_MODEL_NAME,
-        model_type=RERANKERS_MODEL_TYPE,
-    )
+    RERANKERS_ENABLED = os.getenv("RERANKERS_ENABLED", "FALSE").upper() == "TRUE"
+    if RERANKERS_ENABLED:
+        RERANKERS_MODEL_NAME = os.getenv("RERANKERS_MODEL_NAME")
+        RERANKERS_MODEL_TYPE = os.getenv("RERANKERS_MODEL_TYPE")
+        reranker_instance = Reranker(
+            model_name=RERANKERS_MODEL_NAME,
+            model_type=RERANKERS_MODEL_TYPE,
+        )
+    else:
+        reranker_instance = None

    # OAuth JWT
    SECRET_KEY = os.getenv("SECRET_KEY")
--- a/surfsense_web/content/docs/docker-installation.mdx
+++ b/surfsense_web/content/docs/docker-installation.mdx
@ -88,8 +88,9 @@ Before you begin, ensure you have:
 | GOOGLE_OAUTH_CLIENT_ID     | (Optional) Client ID from Google Cloud Console (required if AUTH_TYPE=GOOGLE)                                                                                                                        |
 | GOOGLE_OAUTH_CLIENT_SECRET | (Optional) Client secret from Google Cloud Console (required if AUTH_TYPE=GOOGLE)                                                                                                                    |
 | EMBEDDING_MODEL            | Name of the embedding model (e.g., `sentence-transformers/all-MiniLM-L6-v2`, `openai://text-embedding-ada-002`)                                                                                                                 |
-| RERANKERS_MODEL_NAME       | Name of the reranker model (e.g., `ms-marco-MiniLM-L-12-v2`)                                                                                                                              |
-| RERANKERS_MODEL_TYPE       | Type of reranker model (e.g., `flashrank`)                                                                                                                                                |
+| RERANKERS_ENABLED          | (Optional) Enable or disable document reranking for improved search results (e.g., `TRUE` or `FALSE`, default: `FALSE`)                                                                  |
+| RERANKERS_MODEL_NAME       | Name of the reranker model (e.g., `ms-marco-MiniLM-L-12-v2`) (required if RERANKERS_ENABLED=TRUE)                                                                                        |
+| RERANKERS_MODEL_TYPE       | Type of reranker model (e.g., `flashrank`) (required if RERANKERS_ENABLED=TRUE)                                                                                                          |
 | TTS_SERVICE                | Text-to-Speech API provider for Podcasts (e.g., `local/kokoro`, `openai/tts-1`). See [supported providers](https://docs.litellm.ai/docs/text_to_speech#supported-providers)                            |
 | TTS_SERVICE_API_KEY        | (Optional if local) API key for the Text-to-Speech service                                                                                                                                                    |
 | TTS_SERVICE_API_BASE       | (Optional) Custom API base URL for the Text-to-Speech service                                                                                                                           |
--- a/surfsense_web/content/docs/manual-installation.mdx
+++ b/surfsense_web/content/docs/manual-installation.mdx
@ -73,8 +73,9 @@ Edit the `.env` file and set the following variables:
 | GOOGLE_OAUTH_CLIENT_ID     | (Optional) Client ID from Google Cloud Console (required if AUTH_TYPE=GOOGLE)                                                                                                                        |
 | GOOGLE_OAUTH_CLIENT_SECRET | (Optional) Client secret from Google Cloud Console (required if AUTH_TYPE=GOOGLE)                                                                                                                    |
 | EMBEDDING_MODEL            | Name of the embedding model (e.g., `sentence-transformers/all-MiniLM-L6-v2`, `openai://text-embedding-ada-002`)                                                                                                                 |
-| RERANKERS_MODEL_NAME       | Name of the reranker model (e.g., `ms-marco-MiniLM-L-12-v2`)                                                                                                                              |
-| RERANKERS_MODEL_TYPE       | Type of reranker model (e.g., `flashrank`)                                                                                                                                                |
+| RERANKERS_ENABLED          | (Optional) Enable or disable document reranking for improved search results (e.g., `TRUE` or `FALSE`, default: `FALSE`)                                                                  |
+| RERANKERS_MODEL_NAME       | Name of the reranker model (e.g., `ms-marco-MiniLM-L-12-v2`) (required if RERANKERS_ENABLED=TRUE)                                                                                        |
+| RERANKERS_MODEL_TYPE       | Type of reranker model (e.g., `flashrank`) (required if RERANKERS_ENABLED=TRUE)                                                                                                          |
 | TTS_SERVICE                | Text-to-Speech API provider for Podcasts (e.g., `local/kokoro`, `openai/tts-1`). See [supported providers](https://docs.litellm.ai/docs/text_to_speech#supported-providers)                            |
 | TTS_SERVICE_API_KEY        | (Optional if local) API key for the Text-to-Speech service                                                                                                                                                    |
 | TTS_SERVICE_API_BASE       | (Optional) Custom API base URL for the Text-to-Speech service                                                                                                                           |