diff --git a/surfsense_backend/.env.example b/surfsense_backend/.env.example index 6264ee349..349cb0307 100644 --- a/surfsense_backend/.env.example +++ b/surfsense_backend/.env.example @@ -3,6 +3,7 @@ DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense #Celery Config CELERY_BROKER_URL=redis://localhost:6379/0 CELERY_RESULT_BACKEND=redis://localhost:6379/0 + # Periodic task interval # # Run every minute (default) # SCHEDULE_CHECKER_INTERVAL=1m @@ -18,7 +19,6 @@ CELERY_RESULT_BACKEND=redis://localhost:6379/0 # # Run every 2 hours # SCHEDULE_CHECKER_INTERVAL=2h - SCHEDULE_CHECKER_INTERVAL=5m SECRET_KEY=SECRET @@ -26,14 +26,16 @@ NEXT_FRONTEND_URL=http://localhost:3000 # Auth AUTH_TYPE=GOOGLE or LOCAL -REGISTRATION_ENABLED= TRUE or FALSE +REGISTRATION_ENABLED=TRUE or FALSE # For Google Auth Only GOOGLE_OAUTH_CLIENT_ID=924507538m GOOGLE_OAUTH_CLIENT_SECRET=GOCSV + +# Connector Specific Configs GOOGLE_CALENDAR_REDIRECT_URI=http://localhost:8000/api/v1/auth/google/calendar/connector/callback GOOGLE_GMAIL_REDIRECT_URI=http://localhost:8000/api/v1/auth/google/gmail/connector/callback -# Airtable OAuth +# Airtable OAuth for Airtable Connector AIRTABLE_CLIENT_ID=your_airtable_client_id AIRTABLE_CLIENT_SECRET=your_airtable_client_secret AIRTABLE_REDIRECT_URI=http://localhost:8000/api/v1/auth/airtable/connector/callback @@ -51,20 +53,21 @@ AIRTABLE_REDIRECT_URI=http://localhost:8000/api/v1/auth/airtable/connector/callb # # Get Cohere embeddings # embeddings = AutoEmbeddings.get_embeddings("cohere://embed-english-light-v3.0", api_key="...") - EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 +# Rerankers Config +RERANKERS_ENABLED=TRUE or FALSE (Default: FALSE) RERANKERS_MODEL_NAME=ms-marco-MiniLM-L-12-v2 RERANKERS_MODEL_TYPE=flashrank # TTS_SERVICE=local/kokoro for local Kokoro TTS or # LiteLLM TTS Provider: https://docs.litellm.ai/docs/text_to_speech#supported-providers -TTS_SERVICE=openai/tts-1 +TTS_SERVICE=local/kokoro # Respective TTS Service API 
-TTS_SERVICE_API_KEY= +# TTS_SERVICE_API_KEY= # OPTIONAL: TTS Provider API Base -TTS_SERVICE_API_BASE= +# TTS_SERVICE_API_BASE= # STT Service Configuration # For local Faster-Whisper: local/MODEL_SIZE (tiny, base, small, medium, large-v3) diff --git a/surfsense_backend/app/agents/researcher/qna_agent/nodes.py b/surfsense_backend/app/agents/researcher/qna_agent/nodes.py index c4e79d685..c077899c7 100644 --- a/surfsense_backend/app/agents/researcher/qna_agent/nodes.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/nodes.py @@ -24,6 +24,8 @@ async def rerank_documents(state: State, config: RunnableConfig) -> dict[str, An reranks them using the reranker service based on the user's query, and updates the state with the reranked documents. + If reranking is disabled, returns the original documents without processing. + Returns: Dict containing the reranked documents. """ @@ -40,45 +42,45 @@ async def rerank_documents(state: State, config: RunnableConfig) -> dict[str, An # Get reranker service from app config reranker_service = RerankerService.get_reranker_instance() - # Use documents as is if no reranker service is available - reranked_docs = documents + # If reranking is not enabled, return original documents without processing + if not reranker_service: + print("Reranking is disabled. 
Using original document order.") + return {"reranked_documents": documents} - if reranker_service: - try: - # Convert documents to format expected by reranker if needed - reranker_input_docs = [ - { - "chunk_id": doc.get("chunk_id", f"chunk_{i}"), - "content": doc.get("content", ""), - "score": doc.get("score", 0.0), - "document": { - "id": doc.get("document", {}).get("id", ""), - "title": doc.get("document", {}).get("title", ""), - "document_type": doc.get("document", {}).get( - "document_type", "" - ), - "metadata": doc.get("document", {}).get("metadata", {}), - }, - } - for i, doc in enumerate(documents) - ] + # Perform reranking + try: + # Convert documents to format expected by reranker if needed + reranker_input_docs = [ + { + "chunk_id": doc.get("chunk_id", f"chunk_{i}"), + "content": doc.get("content", ""), + "score": doc.get("score", 0.0), + "document": { + "id": doc.get("document", {}).get("id", ""), + "title": doc.get("document", {}).get("title", ""), + "document_type": doc.get("document", {}).get("document_type", ""), + "metadata": doc.get("document", {}).get("metadata", {}), + }, + } + for i, doc in enumerate(documents) + ] - # Rerank documents using the user's query - reranked_docs = reranker_service.rerank_documents( - user_query + "\n" + reformulated_query, reranker_input_docs - ) + # Rerank documents using the user's query + reranked_docs = reranker_service.rerank_documents( + user_query + "\n" + reformulated_query, reranker_input_docs + ) - # Sort by score in descending order - reranked_docs.sort(key=lambda x: x.get("score", 0), reverse=True) + # Sort by score in descending order + reranked_docs.sort(key=lambda x: x.get("score", 0), reverse=True) - print( - f"Reranked {len(reranked_docs)} documents for Q&A query: {user_query}" - ) - except Exception as e: - print(f"Error during reranking: {e!s}") - # Use original docs if reranking fails + print(f"Reranked {len(reranked_docs)} documents for Q&A query: {user_query}") - return {"reranked_documents": 
reranked_docs} + return {"reranked_documents": reranked_docs} + + except Exception as e: + print(f"Error during reranking: {e!s}") + # Fall back to original documents if reranking fails + return {"reranked_documents": documents} async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any]: diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py index d37c2fad4..ec78dfbac 100644 --- a/surfsense_backend/app/config/__init__.py +++ b/surfsense_backend/app/config/__init__.py @@ -92,12 +92,16 @@ class Config: ) # Reranker's Configuration | Pinecode, Cohere etc. Read more at https://github.com/AnswerDotAI/rerankers?tab=readme-ov-file#usage - RERANKERS_MODEL_NAME = os.getenv("RERANKERS_MODEL_NAME") - RERANKERS_MODEL_TYPE = os.getenv("RERANKERS_MODEL_TYPE") - reranker_instance = Reranker( - model_name=RERANKERS_MODEL_NAME, - model_type=RERANKERS_MODEL_TYPE, - ) + RERANKERS_ENABLED = os.getenv("RERANKERS_ENABLED", "FALSE").upper() == "TRUE" + if RERANKERS_ENABLED: + RERANKERS_MODEL_NAME = os.getenv("RERANKERS_MODEL_NAME") + RERANKERS_MODEL_TYPE = os.getenv("RERANKERS_MODEL_TYPE") + reranker_instance = Reranker( + model_name=RERANKERS_MODEL_NAME, + model_type=RERANKERS_MODEL_TYPE, + ) + else: + reranker_instance = None # OAuth JWT SECRET_KEY = os.getenv("SECRET_KEY") diff --git a/surfsense_web/content/docs/docker-installation.mdx b/surfsense_web/content/docs/docker-installation.mdx index 535325b50..507003a5f 100644 --- a/surfsense_web/content/docs/docker-installation.mdx +++ b/surfsense_web/content/docs/docker-installation.mdx @@ -88,8 +88,9 @@ Before you begin, ensure you have: | GOOGLE_OAUTH_CLIENT_ID | (Optional) Client ID from Google Cloud Console (required if AUTH_TYPE=GOOGLE) | | GOOGLE_OAUTH_CLIENT_SECRET | (Optional) Client secret from Google Cloud Console (required if AUTH_TYPE=GOOGLE) | | EMBEDDING_MODEL | Name of the embedding model (e.g., `sentence-transformers/all-MiniLM-L6-v2`, 
`openai://text-embedding-ada-002`) | -| RERANKERS_MODEL_NAME | Name of the reranker model (e.g., `ms-marco-MiniLM-L-12-v2`) | -| RERANKERS_MODEL_TYPE | Type of reranker model (e.g., `flashrank`) | +| RERANKERS_ENABLED | (Optional) Enable or disable document reranking for improved search results (e.g., `TRUE` or `FALSE`, default: `FALSE`) | +| RERANKERS_MODEL_NAME | Name of the reranker model (e.g., `ms-marco-MiniLM-L-12-v2`) (required if RERANKERS_ENABLED=TRUE) | +| RERANKERS_MODEL_TYPE | Type of reranker model (e.g., `flashrank`) (required if RERANKERS_ENABLED=TRUE) | | TTS_SERVICE | Text-to-Speech API provider for Podcasts (e.g., `local/kokoro`, `openai/tts-1`). See [supported providers](https://docs.litellm.ai/docs/text_to_speech#supported-providers) | | TTS_SERVICE_API_KEY | (Optional if local) API key for the Text-to-Speech service | | TTS_SERVICE_API_BASE | (Optional) Custom API base URL for the Text-to-Speech service | diff --git a/surfsense_web/content/docs/manual-installation.mdx b/surfsense_web/content/docs/manual-installation.mdx index a81e531ce..228eeb190 100644 --- a/surfsense_web/content/docs/manual-installation.mdx +++ b/surfsense_web/content/docs/manual-installation.mdx @@ -73,8 +73,9 @@ Edit the `.env` file and set the following variables: | GOOGLE_OAUTH_CLIENT_ID | (Optional) Client ID from Google Cloud Console (required if AUTH_TYPE=GOOGLE) | | GOOGLE_OAUTH_CLIENT_SECRET | (Optional) Client secret from Google Cloud Console (required if AUTH_TYPE=GOOGLE) | | EMBEDDING_MODEL | Name of the embedding model (e.g., `sentence-transformers/all-MiniLM-L6-v2`, `openai://text-embedding-ada-002`) | -| RERANKERS_MODEL_NAME | Name of the reranker model (e.g., `ms-marco-MiniLM-L-12-v2`) | -| RERANKERS_MODEL_TYPE | Type of reranker model (e.g., `flashrank`) | +| RERANKERS_ENABLED | (Optional) Enable or disable document reranking for improved search results (e.g., `TRUE` or `FALSE`, default: `FALSE`) | +| RERANKERS_MODEL_NAME | Name of the reranker model (e.g., 
`ms-marco-MiniLM-L-12-v2`) (required if RERANKERS_ENABLED=TRUE) | +| RERANKERS_MODEL_TYPE | Type of reranker model (e.g., `flashrank`) (required if RERANKERS_ENABLED=TRUE) | | TTS_SERVICE | Text-to-Speech API provider for Podcasts (e.g., `local/kokoro`, `openai/tts-1`). See [supported providers](https://docs.litellm.ai/docs/text_to_speech#supported-providers) | | TTS_SERVICE_API_KEY | (Optional if local) API key for the Text-to-Speech service | | TTS_SERVICE_API_BASE | (Optional) Custom API base URL for the Text-to-Speech service |