diff --git a/README.md b/README.md index dbd969f25..5cd73d0c3 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ # SurfSense -While tools like NotebookLM and Perplexity are impressive and highly effective for conducting research on any topic/query, SurfSense elevates this capability by integrating with your personal knowledge base. It is a highly customizable AI research agent, connected to external sources such as Search Engines (SearxNG, Tavily, LinkUp), Slack, Linear, Jira, ClickUp, Confluence, Gmail, Notion, YouTube, GitHub, Discord, Airtable, Google Calendar, Luma and more to come. +While tools like NotebookLM and Perplexity are impressive and highly effective for conducting research on any topic/query, SurfSense elevates this capability by integrating with your personal knowledge base. It is a highly customizable AI research agent, connected to external sources such as Search Engines (SearxNG, Tavily, LinkUp), Slack, Linear, Jira, ClickUp, Confluence, Gmail, Notion, YouTube, GitHub, Discord, Airtable, Google Calendar, Luma, Elasticsearch and more to come.
MODSetter%2FSurfSense | Trendshift @@ -76,6 +76,7 @@ Open source and easy to deploy locally. - Airtable - Google Calendar - Luma +- Elasticsearch - and more to come..... ## 📄 **Supported File Extensions** diff --git a/surfsense_backend/alembic/versions/31_add_elasticsearch_connector_enums.py b/surfsense_backend/alembic/versions/31_add_elasticsearch_connector_enums.py new file mode 100644 index 000000000..82311054b --- /dev/null +++ b/surfsense_backend/alembic/versions/31_add_elasticsearch_connector_enums.py @@ -0,0 +1,60 @@ +"""Add ElasticSearch connector enums + +Revision ID: 31 +Revises: 30 +""" + +from collections.abc import Sequence + +from alembic import op + +# revision identifiers +revision: str = "31" +down_revision: str | None = "30" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + # Add enum values + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM pg_type t + JOIN pg_enum e ON t.oid = e.enumtypid + WHERE t.typname = 'searchsourceconnectortype' AND e.enumlabel = 'ELASTICSEARCH_CONNECTOR' + ) THEN + ALTER TYPE searchsourceconnectortype ADD VALUE 'ELASTICSEARCH_CONNECTOR'; + END IF; + END + $$; + """ + ) + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM pg_type t + JOIN pg_enum e ON t.oid = e.enumtypid + WHERE t.typname = 'documenttype' AND e.enumlabel = 'ELASTICSEARCH_CONNECTOR' + ) THEN + ALTER TYPE documenttype ADD VALUE 'ELASTICSEARCH_CONNECTOR'; + END IF; + END + $$; + """ + ) + + +def downgrade() -> None: + """Remove 'ELASTICSEARCH_CONNECTOR' from enum types. + + Note: PostgreSQL does not support removing enum values that may be in use. + Manual intervention would be required if rollback is necessary: + 1. Delete all rows using ELASTICSEARCH_CONNECTOR + 2. Manually remove the enum value using ALTER TYPE ... DROP VALUE (requires no dependencies) + """ + pass diff --git a/surfsense_backend/app/agents/researcher/nodes.py b/surfsense_backend/app/agents/researcher/nodes.py index 80862d1f7..7824e06ea 100644 --- a/surfsense_backend/app/agents/researcher/nodes.py +++ b/surfsense_backend/app/agents/researcher/nodes.py @@ -443,6 +443,25 @@ async def fetch_documents_by_ids( ) url = metadata.get("url", "") + elif doc_type == "ELASTICSEARCH_CONNECTOR": + # Prefer explicit title in metadata/source, otherwise fallback to doc.title + es_title = ( + metadata.get("title") + or metadata.get("es_title") + or doc.title + or f"Elasticsearch: {metadata.get('elasticsearch_index', '')}" + ) + title = es_title + description = metadata.get("description") or ( + doc.content[:100] + "..." 
+ if len(doc.content) > 100 + else doc.content + ) + # If a link or index info is stored, surface it + url = metadata.get("url", "") or metadata.get( + "elasticsearch_index", "" + ) + else: # FILE and other types title = doc.title description = doc.content @@ -464,6 +483,7 @@ async def fetch_documents_by_ids( "SLACK_CONNECTOR": "Slack (Selected)", "NOTION_CONNECTOR": "Notion (Selected)", "GITHUB_CONNECTOR": "GitHub (Selected)", + "ELASTICSEARCH_CONNECTOR": "Elasticsearch (Selected)", "YOUTUBE_VIDEO": "YouTube Videos (Selected)", "DISCORD_CONNECTOR": "Discord (Selected)", "JIRA_CONNECTOR": "Jira Issues (Selected)", @@ -1269,6 +1289,33 @@ async def fetch_relevant_documents( } ) + elif connector == "ELASTICSEARCH_CONNECTOR": + ( + source_object, + elasticsearch_chunks, + ) = await connector_service.search_elasticsearch( + user_query=reformulated_query, + user_id=user_id, + search_space_id=search_space_id, + top_k=top_k, + search_mode=search_mode, + ) + + # Add to sources and raw documents + if source_object: + all_sources.append(source_object) + all_raw_documents.extend(elasticsearch_chunks) + + # Stream found document count + if streaming_service and writer: + writer( + { + "yield_value": streaming_service.format_terminal_info_delta( + f"🔎 Found {len(elasticsearch_chunks)} Elasticsearch chunks related to your query" + ) + } + ) + except Exception as e: logging.error("Error in search_airtable: %s", traceback.format_exc()) error_message = f"Error searching connector {connector}: {e!s}" diff --git a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py index 9c35f90cc..bad0fa813 100644 --- a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py @@ -34,6 +34,7 @@ You are SurfSense, an advanced AI research assistant that provides detailed, wel - NOTION_CONNECTOR: "Notion workspace pages and databases" (personal knowledge management) - YOUTUBE_VIDEO: "YouTube video transcripts and metadata" (personally saved videos) - GITHUB_CONNECTOR: "GitHub repository content and issues" (personal repositories and interactions) +- ELASTICSEARCH_CONNECTOR: "Elasticsearch indexed documents and data" (personal Elasticsearch instances and custom data sources) - LINEAR_CONNECTOR: "Linear project issues and discussions" (personal project management) - JIRA_CONNECTOR: "Jira project issues, tickets, and comments" (personal project tracking) - CONFLUENCE_CONNECTOR: "Confluence pages and comments" (personal project documentation) diff --git a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py index 3c34eb474..a0134bf7d 100644 --- a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py +++ b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py @@ -35,6 +35,7 @@ You are SurfSense, an advanced AI research assistant that synthesizes informatio - NOTION_CONNECTOR: "Notion workspace pages and databases" (personal knowledge management) - YOUTUBE_VIDEO: "YouTube video transcripts and metadata" (personally saved videos) - GITHUB_CONNECTOR: "GitHub repository content and issues" (personal repositories and interactions) +- ELASTICSEARCH_CONNECTOR: "Elasticsearch documents and indices (indexed content from your ES connector)" (personal search index) - LINEAR_CONNECTOR: "Linear project issues and discussions" (personal project management) - JIRA_CONNECTOR: "Jira project 
issues, tickets, and comments" (personal project tracking) - CONFLUENCE_CONNECTOR: "Confluence pages and comments" (personal project documentation) diff --git a/surfsense_backend/app/agents/researcher/utils.py b/surfsense_backend/app/agents/researcher/utils.py index bdfaa4d13..01d9f8f95 100644 --- a/surfsense_backend/app/agents/researcher/utils.py +++ b/surfsense_backend/app/agents/researcher/utils.py @@ -52,6 +52,7 @@ def get_connector_emoji(connector_name: str) -> str: "GOOGLE_CALENDAR_CONNECTOR": "📅", "AIRTABLE_CONNECTOR": "🗃️", "LUMA_CONNECTOR": "✨", + "ELASTICSEARCH_CONNECTOR": "⚡", } return connector_emojis.get(connector_name, "🔎") @@ -76,6 +77,7 @@ def get_connector_friendly_name(connector_name: str) -> str: "BAIDU_SEARCH_API": "Baidu Search", "AIRTABLE_CONNECTOR": "Airtable", "LUMA_CONNECTOR": "Luma", + "ELASTICSEARCH_CONNECTOR": "Elasticsearch", } return connector_friendly_names.get(connector_name, connector_name) diff --git a/surfsense_backend/app/connectors/elasticsearch_connector.py b/surfsense_backend/app/connectors/elasticsearch_connector.py new file mode 100644 index 000000000..1a4f53a46 --- /dev/null +++ b/surfsense_backend/app/connectors/elasticsearch_connector.py @@ -0,0 +1,264 @@ +""" +Elasticsearch connector for SurfSense +""" + +import logging +from typing import Any + +from elasticsearch import AsyncElasticsearch +from elasticsearch.exceptions import ( + AuthenticationException, + ConnectionError, + NotFoundError, +) + +logger = logging.getLogger(__name__) + + +class ElasticsearchConnector: + """ + Connector for Elasticsearch instances + """ + + def __init__( + self, + url: str, + api_key: str | None = None, + username: str | None = None, + password: str | None = None, + verify_certs: bool = True, + ca_certs: str | None = None, + ): + """ + Initialize Elasticsearch connector + + Args: + url: Full Elasticsearch URL (e.g., https://host:port or cloud endpoint) + api_key: API key for authentication (preferred method) + username: Username for basic authentication + password: Password for basic authentication + verify_certs: Whether to verify SSL certificates + ca_certs: Path to CA certificates file + """ + self.url = url + self.api_key = api_key + self.username = username + self.password = password + self.verify_certs = verify_certs + self.ca_certs = ca_certs + + # Build connection configuration + self.es_config = self._build_config() + + # Initialize Elasticsearch client + try: + self.client = AsyncElasticsearch(**self.es_config) + except Exception as e: + logger.error(f"Failed to initialize Elasticsearch client: {e}") + raise + + def _build_config(self) -> dict[str, Any]: + """Build Elasticsearch client configuration""" + config = { + "hosts": [self.url], + "verify_certs": self.verify_certs, + "request_timeout": 30, + "max_retries": 3, + "retry_on_timeout": True, + } + + # Authentication - API key takes precedence + if self.api_key: + config["api_key"] = self.api_key + elif self.username and self.password: + config["basic_auth"] = (self.username, self.password) + + # SSL configuration + if self.ca_certs: + config["ca_certs"] = self.ca_certs + + return config + + async def search( + self, + index: str | list[str], + query: dict[str, Any], + size: int = 100, + from_: int = 0, + fields: list[str] | None = None, + sort: list[dict[str, Any]] | None = None, + ) -> dict[str, Any]: + """ + Search documents in Elasticsearch + + Args: + index: Elasticsearch index name or list of indices + query: Elasticsearch query DSL + size: Number of results to return + from_: Starting offset 
for pagination + fields: List of fields to include in response + sort: Sort configuration + + Returns: + Elasticsearch search response + """ + try: + search_body: dict[str, Any] = { + "query": query, + "size": size, + "from": from_, + } + + if fields: + search_body["_source"] = fields + + if sort: + search_body["sort"] = sort + + response = await self.client.search(index=index, body=search_body) + + total_hits = response.get("hits", {}).get("total", {}) + # normalize total value (could be dict or int depending on server) + total_val = ( + total_hits.get("value", total_hits) + if isinstance(total_hits, dict) + else total_hits + ) + logger.info( + f"Successfully searched index '{index}', found {total_val} results" + ) + return response + + except NotFoundError: + logger.error(f"Index '{index}' not found") + raise + except AuthenticationException: + logger.error("Authentication failed") + raise + except ConnectionError: + logger.error("Failed to connect to Elasticsearch") + raise + except Exception as e: + logger.error(f"Search failed: {e}") + raise + + async def get_indices(self) -> list[str]: + """ + Get list of available indices + + Returns: + List of index names + """ + try: + indices = await self.client.indices.get_alias(index="*") + return list(indices.keys()) + except Exception as e: + logger.error(f"Failed to get indices: {e}") + raise + + async def get_mapping(self, index: str) -> dict[str, Any]: + """ + Get mapping for an index + + Args: + index: Index name + + Returns: + Index mapping + """ + try: + mapping = await self.client.indices.get_mapping(index=index) + return mapping[index]["mappings"] if index in mapping else {} + except Exception as e: + logger.error(f"Failed to get mapping for index '{index}': {e}") + raise + + async def scroll_search( + self, + index: str | list[str], + query: dict[str, Any], + size: int = 1000, + scroll_timeout: str = "5m", + fields: list[str] | None = None, + ): + """ + Perform a scroll search for large result sets + + Args: + index: Elasticsearch index name or list of indices + query: Elasticsearch query DSL + size: Number of results per scroll + scroll_timeout: Scroll timeout + fields: List of fields to include in response + + Yields: + Document hits from Elasticsearch + """ + try: + search_body: dict[str, Any] = { + "query": query, + "size": size, + } + + if fields: + search_body["_source"] = fields + + # Initial search + response = await self.client.search( + index=index, body=search_body, scroll=scroll_timeout + ) + + scroll_id = response.get("_scroll_id") + hits = response.get("hits", {}).get("hits", []) + + while hits: + for hit in hits: + yield hit + + # Continue scrolling + if scroll_id: + response = await self.client.scroll( + scroll_id=scroll_id, scroll=scroll_timeout + ) + scroll_id = response.get("_scroll_id") + hits = response.get("hits", {}).get("hits", []) + + # Clear scroll + if scroll_id: + try: + await self.client.clear_scroll(scroll_id=scroll_id) + except Exception: + logger.debug("Failed to clear scroll id (non-fatal)") + + except Exception as e: + logger.error(f"Scroll search failed: {e}", exc_info=True) + raise + + async def count_documents( + self, index: str | list[str], query: dict[str, Any] | None = None + ) -> int: + """ + Count documents in an index + + Args: + index: Index name or list of indices + query: Optional query to filter documents + + Returns: + Number of documents + """ + try: + if query: + response = await self.client.count(index=index, body={"query": query}) + else: + response = await 
self.client.count(index=index) + + return response["count"] + except Exception as e: + logger.error(f"Failed to count documents in index '{index}': {e}") + raise + + async def close(self): + """Close the Elasticsearch client connection""" + if hasattr(self, "client"): + await self.client.close() diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 91639d64d..db5ea73ea 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -50,6 +50,7 @@ class DocumentType(str, Enum): GOOGLE_GMAIL_CONNECTOR = "GOOGLE_GMAIL_CONNECTOR" AIRTABLE_CONNECTOR = "AIRTABLE_CONNECTOR" LUMA_CONNECTOR = "LUMA_CONNECTOR" + ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR" class SearchSourceConnectorType(str, Enum): @@ -70,6 +71,7 @@ class SearchSourceConnectorType(str, Enum): GOOGLE_GMAIL_CONNECTOR = "GOOGLE_GMAIL_CONNECTOR" AIRTABLE_CONNECTOR = "AIRTABLE_CONNECTOR" LUMA_CONNECTOR = "LUMA_CONNECTOR" + ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR" class ChatType(str, Enum): diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index fc0f0b892..aef330e7f 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -40,6 +40,7 @@ from app.tasks.connector_indexers import ( index_clickup_tasks, index_confluence_pages, index_discord_messages, + index_elasticsearch_documents, index_github_repos, index_google_calendar_events, index_google_gmail_messages, @@ -363,6 +364,7 @@ async def index_connector_content( - JIRA_CONNECTOR: Indexes issues and comments from Jira - DISCORD_CONNECTOR: Indexes messages from all accessible Discord channels - LUMA_CONNECTOR: Indexes events from Luma + - ELASTICSEARCH_CONNECTOR: Indexes documents from Elasticsearch Args: connector_id: ID of the connector to use @@ -589,6 +591,24 @@ async def index_connector_content( ) response_message = "Luma indexing started in the background." + elif ( + connector.connector_type + == SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR + ): + # Run indexing in background + logger.info( + f"Triggering Elasticsearch indexing for connector {connector_id} into search space {search_space_id}" + ) + background_tasks.add_task( + run_elasticsearch_indexing_with_new_session, + connector_id, + search_space_id, + str(user.id), + indexing_from, + indexing_to, + ) + response_message = "Elasticsearch indexing started in the background." 
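+ # run_elasticsearch_indexing_with_new_session (defined below) opens a fresh AsyncSession for the background task, since the request-scoped session is closed once this endpoint returns.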
+ else: raise HTTPException( status_code=400, @@ -1358,3 +1378,61 @@ async def run_luma_indexing( ) except Exception as e: logger.error(f"Error in background Luma indexing task: {e!s}") + + +async def run_elasticsearch_indexing_with_new_session( + connector_id: int, + search_space_id: int, + user_id: str, + start_date: str, + end_date: str, +): + """Wrapper to run Elasticsearch indexing with its own database session.""" + logger.info( + f"Background task started: Indexing Elasticsearch connector {connector_id} into space {search_space_id}" + ) + async with async_session_maker() as session: + await run_elasticsearch_indexing( + session, connector_id, search_space_id, user_id, start_date, end_date + ) + logger.info( + f"Background task finished: Indexing Elasticsearch connector {connector_id}" + ) + + +async def run_elasticsearch_indexing( + session: AsyncSession, + connector_id: int, + search_space_id: int, + user_id: str, + start_date: str, + end_date: str, +): + """Runs the Elasticsearch indexing task and updates the timestamp.""" + try: + indexed_count, error_message = await index_elasticsearch_documents( + session, + connector_id, + search_space_id, + user_id, + start_date, + end_date, + update_last_indexed=False, + ) + if error_message: + logger.error( + f"Elasticsearch indexing failed for connector {connector_id}: {error_message}" + ) + else: + logger.info( + f"Elasticsearch indexing successful for connector {connector_id}. Indexed {indexed_count} documents." + ) + # Update the last indexed timestamp only on success + await update_connector_last_indexed(session, connector_id) + await session.commit() + except Exception as e: + await session.rollback() + logger.error( + f"Critical error in run_elasticsearch_indexing for connector {connector_id}: {e}", + exc_info=True, + ) diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py index 76326b6ac..1ab70276f 100644 --- a/surfsense_backend/app/services/connector_service.py +++ b/surfsense_backend/app/services/connector_service.py @@ -2421,3 +2421,117 @@ class ConnectorService: } return result_object, luma_chunks + + async def search_elasticsearch( + self, + user_query: str, + user_id: str, + search_space_id: int, + top_k: int = 20, + search_mode: SearchMode = SearchMode.CHUNKS, + ) -> tuple: + """ + Search for Elasticsearch documents and return both the source information and langchain documents + + Args: + user_query: The user's query + user_id: The user's ID + search_space_id: The search space ID to search in + top_k: Maximum number of results to return + search_mode: Search mode (CHUNKS or DOCUMENTS) + + Returns: + tuple: (sources_info, langchain_documents) + """ + if search_mode == SearchMode.CHUNKS: + elasticsearch_chunks = await self.chunk_retriever.hybrid_search( + query_text=user_query, + top_k=top_k, + user_id=user_id, + search_space_id=search_space_id, + document_type="ELASTICSEARCH_CONNECTOR", + ) + elif search_mode == SearchMode.DOCUMENTS: + elasticsearch_chunks = await self.document_retriever.hybrid_search( + query_text=user_query, + top_k=top_k, + user_id=user_id, + search_space_id=search_space_id, + document_type="ELASTICSEARCH_CONNECTOR", + ) + # Transform document retriever results to match expected format + elasticsearch_chunks = self._transform_document_results( + elasticsearch_chunks + ) + + # Early return if no results + if not elasticsearch_chunks: + return { + "id": 34, + "name": "Elasticsearch", + "type": "ELASTICSEARCH_CONNECTOR", + "sources": [], + }, [] 
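+ # This empty result keeps the same shape as the populated one built below, so callers never have to special-case "no hits".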
+ + # Process each chunk and create sources directly without deduplication + sources_list = [] + async with self.counter_lock: + for _i, chunk in enumerate(elasticsearch_chunks): + # Extract document metadata + document = chunk.get("document", {}) + metadata = document.get("metadata", {}) + + # Extract Elasticsearch-specific metadata + es_id = metadata.get("elasticsearch_id", "") + es_index = metadata.get("elasticsearch_index", "") + es_score = metadata.get("elasticsearch_score", "") + + # Create a more descriptive title for Elasticsearch documents + title = document.get("title", "Elasticsearch Document") + if es_index: + title = f"{title} (Index: {es_index})" + + # Create a more descriptive description for Elasticsearch documents + description = chunk.get("content", "")[:150] + if len(description) == 150: + description += "..." + + # Add Elasticsearch info to description + info_parts = [] + if es_id: + info_parts.append(f"ID: {es_id}") + if es_score: + info_parts.append(f"Score: {es_score}") + + if info_parts: + if description: + description = f"{description} | {' | '.join(info_parts)}" + else: + description = " | ".join(info_parts) + + # For URL, we could construct a URL to view the document if we have the Elasticsearch UI URL + url = "" + # Could be extended to include Kibana or other UI URLs if configured + + source = { + "id": chunk.get("chunk_id", self.source_id_counter), + "title": title, + "description": description, + "url": url, + "elasticsearch_id": es_id, + "elasticsearch_index": es_index, + "elasticsearch_score": es_score, + } + + self.source_id_counter += 1 + sources_list.append(source) + + # Create result object + result_object = { + "id": 34, # Assign a unique ID for the Elasticsearch connector + "name": "Elasticsearch", + "type": "ELASTICSEARCH_CONNECTOR", + "sources": sources_list, + } + + return result_object, elasticsearch_chunks diff --git a/surfsense_backend/app/tasks/connector_indexers/__init__.py b/surfsense_backend/app/tasks/connector_indexers/__init__.py index fb7936126..766506f70 100644 --- a/surfsense_backend/app/tasks/connector_indexers/__init__.py +++ b/surfsense_backend/app/tasks/connector_indexers/__init__.py @@ -17,6 +17,7 @@ Available indexers: - Google Gmail: Index messages from Google Gmail - Google Calendar: Index events from Google Calendar - Luma: Index events from Luma +- Elasticsearch: Index documents from Elasticsearch instances """ # Communication platforms @@ -27,6 +28,7 @@ from .confluence_indexer import index_confluence_pages from .discord_indexer import index_discord_messages # Development platforms +from .elasticsearch_indexer import index_elasticsearch_documents from .github_indexer import index_github_repos from .google_calendar_indexer import index_google_calendar_events from .google_gmail_indexer import index_google_gmail_messages @@ -46,6 +48,7 @@ __all__ = [ # noqa: RUF022 "index_confluence_pages", "index_discord_messages", # Development platforms + "index_elasticsearch_documents", "index_github_repos", # Calendar and scheduling "index_google_calendar_events", diff --git a/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py b/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py new file mode 100644 index 000000000..b99c77e95 --- /dev/null +++ b/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py @@ -0,0 +1,399 @@ +""" +Elasticsearch indexer for SurfSense +""" + +import hashlib +import json +import logging +from datetime import UTC, datetime +from typing import Any + +from 
sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.future import select + +from app.connectors.elasticsearch_connector import ElasticsearchConnector +from app.db import Document, DocumentType, SearchSourceConnector +from app.services.task_logging_service import TaskLoggingService +from app.utils.document_converters import ( + create_document_chunks, + generate_content_hash, + generate_unique_identifier_hash, +) + +from .base import check_document_by_unique_identifier, check_duplicate_document_by_hash + +logger = logging.getLogger(__name__) + + +async def index_elasticsearch_documents( + session: AsyncSession, + connector_id: int, + search_space_id: int, + user_id: str, + start_date: str, + end_date: str, + update_last_indexed: bool = True, +) -> tuple[int, str | None]: + """ + Index documents from Elasticsearch into SurfSense + + Args: + session: Database session + connector_id: Elasticsearch connector ID + search_space_id: Search space ID + user_id: User ID + start_date: Start date for indexing (not used for Elasticsearch, kept for compatibility) + end_date: End date for indexing (not used for Elasticsearch, kept for compatibility) + update_last_indexed: Whether to update the last indexed timestamp + + Returns: + Tuple of (number of documents processed, error message if any) + """ + task_logger = TaskLoggingService(session, search_space_id) + log_entry = await task_logger.log_task_start( + task_name="elasticsearch_indexing", + source="connector_indexing_task", + message=f"Starting Elasticsearch indexing for connector {connector_id}", + metadata={ + "connector_id": connector_id, + "user_id": str(user_id), + "index": None, + "start_date": start_date, + "end_date": end_date, + }, + ) + es_connector = None + try: + # Get the connector configuration + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == connector_id + ) + ) + connector = result.scalars().first() + + if not connector: + error_msg = f"Elasticsearch connector with ID {connector_id} not found" + logger.error(error_msg) + await task_logger.log_task_failure( + log_entry, + "Connector not found", + error_msg, + {"connector_id": connector_id}, + ) + return 0, error_msg + + # Get connector configuration + config = connector.config + + # Validate required fields - now only URL and INDEX are required + # Authentication can be either API key OR username/password + if "ELASTICSEARCH_URL" not in config: + error_msg = "Missing required field in connector config: ELASTICSEARCH_URL" + logger.error(error_msg) + return 0, error_msg + + # Allow missing/empty index: default to searching all indices ("*" or "_all") + index_name = config.get("ELASTICSEARCH_INDEX") + if not index_name: + index_name = "*" + logger.info( + "ELASTICSEARCH_INDEX missing or empty in connector config; defaulting to '*' (search all indices)" + ) + await task_logger.log_task_progress( + log_entry, + "Using default index", + {"index": index_name, "stage": "index_defaulted"}, + ) + + # Check authentication - must have either API key or username+password + has_api_key = ( + "ELASTICSEARCH_API_KEY" in config and config["ELASTICSEARCH_API_KEY"] + ) + has_basic_auth = ( + "ELASTICSEARCH_USERNAME" in config + and config["ELASTICSEARCH_USERNAME"] + and "ELASTICSEARCH_PASSWORD" in config + and config["ELASTICSEARCH_PASSWORD"] + ) + + if not has_api_key and not has_basic_auth: + error_msg = "Missing authentication: provide either ELASTICSEARCH_API_KEY or ELASTICSEARCH_USERNAME + ELASTICSEARCH_PASSWORD" + logger.error(error_msg) 
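+ # Fail fast: a client built without credentials could only talk to an unsecured cluster, which this connector does not support.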
+ return 0, error_msg + + # Initialize document service + # document_service = _DocumentService(session) + + # Initialize Elasticsearch connector + es_connector = ElasticsearchConnector( + url=config["ELASTICSEARCH_URL"], + api_key=config.get("ELASTICSEARCH_API_KEY"), + username=config.get("ELASTICSEARCH_USERNAME"), + password=config.get("ELASTICSEARCH_PASSWORD"), + verify_certs=config.get("ELASTICSEARCH_VERIFY_CERTS", True), + ca_certs=config.get("ELASTICSEARCH_CA_CERTS"), + ) + await task_logger.log_task_progress( + log_entry, + "Initialized Elasticsearch connector", + {"index": index_name, "stage": "connector_initialized"}, + ) + + # Build query based on configuration + query = _build_elasticsearch_query(config) + + # Get max documents to index + max_documents = config.get("ELASTICSEARCH_MAX_DOCUMENTS", 1000) + + logger.info( + f"Starting Elasticsearch indexing for index '{index_name}' with max {max_documents} documents" + ) + + documents_processed = 0 + + try: + await task_logger.log_task_progress( + log_entry, + "Starting scroll search", + { + "index": index_name, + "stage": "scroll_start", + "max_documents": max_documents, + }, + ) + # Use scroll search for large result sets + async for hit in es_connector.scroll_search( + index=index_name, + query=query, + size=min(max_documents, 100), # Scroll in batches + fields=config.get("ELASTICSEARCH_FIELDS"), + ): + if documents_processed >= max_documents: + break + + try: + # Extract document data + doc_id = hit["_id"] + source = hit.get("_source", {}) + + # Build document title + title_field = config.get("ELASTICSEARCH_TITLE_FIELD") + if not title_field: + for candidate in ("title", "name", "subject"): + if candidate in source: + title_field = candidate + break + title = ( + str(source.get(title_field, doc_id)) + if title_field is not None + else str(doc_id) + ) + + # Build document content + content = _build_document_content(source, config) + + if not content.strip(): + logger.warning(f"Skipping document {doc_id} - no content found") + continue + + # Create content hash + content_hash = generate_content_hash(content, search_space_id) + + # Build metadata + metadata = { + "elasticsearch_id": doc_id, + "elasticsearch_index": hit.get("_index", index_name), + "elasticsearch_score": hit.get("_score"), + "indexed_at": datetime.now().isoformat(), + "source": "ELASTICSEARCH_CONNECTOR", + } + + # Add any additional metadata fields specified in config + if "ELASTICSEARCH_METADATA_FIELDS" in config: + for field in config["ELASTICSEARCH_METADATA_FIELDS"]: + if field in source: + metadata[f"es_{field}"] = source[field] + + # Build source-unique identifier and hash (prefer source id dedupe) + source_identifier = f"{hit.get('_index', index_name)}:{doc_id}" + unique_identifier_hash = generate_unique_identifier_hash( + DocumentType.ELASTICSEARCH_CONNECTOR, + source_identifier, + search_space_id, + ) + + # Two-step duplicate detection: first by source-unique id, then by content hash + existing_doc = await check_document_by_unique_identifier( + session, unique_identifier_hash + ) + if not existing_doc: + existing_doc = await check_duplicate_document_by_hash( + session, content_hash + ) + + if existing_doc: + # If content is unchanged, skip. Otherwise update the existing document. 
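+ # The unique identifier hashes "<index>:<_id>", so re-indexing the same ES document updates the existing row (and rebuilds its chunks, hence its embeddings) instead of inserting a duplicate.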
+ if existing_doc.content_hash == content_hash: + logger.info( + f"Skipping ES doc {doc_id} — already indexed (doc id {existing_doc.id})" + ) + continue + else: + logger.info( + f"Updating existing document {existing_doc.id} for ES doc {doc_id}" + ) + existing_doc.title = title + existing_doc.content = content + existing_doc.content_hash = content_hash + existing_doc.document_metadata = metadata + existing_doc.unique_identifier_hash = unique_identifier_hash + chunks = await create_document_chunks(content) + existing_doc.chunks = chunks + await session.flush() + documents_processed += 1 + if documents_processed % 10 == 0: + await session.commit() + continue + + # Create document + document = Document( + title=title, + content=content, + content_hash=content_hash, + unique_identifier_hash=unique_identifier_hash, + document_type=DocumentType.ELASTICSEARCH_CONNECTOR, + document_metadata=metadata, + search_space_id=search_space_id, + ) + + # Create chunks and attach to document (persist via relationship) + chunks = await create_document_chunks(content) + document.chunks = chunks + session.add(document) + await session.flush() + + documents_processed += 1 + + if documents_processed % 10 == 0: + logger.info( + f"Processed {documents_processed} Elasticsearch documents" + ) + await session.commit() + + except Exception as e: + msg = f"Error processing Elasticsearch document {hit.get('_id', 'unknown')}: {e}" + logger.error(msg) + await task_logger.log_task_failure( + log_entry, + "Document processing error", + msg, + { + "document_id": hit.get("_id", "unknown"), + "error_type": type(e).__name__, + }, + ) + continue + + # Final commit + await session.commit() + + await task_logger.log_task_success( + log_entry, + f"Successfully indexed {documents_processed} documents from Elasticsearch", + {"documents_indexed": documents_processed, "index": index_name}, + ) + logger.info( + f"Successfully indexed {documents_processed} documents from Elasticsearch" + ) + + # Update last indexed timestamp if requested + if update_last_indexed and documents_processed > 0: + # connector.last_indexed_at = datetime.now() + connector.last_indexed_at = ( + datetime.now(UTC).isoformat().replace("+00:00", "Z") + ) + await session.commit() + await task_logger.log_task_progress( + log_entry, + "Updated connector.last_indexed_at", + {"last_indexed_at": connector.last_indexed_at}, + ) + + return documents_processed, None + + finally: + # Clean up Elasticsearch connection + if es_connector: + await es_connector.close() + + except Exception as e: + error_msg = f"Error indexing Elasticsearch documents: {e}" + logger.error(error_msg, exc_info=True) + await task_logger.log_task_failure( + log_entry, "Indexing failed", error_msg, {"error_type": type(e).__name__} + ) + await session.rollback() + if es_connector: + await es_connector.close() + return 0, error_msg + + +def _build_elasticsearch_query(config: dict[str, Any]) -> dict[str, Any]: + """ + Build Elasticsearch query from connector configuration + + Args: + config: Connector configuration + + Returns: + Elasticsearch query DSL + """ + # Check if custom query is provided + if config.get("ELASTICSEARCH_QUERY"): + try: + if isinstance(config["ELASTICSEARCH_QUERY"], str): + return json.loads(config["ELASTICSEARCH_QUERY"]) + else: + return config["ELASTICSEARCH_QUERY"] + except (json.JSONDecodeError, TypeError) as e: + logger.warning(f"Invalid custom query, using match_all: {e}") + + # Default to match all documents + return {"match_all": {}} + + +def _build_document_content(source: 
dict[str, Any], config: dict[str, Any]) -> str: + """ + Build document content from Elasticsearch document source + + Args: + source: Elasticsearch document source + config: Connector configuration + + Returns: + Formatted document content + """ + content_parts = [] + + # Get content fields from config + content_fields = config.get("ELASTICSEARCH_CONTENT_FIELDS", []) + + if content_fields: + # Use specified content fields + for field in content_fields: + if field in source: + field_value = source[field] + if isinstance(field_value, str | int | float): + content_parts.append(f"{field}: {field_value}") + elif isinstance(field_value, list | dict): + content_parts.append(f"{field}: {json.dumps(field_value)}") + else: + # Use all fields if no specific content fields specified + for key, value in source.items(): + if isinstance(value, str | int | float): + content_parts.append(f"{key}: {value}") + elif isinstance(value, list | dict): + content_parts.append(f"{key}: {json.dumps(value)}") + + return "\n".join(content_parts) diff --git a/surfsense_backend/pyproject.toml b/surfsense_backend/pyproject.toml index 34085f28f..ea23407ba 100644 --- a/surfsense_backend/pyproject.toml +++ b/surfsense_backend/pyproject.toml @@ -43,6 +43,7 @@ dependencies = [ "youtube-transcript-api>=1.0.3", "litellm>=1.77.5", "langchain-litellm>=0.2.3", + "elasticsearch>=9.1.1", "faster-whisper>=1.1.0", ] diff --git a/surfsense_backend/uv.lock b/surfsense_backend/uv.lock index f6f7ca9d1..03d58b822 100644 --- a/surfsense_backend/uv.lock +++ b/surfsense_backend/uv.lock @@ -1161,6 +1161,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9c/13/563119fe0af82aca5a3b89399c435953072c39515c2e818eb82793955c3b/effdet-0.4.1-py3-none-any.whl", hash = "sha256:10889a226228d515c948e3fcf811e64c0d78d7aa94823a300045653b9c284cb7", size = 112513, upload-time = "2023-05-21T22:17:58.47Z" }, ] +[[package]] +name = "elastic-transport" +version = "9.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ae/1f/2d1a1790df2b75e1e1eb90d8a3fe066a47ef95e34430657447e549cc274c/elastic_transport-9.1.0.tar.gz", hash = "sha256:1590e44a25b0fe208107d5e8d7dea15c070525f3ac9baafbe4cb659cd14f073d", size = 76483, upload-time = "2025-07-24T16:41:31.017Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/5d/dd5a919dd887fe20a91f18faf5b4345ee3a058e483d2aa84cef0f2567e17/elastic_transport-9.1.0-py3-none-any.whl", hash = "sha256:369fa56874c74daae4ea10cbf40636d139f38f42bec0e006b9cd45a168ee7fce", size = 65142, upload-time = "2025-07-24T16:41:29.648Z" }, +] + +[[package]] +name = "elasticsearch" +version = "9.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "elastic-transport" }, + { name = "python-dateutil" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/59/6a/5eecef6f1ac8005b04714405cb65971d46031bd897e47c29af86e0f87353/elasticsearch-9.1.1.tar.gz", hash = "sha256:be20acda2a97591a9a6cf4981fc398ee6fca3291cf9e7a9e52b6a9f41a46d393", size = 857802, upload-time = "2025-09-12T13:27:38.62Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cf/4c/c0c95d3d881732a5d1b28e12c9be4dea5953ade71810f94565bd5bd2101a/elasticsearch-9.1.1-py3-none-any.whl", hash = "sha256:2a5c27c57ca3dd3365f665c82c9dcd8666ccfb550d5b07c688c21ec636c104e5", size = 937483, upload-time = "2025-09-12T13:27:34.948Z" }, +] + [[package]] name = "email-validator" 
version = "2.2.0" @@ -5402,6 +5429,7 @@ dependencies = [ { name = "chonkie", extra = ["all"] }, { name = "discord-py" }, { name = "docling" }, + { name = "elasticsearch" }, { name = "en-core-web-sm" }, { name = "fastapi" }, { name = "fastapi-users", extra = ["oauth", "sqlalchemy"] }, @@ -5450,6 +5478,7 @@ requires-dist = [ { name = "chonkie", extras = ["all"], specifier = ">=1.0.6" }, { name = "discord-py", specifier = ">=2.5.2" }, { name = "docling", specifier = ">=2.15.0" }, + { name = "elasticsearch", specifier = ">=9.1.1" }, { name = "en-core-web-sm", url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }, { name = "fastapi", specifier = ">=0.115.8" }, { name = "fastapi-users", extras = ["oauth", "sqlalchemy"], specifier = ">=14.0.1" }, diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/[connector_id]/edit/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/[connector_id]/edit/page.tsx index 87844e8c8..f09069521 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/[connector_id]/edit/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/[connector_id]/edit/page.tsx @@ -271,6 +271,17 @@ export default function EditConnectorPage() { placeholder="API Key..." /> )} + + {/* == Elasticsearch == */} + {connector.connector_type === "ELASTICSEARCH_CONNECTOR" && ( + + )} + + {/* Header */} +
+ [The body of this new Elasticsearch connector setup page was mangled in extraction (all JSX tags stripped); the recoverable content is: a header with the ELASTICSEARCH_CONNECTOR icon and the copy "Connect Elasticsearch / Connect to your Elasticsearch cluster to search and index documents."; "Connect" and "Documentation" tabs; a connector form with Connector Name, Elasticsearch Endpoint URL (with a live-parsed hostname/port/SSL preview), an authentication radio group (API Key or Username/Password) that clears the unused credential fields on switch, and Index Selection with parsed-index badges plus wildcard tips ("logs-*", comma-separated, empty searches all accessible indices); advanced options for Default Search Query, Search Fields, and Maximum Documents (1-10,000); a "What you get" benefits list; and a Documentation dialog covering connection setup, advanced configuration, troubleshooting (connection refused, authentication failures, SSL errors, no indices found), and a security note recommending minimal-permission API keys for production.]
+ ); +} diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx index b823e177a..ccf02f107 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx @@ -59,6 +59,13 @@ const connectorCategories: ConnectorCategory[] = [ icon: getConnectorIcon(EnumConnectorName.LINKUP_API, "h-6 w-6"), status: "available", }, + { + id: "elasticsearch-connector", + title: "Elasticsearch", + description: "Connect to Elasticsearch to index and search documents, logs and metrics.", + icon: getConnectorIcon(EnumConnectorName.ELASTICSEARCH_CONNECTOR, "h-6 w-6"), + status: "available", + }, { id: "baidu-search-api", title: "Baidu Search", diff --git a/surfsense_web/components/dashboard-breadcrumb.tsx b/surfsense_web/components/dashboard-breadcrumb.tsx index 6eb71ef5d..e77a1fdb3 100644 --- a/surfsense_web/components/dashboard-breadcrumb.tsx +++ b/surfsense_web/components/dashboard-breadcrumb.tsx @@ -88,6 +88,7 @@ export function DashboardBreadcrumb() { "serper-api": "Serper API", "linkup-api": "LinkUp API", "luma-connector": "Luma", + "elasticsearch-connector": "Elasticsearch", }; const connectorLabel = connectorLabels[connectorType] || connectorType; diff --git a/surfsense_web/components/editConnector/types.ts b/surfsense_web/components/editConnector/types.ts index c069972ba..8a9ef29d2 100644 --- a/surfsense_web/components/editConnector/types.ts +++ b/surfsense_web/components/editConnector/types.ts @@ -51,5 +51,6 @@ export const editConnectorSchema = z.object({ GOOGLE_CALENDAR_REFRESH_TOKEN: z.string().optional(), GOOGLE_CALENDAR_CALENDAR_IDS: z.string().optional(), LUMA_API_KEY: z.string().optional(), + ELASTICSEARCH_API_KEY: z.string().optional(), }); export type EditConnectorFormValues = z.infer; diff --git a/surfsense_web/components/homepage/integrations.tsx b/surfsense_web/components/homepage/integrations.tsx index f94292e9b..6d8433ac0 100644 --- a/surfsense_web/components/homepage/integrations.tsx +++ b/surfsense_web/components/homepage/integrations.tsx @@ -13,6 +13,7 @@ const INTEGRATIONS: Integration[] = [ name: "LinkUp", icon: "https://framerusercontent.com/images/7zeIm6t3f1HaSltkw8upEvsD80.png?scale-down-to=512", }, + { name: "Elasticsearch", icon: "https://cdn.simpleicons.org/elastic/00A9E5" }, // Communication { name: "Slack", icon: "https://cdn.simpleicons.org/slack/4A154B" }, diff --git a/surfsense_web/contracts/enums/connector.ts b/surfsense_web/contracts/enums/connector.ts index 9f886f5d2..50486c92e 100644 --- a/surfsense_web/contracts/enums/connector.ts +++ b/surfsense_web/contracts/enums/connector.ts @@ -16,4 +16,5 @@ export enum EnumConnectorName { GOOGLE_GMAIL_CONNECTOR = "GOOGLE_GMAIL_CONNECTOR", AIRTABLE_CONNECTOR = "AIRTABLE_CONNECTOR", LUMA_CONNECTOR = "LUMA_CONNECTOR", + ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR", } diff --git a/surfsense_web/contracts/enums/connectorIcons.tsx b/surfsense_web/contracts/enums/connectorIcons.tsx index 6af40b7df..66e9edfcc 100644 --- a/surfsense_web/contracts/enums/connectorIcons.tsx +++ b/surfsense_web/contracts/enums/connectorIcons.tsx @@ -1,6 +1,7 @@ import { IconBook, IconBrandDiscord, + IconBrandElastic, IconBrandGithub, IconBrandNotion, IconBrandSlack, @@ -56,6 +57,8 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas return ; case EnumConnectorName.LUMA_CONNECTOR: return ; + case 
EnumConnectorName.ELASTICSEARCH_CONNECTOR: + return ; // Additional cases for non-enum connector types case "YOUTUBE_VIDEO": return ; diff --git a/surfsense_web/hooks/use-connector-edit-page.ts b/surfsense_web/hooks/use-connector-edit-page.ts index 850f7e3e5..1e1d93590 100644 --- a/surfsense_web/hooks/use-connector-edit-page.ts +++ b/surfsense_web/hooks/use-connector-edit-page.ts @@ -96,6 +96,7 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string) JIRA_EMAIL: "", JIRA_API_TOKEN: "", LUMA_API_KEY: "", + ELASTICSEARCH_API_KEY: "", }, }); @@ -140,6 +141,7 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string) JIRA_EMAIL: config.JIRA_EMAIL || "", JIRA_API_TOKEN: config.JIRA_API_TOKEN || "", LUMA_API_KEY: config.LUMA_API_KEY || "", + ELASTICSEARCH_API_KEY: config.ELASTICSEARCH_API_KEY || "", }); if (currentConnector.connector_type === "GITHUB_CONNECTOR") { const savedRepos = config.repo_full_names || []; @@ -457,6 +459,16 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string) newConfig = { LUMA_API_KEY: formData.LUMA_API_KEY }; } break; + case "ELASTICSEARCH_CONNECTOR": + if (formData.ELASTICSEARCH_API_KEY !== originalConfig.ELASTICSEARCH_API_KEY) { + if (!formData.ELASTICSEARCH_API_KEY) { + toast.error("Elasticsearch API Key cannot be empty."); + setIsSaving(false); + return; + } + newConfig = { ELASTICSEARCH_API_KEY: formData.ELASTICSEARCH_API_KEY }; + } + break; } if (newConfig !== null) { @@ -545,6 +557,11 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string) editForm.setValue("JIRA_API_TOKEN", newlySavedConfig.JIRA_API_TOKEN || ""); } else if (connector.connector_type === "LUMA_CONNECTOR") { editForm.setValue("LUMA_API_KEY", newlySavedConfig.LUMA_API_KEY || ""); + } else if (connector.connector_type === "ELASTICSEARCH_CONNECTOR") { + editForm.setValue( + "ELASTICSEARCH_API_KEY", + newlySavedConfig.ELASTICSEARCH_API_KEY || "" + ); } } if (connector.connector_type === "GITHUB_CONNECTOR") { diff --git a/surfsense_web/hooks/use-document-by-chunk.ts b/surfsense_web/hooks/use-document-by-chunk.ts index 25dc9813a..dd36fcab1 100644 --- a/surfsense_web/hooks/use-document-by-chunk.ts +++ b/surfsense_web/hooks/use-document-by-chunk.ts @@ -35,7 +35,8 @@ export type DocumentType = | "CLICKUP_CONNECTOR" | "GOOGLE_CALENDAR_CONNECTOR" | "GOOGLE_GMAIL_CONNECTOR" - | "LUMA_CONNECTOR"; + | "LUMA_CONNECTOR" + | "ELASTICSEARCH_CONNECTOR"; export function useDocumentByChunk() { const [document, setDocument] = useState(null); diff --git a/surfsense_web/hooks/use-documents.ts b/surfsense_web/hooks/use-documents.ts index 4c22a8707..764b000eb 100644 --- a/surfsense_web/hooks/use-documents.ts +++ b/surfsense_web/hooks/use-documents.ts @@ -29,7 +29,8 @@ export type DocumentType = | "GOOGLE_CALENDAR_CONNECTOR" | "GOOGLE_GMAIL_CONNECTOR" | "AIRTABLE_CONNECTOR" - | "LUMA_CONNECTOR"; + | "LUMA_CONNECTOR" + | "ELASTICSEARCH_CONNECTOR"; export interface UseDocumentsOptions { page?: number; diff --git a/surfsense_web/lib/connectors/utils.ts b/surfsense_web/lib/connectors/utils.ts index 5b542d32d..f2052900c 100644 --- a/surfsense_web/lib/connectors/utils.ts +++ b/surfsense_web/lib/connectors/utils.ts @@ -17,6 +17,7 @@ export const getConnectorTypeDisplay = (type: string): string => { GOOGLE_GMAIL_CONNECTOR: "Google Gmail", AIRTABLE_CONNECTOR: "Airtable", LUMA_CONNECTOR: "Luma", + ELASTICSEARCH_CONNECTOR: "Elasticsearch", }; return typeMap[type] || type; }; diff --git a/surfsense_web/package.json 
b/surfsense_web/package.json index f65904784..8897fd6ef 100644 --- a/surfsense_web/package.json +++ b/surfsense_web/package.json @@ -32,6 +32,7 @@ "@radix-ui/react-dropdown-menu": "^2.1.15", "@radix-ui/react-label": "^2.1.7", "@radix-ui/react-popover": "^1.1.14", + "@radix-ui/react-radio-group": "^1.3.8", "@radix-ui/react-scroll-area": "^1.2.9", "@radix-ui/react-select": "^2.2.5", "@radix-ui/react-separator": "^1.1.7", diff --git a/surfsense_web/pnpm-lock.yaml b/surfsense_web/pnpm-lock.yaml index 3463929ec..562a7a7bd 100644 --- a/surfsense_web/pnpm-lock.yaml +++ b/surfsense_web/pnpm-lock.yaml @@ -47,6 +47,9 @@ importers: '@radix-ui/react-popover': specifier: ^1.1.14 version: 1.1.14(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + '@radix-ui/react-radio-group': + specifier: ^1.3.8 + version: 1.3.8(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(react-dom@19.1.0(react@19.1.0))(react@19.1.0) '@radix-ui/react-scroll-area': specifier: ^1.2.9 version: 1.2.9(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(react-dom@19.1.0(react@19.1.0))(react@19.1.0) @@ -118,7 +121,7 @@ importers: version: 15.6.6(@types/react@19.1.8)(next@15.4.4(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(react-dom@19.1.0(react@19.1.0))(react@19.1.0) fumadocs-mdx: specifier: ^11.7.1 - version: 11.7.1(acorn@8.14.0)(fumadocs-core@15.6.6(@types/react@19.1.8)(next@15.4.4(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(next@15.4.4(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(react@19.1.0) + version: 11.7.1(acorn@8.15.0)(fumadocs-core@15.6.6(@types/react@19.1.8)(next@15.4.4(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(next@15.4.4(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(react@19.1.0) fumadocs-ui: specifier: ^15.6.6 version: 15.6.6(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(next@15.4.4(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(react-dom@19.1.0(react@19.1.0))(react@19.1.0)(tailwindcss@4.1.11) @@ -1735,6 +1738,19 @@ packages: '@types/react-dom': optional: true + '@radix-ui/react-presence@1.1.5': + resolution: {integrity: sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==} + peerDependencies: + '@types/react': '*' + '@types/react-dom': '*' + react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc + react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc + peerDependenciesMeta: + '@types/react': + optional: true + '@types/react-dom': + optional: true + '@radix-ui/react-primitive@1.0.0': resolution: {integrity: sha512-EyXe6mnRlHZ8b6f4ilTDrXmkLShICIuOTTj0GX4w1rp+wSxf3+TD05u1UOITC8VsJ2a9nwHvdXtOXEOl0Cw/zQ==} peerDependencies: @@ -1793,6 +1809,19 @@ packages: '@types/react-dom': optional: true + '@radix-ui/react-radio-group@1.3.8': + resolution: {integrity: sha512-VBKYIYImA5zsxACdisNQ3BjCBfmbGH3kQlnFVqlWU4tXwjy7cGX8ta80BcrO+WJXIn5iBylEH3K6ZTlee//lgQ==} + peerDependencies: + '@types/react': '*' + '@types/react-dom': '*' + react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc + react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc + peerDependenciesMeta: + '@types/react': + optional: true + '@types/react-dom': + optional: true + '@radix-ui/react-roving-focus@1.1.10': resolution: {integrity: 
sha512-dT9aOXUen9JSsxnMPv/0VqySQf5eDQ6LCk5Sw28kamz8wSOW2bJdlX2Bg5VUIIcV+6XlHpWTIuTPCf/UNIyq8Q==} peerDependencies: @@ -1806,6 +1835,19 @@ packages: '@types/react-dom': optional: true + '@radix-ui/react-roving-focus@1.1.11': + resolution: {integrity: sha512-7A6S9jSgm/S+7MdtNDSb+IU859vQqJ/QAtcYQcfFC6W8RS4IxIZDldLR0xqCFZ6DCyrQLjLPsxtTNch5jVA4lA==} + peerDependencies: + '@types/react': '*' + '@types/react-dom': '*' + react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc + react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc + peerDependenciesMeta: + '@types/react': + optional: true + '@types/react-dom': + optional: true + '@radix-ui/react-scroll-area@1.2.9': resolution: {integrity: sha512-YSjEfBXnhUELsO2VzjdtYYD4CfQjvao+lhhrX5XsHD7/cyUNzljF1FHEbgTPN7LH2MClfwRMIsYlqTYpKTTe2A==} peerDependencies: @@ -6554,7 +6596,7 @@ snapshots: '@marijn/find-cluster-break@1.0.2': {} - '@mdx-js/mdx@3.1.0(acorn@8.14.0)': + '@mdx-js/mdx@3.1.0(acorn@8.15.0)': dependencies: '@types/estree': 1.0.8 '@types/estree-jsx': 1.0.5 @@ -6568,7 +6610,7 @@ snapshots: hast-util-to-jsx-runtime: 2.3.6 markdown-extensions: 2.0.0 recma-build-jsx: 1.0.0 - recma-jsx: 1.0.0(acorn@8.14.0) + recma-jsx: 1.0.0(acorn@8.15.0) recma-stringify: 1.0.0 rehype-recma: 1.0.0 remark-mdx: 3.1.0 @@ -7260,6 +7302,16 @@ snapshots: '@types/react': 19.1.8 '@types/react-dom': 19.1.6(@types/react@19.1.8) + '@radix-ui/react-presence@1.1.5(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)': + dependencies: + '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.8)(react@19.1.0) + '@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.8)(react@19.1.0) + react: 19.1.0 + react-dom: 19.1.0(react@19.1.0) + optionalDependencies: + '@types/react': 19.1.8 + '@types/react-dom': 19.1.6(@types/react@19.1.8) + '@radix-ui/react-primitive@1.0.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0)': dependencies: '@babel/runtime': 7.26.9 @@ -7305,6 +7357,24 @@ snapshots: '@types/react': 19.1.8 '@types/react-dom': 19.1.6(@types/react@19.1.8) + '@radix-ui/react-radio-group@1.3.8(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)': + dependencies: + '@radix-ui/primitive': 1.1.3 + '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.8)(react@19.1.0) + '@radix-ui/react-context': 1.1.2(@types/react@19.1.8)(react@19.1.0) + '@radix-ui/react-direction': 1.1.1(@types/react@19.1.8)(react@19.1.0) + '@radix-ui/react-presence': 1.1.5(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + '@radix-ui/react-primitive': 2.1.3(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + '@radix-ui/react-roving-focus': 1.1.11(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.8)(react@19.1.0) + '@radix-ui/react-use-previous': 1.1.1(@types/react@19.1.8)(react@19.1.0) + '@radix-ui/react-use-size': 1.1.1(@types/react@19.1.8)(react@19.1.0) + react: 19.1.0 + react-dom: 19.1.0(react@19.1.0) + optionalDependencies: + '@types/react': 19.1.8 + '@types/react-dom': 19.1.6(@types/react@19.1.8) + '@radix-ui/react-roving-focus@1.1.10(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)': dependencies: '@radix-ui/primitive': 1.1.2 @@ -7322,6 +7392,23 @@ snapshots: 
'@types/react': 19.1.8 '@types/react-dom': 19.1.6(@types/react@19.1.8) + '@radix-ui/react-roving-focus@1.1.11(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)': + dependencies: + '@radix-ui/primitive': 1.1.3 + '@radix-ui/react-collection': 1.1.7(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.8)(react@19.1.0) + '@radix-ui/react-context': 1.1.2(@types/react@19.1.8)(react@19.1.0) + '@radix-ui/react-direction': 1.1.1(@types/react@19.1.8)(react@19.1.0) + '@radix-ui/react-id': 1.1.1(@types/react@19.1.8)(react@19.1.0) + '@radix-ui/react-primitive': 2.1.3(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + '@radix-ui/react-use-callback-ref': 1.1.1(@types/react@19.1.8)(react@19.1.0) + '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.8)(react@19.1.0) + react: 19.1.0 + react-dom: 19.1.0(react@19.1.0) + optionalDependencies: + '@types/react': 19.1.8 + '@types/react-dom': 19.1.6(@types/react@19.1.8) + '@radix-ui/react-scroll-area@1.2.9(@types/react-dom@19.1.6(@types/react@19.1.8))(@types/react@19.1.8)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)': dependencies: '@radix-ui/number': 1.1.1 @@ -9073,9 +9160,9 @@ snapshots: transitivePeerDependencies: - supports-color - fumadocs-mdx@11.7.1(acorn@8.14.0)(fumadocs-core@15.6.6(@types/react@19.1.8)(next@15.4.4(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(next@15.4.4(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(react@19.1.0): + fumadocs-mdx@11.7.1(acorn@8.15.0)(fumadocs-core@15.6.6(@types/react@19.1.8)(next@15.4.4(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(next@15.4.4(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(react@19.1.0): dependencies: - '@mdx-js/mdx': 3.1.0(acorn@8.14.0) + '@mdx-js/mdx': 3.1.0(acorn@8.15.0) '@standard-schema/spec': 1.0.0 chokidar: 4.0.3 esbuild: 0.25.8 @@ -11137,9 +11224,9 @@ snapshots: estree-util-build-jsx: 3.0.1 vfile: 6.0.3 - recma-jsx@1.0.0(acorn@8.14.0): + recma-jsx@1.0.0(acorn@8.15.0): dependencies: - acorn-jsx: 5.3.2(acorn@8.14.0) + acorn-jsx: 5.3.2(acorn@8.15.0) estree-util-to-js: 2.0.0 recma-parse: 1.0.0 recma-stringify: 1.0.0
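A minimal sketch of driving the new `ElasticsearchConnector` class directly, mirroring how `index_elasticsearch_documents` uses it. The endpoint URL, API key, and the `logs-*` index pattern are placeholder assumptions; the snippet presumes the `surfsense_backend` package is importable and the `elasticsearch` dependency from this PR is installed.

```python
import asyncio

from app.connectors.elasticsearch_connector import ElasticsearchConnector


async def main() -> None:
    # Placeholder endpoint and credentials; these mirror ELASTICSEARCH_URL,
    # ELASTICSEARCH_API_KEY, and ELASTICSEARCH_VERIFY_CERTS in the connector config.
    es = ElasticsearchConnector(
        url="https://localhost:9200",
        api_key="base64-encoded-api-key",
        verify_certs=False,  # e.g. a local dev cluster with a self-signed cert
    )
    try:
        # Indices visible to these credentials ("*" is what the indexer
        # defaults to when ELASTICSEARCH_INDEX is left empty).
        print(await es.get_indices())

        # The indexer's default query when no ELASTICSEARCH_QUERY is configured.
        query = {"match_all": {}}
        print(await es.count_documents(index="logs-*", query=query))

        # scroll_search is an async generator, the same API the indexer
        # consumes hit by hit.
        async for hit in es.scroll_search(index="logs-*", query=query, size=100):
            print(hit["_id"], hit.get("_index"))
    finally:
        await es.close()


if __name__ == "__main__":
    asyncio.run(main())
```

Swapping `api_key` for `username`/`password` exercises the basic-auth branch of `_build_config`; when both are supplied, the API key takes precedence, matching the class's documented behavior.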