From 476c76461126d8479f2d6b1eb3aa0a09a5b7f3f2 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Tue, 30 Dec 2025 12:13:18 -0800 Subject: [PATCH] feat: fix Circleback connector and update related enums --- .../56_add_circleback_connector_enums.py | 39 ++----- .../agents/new_chat/tools/knowledge_base.py | 11 ++ .../app/services/connector_service.py | 102 ++++++++++++++++++ .../contracts/types/connector.types.ts | 1 + .../contracts/types/document.types.ts | 1 + 5 files changed, 123 insertions(+), 31 deletions(-) diff --git a/surfsense_backend/alembic/versions/56_add_circleback_connector_enums.py b/surfsense_backend/alembic/versions/56_add_circleback_connector_enums.py index 551b8180b..0c06ea139 100644 --- a/surfsense_backend/alembic/versions/56_add_circleback_connector_enums.py +++ b/surfsense_backend/alembic/versions/56_add_circleback_connector_enums.py @@ -19,40 +19,17 @@ depends_on: str | Sequence[str] | None = None def upgrade() -> None: """Safely add 'CIRCLEBACK' to documenttype and 'CIRCLEBACK_CONNECTOR' to searchsourceconnectortype enums if missing.""" + from sqlalchemy import text - # Add to documenttype enum - op.execute( - """ - DO $$ - BEGIN - IF NOT EXISTS ( - SELECT 1 FROM pg_type t - JOIN pg_enum e ON t.oid = e.enumtypid - WHERE t.typname = 'documenttype' AND e.enumlabel = 'CIRCLEBACK' - ) THEN - ALTER TYPE documenttype ADD VALUE 'CIRCLEBACK'; - END IF; - END - $$; - """ - ) + # Get connection and commit current transaction to allow ALTER TYPE + connection = op.get_bind() + connection.execute(text("COMMIT")) + + # Add to documenttype enum (must be outside transaction) + connection.execute(text("ALTER TYPE documenttype ADD VALUE IF NOT EXISTS 'CIRCLEBACK'")) # Add to searchsourceconnectortype enum - op.execute( - """ - DO $$ - BEGIN - IF NOT EXISTS ( - SELECT 1 FROM pg_type t - JOIN pg_enum e ON t.oid = e.enumtypid - WHERE t.typname = 'searchsourceconnectortype' AND e.enumlabel = 'CIRCLEBACK_CONNECTOR' - ) THEN - ALTER TYPE searchsourceconnectortype ADD VALUE 'CIRCLEBACK_CONNECTOR'; - END IF; - END - $$; - """ - ) + connection.execute(text("ALTER TYPE searchsourceconnectortype ADD VALUE IF NOT EXISTS 'CIRCLEBACK_CONNECTOR'")) def downgrade() -> None: diff --git a/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py b/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py index 2096ce2b9..a3cdad359 100644 --- a/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py +++ b/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py @@ -497,6 +497,16 @@ async def search_knowledge_base_async( ) all_documents.extend(chunks) + elif connector == "CIRCLEBACK": + _, chunks = await connector_service.search_circleback( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + except Exception as e: print(f"Error searching connector {connector}: {e}") continue @@ -583,6 +593,7 @@ def create_search_knowledge_base_tool( - LUMA_CONNECTOR: "Luma events" - WEBCRAWLER_CONNECTOR: "Webpages indexed by SurfSense" (personally selected websites) - BOOKSTACK_CONNECTOR: "BookStack pages" (personal documentation) + - CIRCLEBACK: "Circleback meeting notes, transcripts, and action items" (personal meeting records) NOTE: `WEBCRAWLER_CONNECTOR` is mapped internally to the canonical document type `CRAWLED_URL`. diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py index cf0a83dc8..26c687dd7 100644 --- a/surfsense_backend/app/services/connector_service.py +++ b/surfsense_backend/app/services/connector_service.py @@ -2606,3 +2606,105 @@ class ConnectorService: } return result_object, bookstack_docs + + async def search_circleback( + self, + user_query: str, + search_space_id: int, + top_k: int = 20, + start_date: datetime | None = None, + end_date: datetime | None = None, + ) -> tuple: + """ + Search for Circleback meeting notes and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. + + Args: + user_query: The user's query + search_space_id: The search space ID to search in + top_k: Maximum number of results to return + start_date: Optional start date for filtering documents by updated_at + end_date: Optional end date for filtering documents by updated_at + + Returns: + tuple: (sources_info, langchain_documents) + """ + circleback_docs = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="CIRCLEBACK", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) + + # Early return if no results + if not circleback_docs: + return { + "id": 52, + "name": "Circleback Meetings", + "type": "CIRCLEBACK", + "sources": [], + }, [] + + def _title_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: + meeting_name = metadata.get("meeting_name", "") + meeting_date = metadata.get("meeting_date", "") + title = doc_info.get("title") or meeting_name or "Circleback Meeting" + if meeting_date: + title += f" ({meeting_date})" + return title + + def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: + meeting_id = metadata.get("circleback_meeting_id", "") + return ( + f"https://app.circleback.ai/meetings/{meeting_id}" + if meeting_id + else "" + ) + + def _description_fn( + chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any] + ) -> str: + description = self._chunk_preview(chunk.get("content", ""), limit=200) + info_parts = [] + duration = metadata.get("duration_seconds") + attendee_count = metadata.get("attendee_count") + if duration: + minutes = int(duration) // 60 + info_parts.append(f"Duration: {minutes} min") + if attendee_count: + info_parts.append(f"Attendees: {attendee_count}") + if info_parts: + description = (description + " | " + " | ".join(info_parts)).strip(" |") + return description + + def _extra_fields_fn( + _chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any] + ) -> dict[str, Any]: + return { + "circleback_meeting_id": metadata.get("circleback_meeting_id", ""), + "meeting_name": metadata.get("meeting_name", ""), + "meeting_date": metadata.get("meeting_date", ""), + "duration_seconds": metadata.get("duration_seconds", 0), + "attendee_count": metadata.get("attendee_count", 0), + } + + sources_list = self._build_chunk_sources_from_documents( + circleback_docs, + title_fn=_title_fn, + url_fn=_url_fn, + description_fn=_description_fn, + extra_fields_fn=_extra_fields_fn, + ) + + # Create result object + result_object = { + "id": 52, + "name": "Circleback Meetings", + "type": "CIRCLEBACK", + "sources": sources_list, + } + + return result_object, circleback_docs \ No newline at end of file diff --git a/surfsense_web/contracts/types/connector.types.ts b/surfsense_web/contracts/types/connector.types.ts index c590f3941..bc7664777 100644 --- a/surfsense_web/contracts/types/connector.types.ts +++ b/surfsense_web/contracts/types/connector.types.ts @@ -23,6 +23,7 @@ export const searchSourceConnectorTypeEnum = z.enum([ "ELASTICSEARCH_CONNECTOR", "WEBCRAWLER_CONNECTOR", "BOOKSTACK_CONNECTOR", + "CIRCLEBACK_CONNECTOR", ]); export const searchSourceConnector = z.object({ diff --git a/surfsense_web/contracts/types/document.types.ts b/surfsense_web/contracts/types/document.types.ts index 94ff27940..f7eb8f278 100644 --- a/surfsense_web/contracts/types/document.types.ts +++ b/surfsense_web/contracts/types/document.types.ts @@ -21,6 +21,7 @@ export const documentTypeEnum = z.enum([ "ELASTICSEARCH_CONNECTOR", "LINEAR_CONNECTOR", "NOTE", + "CIRCLEBACK", ]); export const document = z.object({