feat: fix Circleback connector and update related enums

2026-05-15 18:25:18 +02:00 · 2025-12-30 12:13:18 -08:00 · 2025-12-30 12:13:18 -08:00 · 476c764611
commit 476c764611
parent c19d300c9d
5 changed files with 123 additions and 31 deletions
--- a/surfsense_backend/alembic/versions/56_add_circleback_connector_enums.py
+++ b/surfsense_backend/alembic/versions/56_add_circleback_connector_enums.py
@ -19,40 +19,17 @@ depends_on: str | Sequence[str] | None = None

 def upgrade() -> None:
    """Safely add 'CIRCLEBACK' to documenttype and 'CIRCLEBACK_CONNECTOR' to searchsourceconnectortype enums if missing."""

-    # Add to documenttype enum
-    op.execute(
-        """
-    DO $$
-    BEGIN
-        IF NOT EXISTS (
-            SELECT 1 FROM pg_type t
-            JOIN pg_enum e ON t.oid = e.enumtypid
-            WHERE t.typname = 'documenttype' AND e.enumlabel = 'CIRCLEBACK'
-        ) THEN
-            ALTER TYPE documenttype ADD VALUE 'CIRCLEBACK';
-        END IF;
-    END
-    $$;
-    """
-    )
+    # Run the enum changes inside Alembic's autocommit block rather than
+    # sending a raw COMMIT over the connection: the block finishes the
+    # migration's in-progress transaction through the migration context, so
+    # Alembic's own transaction bookkeeping stays consistent afterwards.
+    # IF NOT EXISTS keeps the migration safe to re-run.
+    with op.get_context().autocommit_block():
+        # Add to documenttype enum
+        op.execute("ALTER TYPE documenttype ADD VALUE IF NOT EXISTS 'CIRCLEBACK'")

-    # Add to searchsourceconnectortype enum
-    op.execute(
-        """
-    DO $$
-    BEGIN
-        IF NOT EXISTS (
-            SELECT 1 FROM pg_type t
-            JOIN pg_enum e ON t.oid = e.enumtypid
-            WHERE t.typname = 'searchsourceconnectortype' AND e.enumlabel = 'CIRCLEBACK_CONNECTOR'
-        ) THEN
-            ALTER TYPE searchsourceconnectortype ADD VALUE 'CIRCLEBACK_CONNECTOR';
-        END IF;
-    END
-    $$;
-    """
-    )
+        # Add to searchsourceconnectortype enum
+        op.execute(
+            "ALTER TYPE searchsourceconnectortype ADD VALUE IF NOT EXISTS 'CIRCLEBACK_CONNECTOR'"
+        )


 def downgrade() -> None:
--- a/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py
+++ b/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py
@ -497,6 +497,16 @@ async def search_knowledge_base_async(
                )
                all_documents.extend(chunks)

+            elif connector == "CIRCLEBACK":
+                _, chunks = await connector_service.search_circleback(
+                    user_query=query,
+                    search_space_id=search_space_id,
+                    top_k=top_k,
+                    start_date=resolved_start_date,
+                    end_date=resolved_end_date,
+                )
+                all_documents.extend(chunks)
+
        except Exception as e:
            print(f"Error searching connector {connector}: {e}")
            continue
@ -583,6 +593,7 @@ def create_search_knowledge_base_tool(
        - LUMA_CONNECTOR: "Luma events"
        - WEBCRAWLER_CONNECTOR: "Webpages indexed by SurfSense" (personally selected websites)
        - BOOKSTACK_CONNECTOR: "BookStack pages" (personal documentation)
+        - CIRCLEBACK: "Circleback meeting notes, transcripts, and action items" (personal meeting records)

        NOTE: `WEBCRAWLER_CONNECTOR` is mapped internally to the canonical document type `CRAWLED_URL`.

--- a/surfsense_backend/app/services/connector_service.py
+++ b/surfsense_backend/app/services/connector_service.py
@ -2606,3 +2606,105 @@ class ConnectorService:
        }

        return result_object, bookstack_docs
+
+    async def search_circleback(
+        self,
+        user_query: str,
+        search_space_id: int,
+        top_k: int = 20,
+        start_date: datetime | None = None,
+        end_date: datetime | None = None,
+    ) -> tuple:
+        """
+        Search for Circleback meeting notes and return both the source information and langchain documents.
+
+        Retrieval is delegated to ``_combined_rrf_search`` restricted to the
+        ``CIRCLEBACK`` document type; the per-chunk source entries are then
+        assembled by ``_build_chunk_sources_from_documents`` using the
+        formatting callbacks defined below.
+
+        Args:
+            user_query: The user's query
+            search_space_id: The search space ID to search in
+            top_k: Maximum number of results to return
+            start_date: Optional start date forwarded to the search as a filter
+            end_date: Optional end date forwarded to the search as a filter
+
+        Returns:
+            tuple: (sources_info, langchain_documents). ``sources_info`` is a
+            dict with the keys ``id``, ``name``, ``type`` and ``sources``; when
+            nothing matches, ``sources`` is empty and the second element is ``[]``.
+        """
+
+        def _result_object(sources: list) -> dict[str, Any]:
+            # Single definition of the result envelope so the empty and
+            # non-empty paths cannot drift apart.
+            return {
+                "id": 52,
+                "name": "Circleback Meetings",
+                "type": "CIRCLEBACK",
+                "sources": sources,
+            }
+
+        circleback_docs = await self._combined_rrf_search(
+            query_text=user_query,
+            search_space_id=search_space_id,
+            document_type="CIRCLEBACK",
+            top_k=top_k,
+            start_date=start_date,
+            end_date=end_date,
+        )
+
+        # Early return if no results
+        if not circleback_docs:
+            return _result_object([]), []
+
+        def _title_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+            # Prefer the stored document title, then the meeting name, then a
+            # generic label; append the meeting date when one is present.
+            meeting_name = metadata.get("meeting_name", "")
+            meeting_date = metadata.get("meeting_date", "")
+            title = doc_info.get("title") or meeting_name or "Circleback Meeting"
+            if meeting_date:
+                title += f" ({meeting_date})"
+            return title
+
+        def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+            # No meeting id means no link can be built; return an empty URL.
+            meeting_id = metadata.get("circleback_meeting_id", "")
+            return (
+                f"https://app.circleback.ai/meetings/{meeting_id}"
+                if meeting_id
+                else ""
+            )
+
+        def _description_fn(
+            chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+        ) -> str:
+            description = self._chunk_preview(chunk.get("content", ""), limit=200)
+            info_parts = []
+            duration = metadata.get("duration_seconds")
+            attendee_count = metadata.get("attendee_count")
+            if duration:
+                # Metadata values are not validated here; a non-numeric
+                # duration must not abort the whole search, so it is skipped.
+                try:
+                    minutes = int(duration) // 60
+                except (TypeError, ValueError):
+                    minutes = None
+                if minutes is not None:
+                    info_parts.append(f"Duration: {minutes} min")
+            if attendee_count:
+                info_parts.append(f"Attendees: {attendee_count}")
+            if info_parts:
+                # strip(" |") removes the dangling separator when the preview is empty.
+                description = (description + " | " + " | ".join(info_parts)).strip(" |")
+            return description
+
+        def _extra_fields_fn(
+            _chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+        ) -> dict[str, Any]:
+            return {
+                "circleback_meeting_id": metadata.get("circleback_meeting_id", ""),
+                "meeting_name": metadata.get("meeting_name", ""),
+                "meeting_date": metadata.get("meeting_date", ""),
+                "duration_seconds": metadata.get("duration_seconds", 0),
+                "attendee_count": metadata.get("attendee_count", 0),
+            }
+
+        sources_list = self._build_chunk_sources_from_documents(
+            circleback_docs,
+            title_fn=_title_fn,
+            url_fn=_url_fn,
+            description_fn=_description_fn,
+            extra_fields_fn=_extra_fields_fn,
+        )
+
+        return _result_object(sources_list), circleback_docs
--- a/surfsense_web/contracts/types/connector.types.ts
+++ b/surfsense_web/contracts/types/connector.types.ts
@ -23,6 +23,7 @@ export const searchSourceConnectorTypeEnum = z.enum([
 	"ELASTICSEARCH_CONNECTOR",
 	"WEBCRAWLER_CONNECTOR",
 	"BOOKSTACK_CONNECTOR",
+	"CIRCLEBACK_CONNECTOR",
 ]);

 export const searchSourceConnector = z.object({
--- a/surfsense_web/contracts/types/document.types.ts
+++ b/surfsense_web/contracts/types/document.types.ts
@ -21,6 +21,7 @@ export const documentTypeEnum = z.enum([
 	"ELASTICSEARCH_CONNECTOR",
 	"LINEAR_CONNECTOR",
 	"NOTE",
+	"CIRCLEBACK",
 ]);

 export const document = z.object({