const LOCAL_FILE_PATH_REGEX = /^\/[a-z0-9_-]+\/[^\s`]+(?:\/[^\s`]+)*$/;
@@ -288,7 +281,7 @@ function FilePathLink({ path, className }: { path: string; className?: string })
function MarkdownImage({ src, alt }: { src?: string; alt?: string }) {
if (!src) return null;
- const domain = extractDomain(src);
+ const domain = tryGetHostname(src) ?? "";
return (
diff --git a/surfsense_web/components/tool-ui/citation/citation.tsx b/surfsense_web/components/tool-ui/citation/citation.tsx
index 05ce214f3..a7c82de8c 100644
--- a/surfsense_web/components/tool-ui/citation/citation.tsx
+++ b/surfsense_web/components/tool-ui/citation/citation.tsx
@@ -6,19 +6,11 @@ import * as React from "react";
import { openSafeNavigationHref, sanitizeHref } from "../shared/media";
import { cn, Popover, PopoverContent, PopoverTrigger } from "./_adapter";
import type { CitationVariant, SerializableCitation } from "./schema";
+import { tryGetHostname } from "@/lib/url";
import { TYPE_ICONS } from "./type-icons";
const FALLBACK_LOCALE = "en-US";
-function extractDomain(url: string): string | undefined {
- try {
- const urlObj = new URL(url);
- return urlObj.hostname.replace(/^www\./, "");
- } catch {
- return undefined;
- }
-}
-
function formatDate(isoString: string, locale: string): string {
try {
const date = new Date(isoString);
@@ -78,7 +70,7 @@ export function Citation(props: CitationProps) {
const locale = providedLocale ?? FALLBACK_LOCALE;
const sanitizedHref = sanitizeHref(rawHref);
- const domain = providedDomain ?? extractDomain(rawHref);
+ const domain = providedDomain ?? tryGetHostname(rawHref);
const citationData: SerializableCitation = {
...serializable,
diff --git a/surfsense_web/lib/url.ts b/surfsense_web/lib/url.ts
new file mode 100644
index 000000000..0c9227581
--- /dev/null
+++ b/surfsense_web/lib/url.ts
@@ -0,0 +1,14 @@
+/**
+ * Extract a normalized hostname from a URL. Strips a leading `www.`.
+ * Canonical replacement for the four previously-duplicated `extractDomain`
+ * helpers that had subtly different error fallbacks.
+ * @param url - String handed to the `URL` constructor for parsing.
+ * @returns The hostname, or `undefined` when `new URL(url)` throws.
+ */
+export function tryGetHostname(url: string): string | undefined {
+ try {
+ return new URL(url).hostname.replace(/^www\./, "");
+ } catch {
+ return undefined;
+ }
+}
From cc06cff4fb0054890e9bf52efbfa3345b904c11f Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 16 May 2026 20:20:04 +0530
Subject: [PATCH 14/16] feat(tests): add mock response for file ownership in
composio_module
---
.../tests/e2e/fakes/composio_module.py | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/surfsense_backend/tests/e2e/fakes/composio_module.py b/surfsense_backend/tests/e2e/fakes/composio_module.py
index 38c4d4c46..16a93d0f1 100644
--- a/surfsense_backend/tests/e2e/fakes/composio_module.py
+++ b/surfsense_backend/tests/e2e/fakes/composio_module.py
@@ -350,6 +350,25 @@ def _drive_list_files(args: dict[str, Any]) -> dict[str, Any]:
folder id and serve the matching fixture list.
"""
q = args.get("q", "")
+ if "in owners" in q:
+ return {
+ "data": {
+ "files": [
+ {
+ "id": "fake-file-owner-probe",
+ "name": "owner-probe",
+ "owners": [
+ {
+ "me": True,
+ "emailAddress": "e2e-fake@surfsense.example",
+ }
+ ],
+ }
+ ],
+ "nextPageToken": None,
+ }
+ }
+
folder_id = "root"
if "in parents" in q:
# q looks like: '' in parents and trashed = false ...
From a0f2563dc35158be98f997272a72ed01b9cd2fec Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 16 May 2026 21:13:17 +0530
Subject: [PATCH 15/16] test: expect session_id in Stripe success_url and
 switch Google Calendar indexer tests to ComposioService
---
.../test_stripe_page_purchases.py | 1 +
.../test_calendar_indexer_credentials.py | 61 +++++++++++--------
2 files changed, 37 insertions(+), 25 deletions(-)
diff --git a/surfsense_backend/tests/integration/document_upload/test_stripe_page_purchases.py b/surfsense_backend/tests/integration/document_upload/test_stripe_page_purchases.py
index 1c8f7f990..143c9e252 100644
--- a/surfsense_backend/tests/integration/document_upload/test_stripe_page_purchases.py
+++ b/surfsense_backend/tests/integration/document_upload/test_stripe_page_purchases.py
@@ -204,6 +204,7 @@ class TestStripeCheckoutSessionCreation:
assert (
fake_client.last_params["success_url"]
== f"http://localhost:3000/dashboard/{search_space_id}/purchase-success"
+ "?session_id={CHECKOUT_SESSION_ID}"
)
assert (
fake_client.last_params["cancel_url"]
diff --git a/surfsense_backend/tests/integration/google_unification/test_calendar_indexer_credentials.py b/surfsense_backend/tests/integration/google_unification/test_calendar_indexer_credentials.py
index 795f0d564..44ff5c48a 100644
--- a/surfsense_backend/tests/integration/google_unification/test_calendar_indexer_credentials.py
+++ b/surfsense_backend/tests/integration/google_unification/test_calendar_indexer_credentials.py
@@ -7,7 +7,7 @@ mocked at their system boundaries.
from __future__ import annotations
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import ANY, AsyncMock, MagicMock, patch
import pytest
import pytest_asyncio
@@ -25,6 +25,7 @@ pytestmark = pytest.mark.integration
_COMPOSIO_ACCOUNT_ID = "composio-calendar-test-789"
_INDEXER_MODULE = "app.tasks.connector_indexers.google_calendar_indexer"
+_GET_ACCESS_TOKEN = "app.services.composio_service.ComposioService.get_access_token"
@pytest_asyncio.fixture
@@ -69,32 +70,29 @@ async def native_calendar(async_engine):
await cleanup_space(async_engine, data["search_space_id"])
+@patch(_GET_ACCESS_TOKEN)
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleCalendarConnector")
-@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
-async def test_composio_calendar_uses_composio_credentials(
- mock_build_creds,
+@patch(f"{_INDEXER_MODULE}.ComposioService")
+async def test_composio_calendar_uses_composio_service(
+ mock_composio_service_cls,
mock_cal_cls,
mock_tl_cls,
+ mock_get_access_token,
async_engine,
composio_calendar,
):
- """Calendar indexer calls build_composio_credentials for a Composio connector."""
+ """Calendar indexer uses Composio tools directly for a Composio connector."""
from app.tasks.connector_indexers.google_calendar_indexer import (
index_google_calendar_events,
)
data = composio_calendar
- mock_creds = MagicMock(name="composio-creds")
- mock_build_creds.return_value = mock_creds
+ mock_composio_service = MagicMock()
+ mock_composio_service.get_calendar_events = AsyncMock(return_value=([], None))
+ mock_composio_service_cls.return_value = mock_composio_service
mock_tl_cls.return_value = mock_task_logger()
- mock_cal_instance = MagicMock()
- mock_cal_instance.get_all_primary_calendar_events = AsyncMock(
- return_value=([], None)
- )
- mock_cal_cls.return_value = mock_cal_instance
-
maker = make_session_factory(async_engine)
async with maker() as session:
await index_google_calendar_events(
@@ -104,17 +102,25 @@ async def test_composio_calendar_uses_composio_credentials(
user_id=data["user_id"],
)
- mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
- mock_cal_cls.assert_called_once()
- _, kwargs = mock_cal_cls.call_args
- assert kwargs.get("credentials") is mock_creds
+ mock_composio_service_cls.assert_called_once()
+ mock_composio_service.get_calendar_events.assert_called_once_with(
+ connected_account_id=_COMPOSIO_ACCOUNT_ID,
+ entity_id=f"surfsense_{data['user_id']}",
+ time_min=ANY,
+ time_max=ANY,
+ max_results=250,
+ )
+ mock_cal_cls.assert_not_called()
+ mock_get_access_token.assert_not_called()
+@patch(_GET_ACCESS_TOKEN)
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
-@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
+@patch(f"{_INDEXER_MODULE}.ComposioService")
async def test_composio_calendar_without_account_id_returns_error(
- mock_build_creds,
+ mock_composio_service_cls,
mock_tl_cls,
+ mock_get_access_token,
async_engine,
composio_calendar_no_id,
):
@@ -138,20 +144,23 @@ async def test_composio_calendar_without_account_id_returns_error(
assert count == 0
assert error is not None
assert "composio" in error.lower()
- mock_build_creds.assert_not_called()
+ mock_composio_service_cls.assert_not_called()
+ mock_get_access_token.assert_not_called()
+@patch(_GET_ACCESS_TOKEN)
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
+@patch(f"{_INDEXER_MODULE}.ComposioService")
@patch(f"{_INDEXER_MODULE}.GoogleCalendarConnector")
-@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
-async def test_native_calendar_does_not_use_composio_credentials(
- mock_build_creds,
+async def test_native_calendar_uses_google_calendar_connector(
mock_cal_cls,
+ mock_composio_service_cls,
mock_tl_cls,
+ mock_get_access_token,
async_engine,
native_calendar,
):
- """Calendar indexer does NOT call build_composio_credentials for a native connector."""
+ """Native Calendar connector uses GoogleCalendarConnector with no Composio path."""
from app.tasks.connector_indexers.google_calendar_indexer import (
index_google_calendar_events,
)
@@ -174,4 +183,6 @@ async def test_native_calendar_does_not_use_composio_credentials(
user_id=data["user_id"],
)
- mock_build_creds.assert_not_called()
+ mock_cal_cls.assert_called_once()
+ mock_composio_service_cls.assert_not_called()
+ mock_get_access_token.assert_not_called()
From cb9a0f327caa8b2bc3a5e7e40f737c22a77a1823 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sat, 16 May 2026 21:26:40 +0530
Subject: [PATCH 16/16] test: refactor Gmail indexer tests to utilize
ComposioService and hybrid chunking
---
.../tests/integration/conftest.py | 4 ++
.../test_gmail_indexer_credentials.py | 62 ++++++++++++-------
.../adapters/test_file_upload_adapter.py | 2 +-
3 files changed, 44 insertions(+), 24 deletions(-)
diff --git a/surfsense_backend/tests/integration/conftest.py b/surfsense_backend/tests/integration/conftest.py
index 2f4b3fe9a..e03101e63 100644
--- a/surfsense_backend/tests/integration/conftest.py
+++ b/surfsense_backend/tests/integration/conftest.py
@@ -160,6 +160,10 @@ def patched_chunk_text(monkeypatch) -> MagicMock:
"app.indexing_pipeline.indexing_pipeline_service.chunk_text",
mock,
)
+ monkeypatch.setattr(
+ "app.indexing_pipeline.indexing_pipeline_service.chunk_text_hybrid",
+ mock,
+ )
return mock
diff --git a/surfsense_backend/tests/integration/google_unification/test_gmail_indexer_credentials.py b/surfsense_backend/tests/integration/google_unification/test_gmail_indexer_credentials.py
index afb3e64c3..b869f5607 100644
--- a/surfsense_backend/tests/integration/google_unification/test_gmail_indexer_credentials.py
+++ b/surfsense_backend/tests/integration/google_unification/test_gmail_indexer_credentials.py
@@ -7,7 +7,7 @@ mocked at their system boundaries.
from __future__ import annotations
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import ANY, AsyncMock, MagicMock, patch
import pytest
import pytest_asyncio
@@ -25,6 +25,7 @@ pytestmark = pytest.mark.integration
_COMPOSIO_ACCOUNT_ID = "composio-gmail-test-456"
_INDEXER_MODULE = "app.tasks.connector_indexers.google_gmail_indexer"
+_GET_ACCESS_TOKEN = "app.services.composio_service.ComposioService.get_access_token"
@pytest_asyncio.fixture
@@ -69,30 +70,32 @@ async def native_gmail(async_engine):
await cleanup_space(async_engine, data["search_space_id"])
+@patch(_GET_ACCESS_TOKEN)
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleGmailConnector")
-@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
-async def test_composio_gmail_uses_composio_credentials(
- mock_build_creds,
+@patch(f"{_INDEXER_MODULE}.ComposioService")
+async def test_composio_gmail_uses_composio_service(
+ mock_composio_service_cls,
mock_gmail_cls,
mock_tl_cls,
+ mock_get_access_token,
async_engine,
composio_gmail,
):
- """Gmail indexer calls build_composio_credentials for a Composio connector."""
+ """Gmail indexer uses Composio tools directly for a Composio connector."""
from app.tasks.connector_indexers.google_gmail_indexer import (
index_google_gmail_messages,
)
data = composio_gmail
- mock_creds = MagicMock(name="composio-creds")
- mock_build_creds.return_value = mock_creds
+ mock_composio_service = MagicMock()
+ mock_composio_service.get_gmail_messages = AsyncMock(
+ return_value=([], None, None, None)
+ )
+ mock_composio_service.get_gmail_message_detail = AsyncMock(return_value=({}, None))
+ mock_composio_service_cls.return_value = mock_composio_service
mock_tl_cls.return_value = mock_task_logger()
- mock_gmail_instance = MagicMock()
- mock_gmail_instance.get_recent_messages = AsyncMock(return_value=([], None))
- mock_gmail_cls.return_value = mock_gmail_instance
-
maker = make_session_factory(async_engine)
async with maker() as session:
await index_google_gmail_messages(
@@ -102,17 +105,25 @@ async def test_composio_gmail_uses_composio_credentials(
user_id=data["user_id"],
)
- mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
- mock_gmail_cls.assert_called_once()
- args, _ = mock_gmail_cls.call_args
- assert args[0] is mock_creds
+ mock_composio_service_cls.assert_called_once()
+ mock_composio_service.get_gmail_messages.assert_called_once_with(
+ connected_account_id=_COMPOSIO_ACCOUNT_ID,
+ entity_id=f"surfsense_{data['user_id']}",
+ query=ANY,
+ max_results=ANY,
+ page_token=None,
+ )
+ mock_gmail_cls.assert_not_called()
+ mock_get_access_token.assert_not_called()
+@patch(_GET_ACCESS_TOKEN)
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
-@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
+@patch(f"{_INDEXER_MODULE}.ComposioService")
async def test_composio_gmail_without_account_id_returns_error(
- mock_build_creds,
+ mock_composio_service_cls,
mock_tl_cls,
+ mock_get_access_token,
async_engine,
composio_gmail_no_id,
):
@@ -136,20 +147,23 @@ async def test_composio_gmail_without_account_id_returns_error(
assert count == 0
assert error is not None
assert "composio" in error.lower()
- mock_build_creds.assert_not_called()
+ mock_composio_service_cls.assert_not_called()
+ mock_get_access_token.assert_not_called()
+@patch(_GET_ACCESS_TOKEN)
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
+@patch(f"{_INDEXER_MODULE}.ComposioService")
@patch(f"{_INDEXER_MODULE}.GoogleGmailConnector")
-@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
-async def test_native_gmail_does_not_use_composio_credentials(
- mock_build_creds,
+async def test_native_gmail_uses_google_gmail_connector(
mock_gmail_cls,
+ mock_composio_service_cls,
mock_tl_cls,
+ mock_get_access_token,
async_engine,
native_gmail,
):
- """Gmail indexer does NOT call build_composio_credentials for a native connector."""
+ """Native Gmail connector uses GoogleGmailConnector with no Composio path."""
from app.tasks.connector_indexers.google_gmail_indexer import (
index_google_gmail_messages,
)
@@ -170,4 +184,6 @@ async def test_native_gmail_does_not_use_composio_credentials(
user_id=data["user_id"],
)
- mock_build_creds.assert_not_called()
+ mock_gmail_cls.assert_called_once()
+ mock_composio_service_cls.assert_not_called()
+ mock_get_access_token.assert_not_called()
diff --git a/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py b/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py
index 9fc802aa6..6bb1d2094 100644
--- a/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py
@@ -200,7 +200,7 @@ async def test_reindex_sets_status_ready(db_session, db_search_space, db_user, m
async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, mocker):
"""Reindexing replaces old chunks with new content rather than appending."""
mocker.patch(
- "app.indexing_pipeline.indexing_pipeline_service.chunk_text",
+ "app.indexing_pipeline.indexing_pipeline_service.chunk_text_hybrid",
side_effect=[["Original chunk."], ["Updated chunk."]],
)