mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-23 19:05:16 +02:00
Merge remote-tracking branch 'upstream/dev' into feat/ui-revamp
This commit is contained in:
commit
87caa4b6d0
29 changed files with 1622 additions and 891 deletions
|
|
@ -5,6 +5,17 @@ queries via Zero, instead of replicating all tables in public schema.
|
|||
|
||||
See: https://zero.rocicorp.dev/docs/zero-cache-config#app-publications
|
||||
|
||||
NOTE for future migration authors: this is the ONLY migration allowed
|
||||
to use bare ``CREATE PUBLICATION``. All subsequent mutations of
|
||||
``zero_publication`` MUST use the ``COMMENT ON PUBLICATION`` bookend
|
||||
pattern wrapping an ``ALTER PUBLICATION ... SET TABLE`` -- copy the
|
||||
``upgrade()`` function from migration
|
||||
``143_force_zero_publication_resync.py`` as your starting template.
|
||||
Raw ``DROP``/``CREATE PUBLICATION`` in new migrations would
|
||||
re-introduce bug #1355 (zero-cache stuck on a stale replica snapshot
|
||||
because Zero >= 1.0's change-streamer never sees the schema-change
|
||||
event).
|
||||
|
||||
Revision ID: 116
|
||||
Revises: 115
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -17,6 +17,16 @@ IMPORTANT — before AND after running this migration:
|
|||
3. Delete / reset the zero-cache data volume
|
||||
4. Restart zero-cache (it will do a fresh initial sync)
|
||||
|
||||
DO NOT COPY THIS PATTERN. The ``DROP PUBLICATION`` + ``CREATE
|
||||
PUBLICATION`` dance below is the pre-#1355 anti-pattern: on Zero >=
|
||||
1.0 it does not reliably wake the zero-cache change-streamer and can
|
||||
leave the replica pinned to a stale snapshot. This file is
|
||||
grandfathered in because it has already shipped to users; new
|
||||
publication mutations MUST use the ``COMMENT ON PUBLICATION`` bookend
|
||||
pattern wrapping an ``ALTER PUBLICATION ... SET TABLE`` -- copy the
|
||||
``upgrade()`` function from migration
|
||||
``143_force_zero_publication_resync.py`` as your starting template.
|
||||
|
||||
Revision ID: 117
|
||||
Revises: 116
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,5 +1,16 @@
|
|||
"""Add LOCAL_FOLDER_FILE document type, folder metadata, and document_versions table
|
||||
|
||||
DO NOT COPY THIS PATTERN. The bare ``ALTER PUBLICATION ... ADD/DROP
|
||||
TABLE`` calls below pre-date the ``COMMENT ON PUBLICATION`` bookend
|
||||
fix for bug #1355: on Zero >= 1.0 they do not reliably wake the
|
||||
zero-cache change-streamer and can leave the replica pinned to a
|
||||
stale snapshot. This file is grandfathered in because it has already
|
||||
shipped to users; new publication mutations MUST use the
|
||||
``COMMENT ON PUBLICATION`` bookend pattern wrapping an
|
||||
``ALTER PUBLICATION ... SET TABLE`` -- copy the ``upgrade()`` function
|
||||
from migration ``143_force_zero_publication_resync.py`` as your
|
||||
starting template.
|
||||
|
||||
Revision ID: 118
|
||||
Revises: 117
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -21,6 +21,16 @@ IMPORTANT - before AND after running this migration:
|
|||
3. Delete / reset the zero-cache data volume
|
||||
4. Restart zero-cache (it will do a fresh initial sync)
|
||||
|
||||
DO NOT COPY THIS PATTERN. The ``DROP PUBLICATION`` + ``CREATE
|
||||
PUBLICATION`` dance below is the pre-#1355 anti-pattern: on Zero >=
|
||||
1.0 it does not reliably wake the zero-cache change-streamer and can
|
||||
leave the replica pinned to a stale snapshot. This file is
|
||||
grandfathered in because it has already shipped to users; new
|
||||
publication mutations MUST use the ``COMMENT ON PUBLICATION`` bookend
|
||||
pattern wrapping an ``ALTER PUBLICATION ... SET TABLE`` -- copy the
|
||||
``upgrade()`` function from migration
|
||||
``143_force_zero_publication_resync.py`` as your starting template.
|
||||
|
||||
Revision ID: 139
|
||||
Revises: 138
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -32,6 +32,16 @@ Skipping the zero-cache stop will deadlock at the ACCESS EXCLUSIVE LOCK on
|
|||
"user". Skipping the data-volume reset will leave IndexedDB clients seeing
|
||||
column-not-found errors from a stale catalog snapshot.
|
||||
|
||||
DO NOT COPY THIS PATTERN. The ``DROP PUBLICATION`` + ``CREATE
|
||||
PUBLICATION`` dance below is the pre-#1355 anti-pattern: on Zero >=
|
||||
1.0 it does not reliably wake the zero-cache change-streamer and can
|
||||
leave the replica pinned to a stale snapshot. This file is
|
||||
grandfathered in because it has already shipped to users; new
|
||||
publication mutations MUST use the ``COMMENT ON PUBLICATION`` bookend
|
||||
pattern wrapping an ``ALTER PUBLICATION ... SET TABLE`` -- copy the
|
||||
``upgrade()`` function from migration
|
||||
``143_force_zero_publication_resync.py`` as your starting template.
|
||||
|
||||
Revision ID: 140
|
||||
Revises: 139
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -0,0 +1,142 @@
|
|||
"""force zero-cache to resync after upgrading to Zero >= 1.0
|
||||
|
||||
Re-emits the current ``zero_publication`` shape using
|
||||
``ALTER PUBLICATION ... SET TABLE`` wrapped in
|
||||
``COMMENT ON PUBLICATION`` bookends. This is the publication-change
|
||||
hook documented for Zero ``>=1.0``:
|
||||
|
||||
https://zero.rocicorp.dev/docs/connecting-to-postgres#publication-changes
|
||||
|
||||
Background
|
||||
----------
|
||||
Migrations 117 / 139 / 140 mutated ``zero_publication`` using
|
||||
``DROP PUBLICATION`` + ``CREATE PUBLICATION``. On Zero 0.26.2 that
|
||||
sequence did not reliably wake the zero-cache change-streamer, so
|
||||
affected installs ended up with a SQLite replica file (in the
|
||||
``surfsense-zero-cache`` volume) that was snapshotted against the
|
||||
pre-``user`` publication. The frontend Zero schema includes a
|
||||
``userTable`` query, which then failed with
|
||||
``SchemaVersionNotSupported`` and triggered the default
|
||||
``onUpdateNeeded`` -> ``location.reload()`` every WebSocket keepalive
|
||||
interval (~60s). See bug #1355.
|
||||
|
||||
This migration emits the canonical publication shape one more time,
|
||||
this time using a pattern that fires Postgres event triggers and
|
||||
Zero's schema-change hook. With ``ZERO_AUTO_RESET=true`` (the default)
|
||||
and Zero ``>=1.0``, zero-cache responds by wiping its replica and
|
||||
doing a fresh initial sync from the corrected publication.
|
||||
|
||||
The publication shape itself is unchanged versus migration 140 -- on
|
||||
installs whose replica is already correct, this is a no-op aside
|
||||
from the harmless event-trigger fire.
|
||||
|
||||
Revision ID: 143
|
||||
Revises: 142
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from alembic import op
|
||||
|
||||
revision: str = "143"
|
||||
down_revision: str | None = "142"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
PUBLICATION_NAME = "zero_publication"
|
||||
|
||||
# Must stay in sync with the column lists in migrations 117 / 139 / 140.
|
||||
DOCUMENT_COLS = [
|
||||
"id",
|
||||
"title",
|
||||
"document_type",
|
||||
"search_space_id",
|
||||
"folder_id",
|
||||
"created_by_id",
|
||||
"status",
|
||||
"created_at",
|
||||
"updated_at",
|
||||
]
|
||||
|
||||
USER_COLS = [
|
||||
"id",
|
||||
"pages_limit",
|
||||
"pages_used",
|
||||
"premium_credit_micros_limit",
|
||||
"premium_credit_micros_used",
|
||||
]
|
||||
|
||||
|
||||
def _has_zero_version(conn, table: str) -> bool:
|
||||
return (
|
||||
conn.execute(
|
||||
sa.text(
|
||||
"SELECT 1 FROM information_schema.columns "
|
||||
"WHERE table_name = :tbl AND column_name = '_0_version'"
|
||||
),
|
||||
{"tbl": table},
|
||||
).fetchone()
|
||||
is not None
|
||||
)
|
||||
|
||||
|
||||
def _build_set_table_ddl(
|
||||
*, documents_has_zero_ver: bool, user_has_zero_ver: bool
|
||||
) -> str:
|
||||
doc_cols = DOCUMENT_COLS + (['"_0_version"'] if documents_has_zero_ver else [])
|
||||
user_cols = USER_COLS + (['"_0_version"'] if user_has_zero_ver else [])
|
||||
doc_col_list = ", ".join(doc_cols)
|
||||
user_col_list = ", ".join(user_cols)
|
||||
return (
|
||||
f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE "
|
||||
f"notifications, "
|
||||
f"documents ({doc_col_list}), "
|
||||
f"folders, "
|
||||
f"search_source_connectors, "
|
||||
f"new_chat_messages, "
|
||||
f"chat_comments, "
|
||||
f"chat_session_state, "
|
||||
f'"user" ({user_col_list})'
|
||||
)
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
conn = op.get_bind()
|
||||
|
||||
exists = conn.execute(
|
||||
sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
|
||||
{"name": PUBLICATION_NAME},
|
||||
).fetchone()
|
||||
if not exists:
|
||||
return
|
||||
|
||||
documents_has_zero_ver = _has_zero_version(conn, "documents")
|
||||
user_has_zero_ver = _has_zero_version(conn, "user")
|
||||
|
||||
# The COMMENT-ALTER-COMMENT trio MUST run in a single transaction so
|
||||
# Zero observes them as one schema-change event. Alembic's outer
|
||||
# transaction already covers us, but a SAVEPOINT keeps the trio
|
||||
# atomic with asyncpg, matching the pattern used in migrations
|
||||
# 117 / 139 / 140.
|
||||
tx = conn.begin_nested() if conn.in_transaction() else conn.begin()
|
||||
with tx:
|
||||
conn.execute(
|
||||
sa.text(f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-143-resync'")
|
||||
)
|
||||
conn.execute(
|
||||
sa.text(
|
||||
_build_set_table_ddl(
|
||||
documents_has_zero_ver=documents_has_zero_ver,
|
||||
user_has_zero_ver=user_has_zero_ver,
|
||||
)
|
||||
)
|
||||
)
|
||||
conn.execute(
|
||||
sa.text(f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-143-resync'")
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""No-op. The publication shape is unchanged versus migration 140."""
|
||||
|
|
@ -1491,14 +1491,20 @@ async def stream_new_chat(
|
|||
|
||||
# Resolve @-mention chips to canonical virtual paths and rewrite
|
||||
# the user-typed text so the LLM sees ``\`/documents/...\``` instead
|
||||
# of bare ``@title``. The persisted user-message text keeps
|
||||
# ``@title`` so chip rendering on reload is unchanged — see
|
||||
# ``persistence._build_user_content``.
|
||||
# of bare ``@title``. The substitution lands in ``agent_user_query``
|
||||
# ONLY — the original ``user_query`` (with ``@title`` tokens) flows
|
||||
# untouched into ``persist_user_turn`` below so chip rendering on
|
||||
# reload still works (``UserTextPart`` → ``parseMentionSegments``
|
||||
# matches ``@title``, not ``\`/documents/...\```). It also feeds
|
||||
# the human-readable surfaces — SSE "Processing X" status, auto
|
||||
# thread title, memory seed — which all want what the user typed.
|
||||
# See ``persistence._build_user_content``.
|
||||
#
|
||||
# Cloud mode only: local-folder mode keeps the legacy
|
||||
# ``@title`` text path; mention support there is a follow-up
|
||||
# task because the path scheme (mount-rooted) and the picker
|
||||
# UI both need separate work.
|
||||
agent_user_query = user_query
|
||||
accepted_folder_ids: list[int] = []
|
||||
if fs_mode == FilesystemMode.CLOUD.value and (
|
||||
mentioned_document_ids
|
||||
|
|
@ -1533,11 +1539,13 @@ async def stream_new_chat(
|
|||
mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
|
||||
mentioned_folder_ids=mentioned_folder_ids,
|
||||
)
|
||||
user_query = substitute_in_text(user_query, resolved.token_to_path)
|
||||
agent_user_query = substitute_in_text(user_query, resolved.token_to_path)
|
||||
accepted_folder_ids = resolved.mentioned_folder_ids
|
||||
|
||||
# Format the user query with context (SurfSense docs + reports only)
|
||||
final_query = user_query
|
||||
# Format the user query with context (SurfSense docs + reports only).
|
||||
# Uses ``agent_user_query`` so the LLM sees backtick-wrapped paths
|
||||
# instead of bare ``@title`` tokens.
|
||||
final_query = agent_user_query
|
||||
context_parts = []
|
||||
|
||||
if mentioned_surfsense_docs:
|
||||
|
|
@ -1568,7 +1576,7 @@ async def stream_new_chat(
|
|||
|
||||
if context_parts:
|
||||
context = "\n\n".join(context_parts)
|
||||
final_query = f"{context}\n\n<user_query>{user_query}</user_query>"
|
||||
final_query = f"{context}\n\n<user_query>{agent_user_query}</user_query>"
|
||||
|
||||
if visibility == ChatVisibility.SEARCH_SPACE and current_user_display_name:
|
||||
final_query = f"**[{current_user_display_name}]:** {final_query}"
|
||||
|
|
|
|||
|
|
@ -350,6 +350,25 @@ def _drive_list_files(args: dict[str, Any]) -> dict[str, Any]:
|
|||
folder id and serve the matching fixture list.
|
||||
"""
|
||||
q = args.get("q", "")
|
||||
if "in owners" in q:
|
||||
return {
|
||||
"data": {
|
||||
"files": [
|
||||
{
|
||||
"id": "fake-file-owner-probe",
|
||||
"name": "owner-probe",
|
||||
"owners": [
|
||||
{
|
||||
"me": True,
|
||||
"emailAddress": "e2e-fake@surfsense.example",
|
||||
}
|
||||
],
|
||||
}
|
||||
],
|
||||
"nextPageToken": None,
|
||||
}
|
||||
}
|
||||
|
||||
folder_id = "root"
|
||||
if "in parents" in q:
|
||||
# q looks like: '<folder_id>' in parents and trashed = false ...
|
||||
|
|
|
|||
|
|
@ -1,13 +1,11 @@
|
|||
"""Composio route integration fixtures.
|
||||
|
||||
The sys.modules hijack happens at module import time, before importing
|
||||
app.app, so production `from composio import Composio` bindings resolve to
|
||||
the strict E2E fake in this pytest process too.
|
||||
The `composio` sys.modules hijack lives in the parent integration conftest
|
||||
so it runs before any sibling suite imports `app.routes`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from collections.abc import AsyncGenerator
|
||||
|
||||
import httpx
|
||||
|
|
@ -16,19 +14,15 @@ import pytest_asyncio
|
|||
from httpx import ASGITransport
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from tests.e2e.fakes import composio_module as _fake_composio
|
||||
|
||||
sys.modules["composio"] = _fake_composio
|
||||
|
||||
from app.app import app, limiter # noqa: E402
|
||||
from app.config import config # noqa: E402
|
||||
from app.db import ( # noqa: E402
|
||||
from app.app import app, limiter
|
||||
from app.config import config
|
||||
from app.db import (
|
||||
SearchSourceConnector,
|
||||
SearchSourceConnectorType,
|
||||
User,
|
||||
get_async_session,
|
||||
)
|
||||
from app.users import current_active_user # noqa: E402
|
||||
from app.users import current_active_user
|
||||
|
||||
pytestmark = pytest.mark.integration
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
import importlib
|
||||
import sys
|
||||
import uuid
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
|
|
@ -7,17 +9,27 @@ from sqlalchemy import text
|
|||
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
|
||||
from sqlalchemy.pool import NullPool
|
||||
|
||||
from app.config import config as app_config
|
||||
from app.db import (
|
||||
Base,
|
||||
DocumentType,
|
||||
SearchSourceConnector,
|
||||
SearchSourceConnectorType,
|
||||
SearchSpace,
|
||||
User,
|
||||
)
|
||||
from app.indexing_pipeline.connector_document import ConnectorDocument
|
||||
from tests.conftest import TEST_DATABASE_URL
|
||||
# Hijack `composio` before any `from app.*` import; the `from composio import
|
||||
# Composio` in app.services.composio_service binds once at first import.
|
||||
from tests.e2e.fakes import composio_module as _fake_composio
|
||||
|
||||
sys.modules["composio"] = _fake_composio
|
||||
|
||||
app_config = importlib.import_module("app.config").config
|
||||
app_db = importlib.import_module("app.db")
|
||||
Base = app_db.Base
|
||||
DocumentType = app_db.DocumentType
|
||||
SearchSourceConnector = app_db.SearchSourceConnector
|
||||
SearchSourceConnectorType = app_db.SearchSourceConnectorType
|
||||
SearchSpace = app_db.SearchSpace
|
||||
User = app_db.User
|
||||
ConnectorDocument = importlib.import_module(
|
||||
"app.indexing_pipeline.connector_document"
|
||||
).ConnectorDocument
|
||||
create_default_roles_and_membership = importlib.import_module(
|
||||
"app.routes.search_spaces_routes"
|
||||
).create_default_roles_and_membership
|
||||
TEST_DATABASE_URL = importlib.import_module("tests.conftest").TEST_DATABASE_URL
|
||||
|
||||
_EMBEDDING_DIM = app_config.embedding_model_instance.dimension
|
||||
|
||||
|
|
@ -105,6 +117,9 @@ async def db_search_space(db_session: AsyncSession, db_user: User) -> SearchSpac
|
|||
)
|
||||
db_session.add(space)
|
||||
await db_session.flush()
|
||||
# Mirror POST /searchspaces so routes guarded by check_permission find a membership.
|
||||
await create_default_roles_and_membership(db_session, space.id, db_user.id)
|
||||
await db_session.flush()
|
||||
return space
|
||||
|
||||
|
||||
|
|
@ -145,6 +160,10 @@ def patched_chunk_text(monkeypatch) -> MagicMock:
|
|||
"app.indexing_pipeline.indexing_pipeline_service.chunk_text",
|
||||
mock,
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.chunk_text_hybrid",
|
||||
mock,
|
||||
)
|
||||
return mock
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -204,6 +204,7 @@ class TestStripeCheckoutSessionCreation:
|
|||
assert (
|
||||
fake_client.last_params["success_url"]
|
||||
== f"http://localhost:3000/dashboard/{search_space_id}/purchase-success"
|
||||
"?session_id={CHECKOUT_SESSION_ID}"
|
||||
)
|
||||
assert (
|
||||
fake_client.last_params["cancel_url"]
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ mocked at their system boundaries.
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from unittest.mock import ANY, AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
|
@ -25,6 +25,7 @@ pytestmark = pytest.mark.integration
|
|||
|
||||
_COMPOSIO_ACCOUNT_ID = "composio-calendar-test-789"
|
||||
_INDEXER_MODULE = "app.tasks.connector_indexers.google_calendar_indexer"
|
||||
_GET_ACCESS_TOKEN = "app.services.composio_service.ComposioService.get_access_token"
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
|
|
@ -69,32 +70,29 @@ async def native_calendar(async_engine):
|
|||
await cleanup_space(async_engine, data["search_space_id"])
|
||||
|
||||
|
||||
@patch(_GET_ACCESS_TOKEN)
|
||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
|
||||
@patch(f"{_INDEXER_MODULE}.GoogleCalendarConnector")
|
||||
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
|
||||
async def test_composio_calendar_uses_composio_credentials(
|
||||
mock_build_creds,
|
||||
@patch(f"{_INDEXER_MODULE}.ComposioService")
|
||||
async def test_composio_calendar_uses_composio_service(
|
||||
mock_composio_service_cls,
|
||||
mock_cal_cls,
|
||||
mock_tl_cls,
|
||||
mock_get_access_token,
|
||||
async_engine,
|
||||
composio_calendar,
|
||||
):
|
||||
"""Calendar indexer calls build_composio_credentials for a Composio connector."""
|
||||
"""Calendar indexer uses Composio tools directly for a Composio connector."""
|
||||
from app.tasks.connector_indexers.google_calendar_indexer import (
|
||||
index_google_calendar_events,
|
||||
)
|
||||
|
||||
data = composio_calendar
|
||||
mock_creds = MagicMock(name="composio-creds")
|
||||
mock_build_creds.return_value = mock_creds
|
||||
mock_composio_service = MagicMock()
|
||||
mock_composio_service.get_calendar_events = AsyncMock(return_value=([], None))
|
||||
mock_composio_service_cls.return_value = mock_composio_service
|
||||
mock_tl_cls.return_value = mock_task_logger()
|
||||
|
||||
mock_cal_instance = MagicMock()
|
||||
mock_cal_instance.get_all_primary_calendar_events = AsyncMock(
|
||||
return_value=([], None)
|
||||
)
|
||||
mock_cal_cls.return_value = mock_cal_instance
|
||||
|
||||
maker = make_session_factory(async_engine)
|
||||
async with maker() as session:
|
||||
await index_google_calendar_events(
|
||||
|
|
@ -104,17 +102,25 @@ async def test_composio_calendar_uses_composio_credentials(
|
|||
user_id=data["user_id"],
|
||||
)
|
||||
|
||||
mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
|
||||
mock_cal_cls.assert_called_once()
|
||||
_, kwargs = mock_cal_cls.call_args
|
||||
assert kwargs.get("credentials") is mock_creds
|
||||
mock_composio_service_cls.assert_called_once()
|
||||
mock_composio_service.get_calendar_events.assert_called_once_with(
|
||||
connected_account_id=_COMPOSIO_ACCOUNT_ID,
|
||||
entity_id=f"surfsense_{data['user_id']}",
|
||||
time_min=ANY,
|
||||
time_max=ANY,
|
||||
max_results=250,
|
||||
)
|
||||
mock_cal_cls.assert_not_called()
|
||||
mock_get_access_token.assert_not_called()
|
||||
|
||||
|
||||
@patch(_GET_ACCESS_TOKEN)
|
||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
|
||||
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
|
||||
@patch(f"{_INDEXER_MODULE}.ComposioService")
|
||||
async def test_composio_calendar_without_account_id_returns_error(
|
||||
mock_build_creds,
|
||||
mock_composio_service_cls,
|
||||
mock_tl_cls,
|
||||
mock_get_access_token,
|
||||
async_engine,
|
||||
composio_calendar_no_id,
|
||||
):
|
||||
|
|
@ -138,20 +144,23 @@ async def test_composio_calendar_without_account_id_returns_error(
|
|||
assert count == 0
|
||||
assert error is not None
|
||||
assert "composio" in error.lower()
|
||||
mock_build_creds.assert_not_called()
|
||||
mock_composio_service_cls.assert_not_called()
|
||||
mock_get_access_token.assert_not_called()
|
||||
|
||||
|
||||
@patch(_GET_ACCESS_TOKEN)
|
||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
|
||||
@patch(f"{_INDEXER_MODULE}.ComposioService")
|
||||
@patch(f"{_INDEXER_MODULE}.GoogleCalendarConnector")
|
||||
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
|
||||
async def test_native_calendar_does_not_use_composio_credentials(
|
||||
mock_build_creds,
|
||||
async def test_native_calendar_uses_google_calendar_connector(
|
||||
mock_cal_cls,
|
||||
mock_composio_service_cls,
|
||||
mock_tl_cls,
|
||||
mock_get_access_token,
|
||||
async_engine,
|
||||
native_calendar,
|
||||
):
|
||||
"""Calendar indexer does NOT call build_composio_credentials for a native connector."""
|
||||
"""Native Calendar connector uses GoogleCalendarConnector with no Composio path."""
|
||||
from app.tasks.connector_indexers.google_calendar_indexer import (
|
||||
index_google_calendar_events,
|
||||
)
|
||||
|
|
@ -174,4 +183,6 @@ async def test_native_calendar_does_not_use_composio_credentials(
|
|||
user_id=data["user_id"],
|
||||
)
|
||||
|
||||
mock_build_creds.assert_not_called()
|
||||
mock_cal_cls.assert_called_once()
|
||||
mock_composio_service_cls.assert_not_called()
|
||||
mock_get_access_token.assert_not_called()
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ mocked at their system boundaries.
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from unittest.mock import ANY, AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
|
@ -25,6 +25,7 @@ pytestmark = pytest.mark.integration
|
|||
|
||||
_COMPOSIO_ACCOUNT_ID = "composio-gmail-test-456"
|
||||
_INDEXER_MODULE = "app.tasks.connector_indexers.google_gmail_indexer"
|
||||
_GET_ACCESS_TOKEN = "app.services.composio_service.ComposioService.get_access_token"
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
|
|
@ -69,30 +70,32 @@ async def native_gmail(async_engine):
|
|||
await cleanup_space(async_engine, data["search_space_id"])
|
||||
|
||||
|
||||
@patch(_GET_ACCESS_TOKEN)
|
||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
|
||||
@patch(f"{_INDEXER_MODULE}.GoogleGmailConnector")
|
||||
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
|
||||
async def test_composio_gmail_uses_composio_credentials(
|
||||
mock_build_creds,
|
||||
@patch(f"{_INDEXER_MODULE}.ComposioService")
|
||||
async def test_composio_gmail_uses_composio_service(
|
||||
mock_composio_service_cls,
|
||||
mock_gmail_cls,
|
||||
mock_tl_cls,
|
||||
mock_get_access_token,
|
||||
async_engine,
|
||||
composio_gmail,
|
||||
):
|
||||
"""Gmail indexer calls build_composio_credentials for a Composio connector."""
|
||||
"""Gmail indexer uses Composio tools directly for a Composio connector."""
|
||||
from app.tasks.connector_indexers.google_gmail_indexer import (
|
||||
index_google_gmail_messages,
|
||||
)
|
||||
|
||||
data = composio_gmail
|
||||
mock_creds = MagicMock(name="composio-creds")
|
||||
mock_build_creds.return_value = mock_creds
|
||||
mock_composio_service = MagicMock()
|
||||
mock_composio_service.get_gmail_messages = AsyncMock(
|
||||
return_value=([], None, None, None)
|
||||
)
|
||||
mock_composio_service.get_gmail_message_detail = AsyncMock(return_value=({}, None))
|
||||
mock_composio_service_cls.return_value = mock_composio_service
|
||||
mock_tl_cls.return_value = mock_task_logger()
|
||||
|
||||
mock_gmail_instance = MagicMock()
|
||||
mock_gmail_instance.get_recent_messages = AsyncMock(return_value=([], None))
|
||||
mock_gmail_cls.return_value = mock_gmail_instance
|
||||
|
||||
maker = make_session_factory(async_engine)
|
||||
async with maker() as session:
|
||||
await index_google_gmail_messages(
|
||||
|
|
@ -102,17 +105,25 @@ async def test_composio_gmail_uses_composio_credentials(
|
|||
user_id=data["user_id"],
|
||||
)
|
||||
|
||||
mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
|
||||
mock_gmail_cls.assert_called_once()
|
||||
args, _ = mock_gmail_cls.call_args
|
||||
assert args[0] is mock_creds
|
||||
mock_composio_service_cls.assert_called_once()
|
||||
mock_composio_service.get_gmail_messages.assert_called_once_with(
|
||||
connected_account_id=_COMPOSIO_ACCOUNT_ID,
|
||||
entity_id=f"surfsense_{data['user_id']}",
|
||||
query=ANY,
|
||||
max_results=ANY,
|
||||
page_token=None,
|
||||
)
|
||||
mock_gmail_cls.assert_not_called()
|
||||
mock_get_access_token.assert_not_called()
|
||||
|
||||
|
||||
@patch(_GET_ACCESS_TOKEN)
|
||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
|
||||
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
|
||||
@patch(f"{_INDEXER_MODULE}.ComposioService")
|
||||
async def test_composio_gmail_without_account_id_returns_error(
|
||||
mock_build_creds,
|
||||
mock_composio_service_cls,
|
||||
mock_tl_cls,
|
||||
mock_get_access_token,
|
||||
async_engine,
|
||||
composio_gmail_no_id,
|
||||
):
|
||||
|
|
@ -136,20 +147,23 @@ async def test_composio_gmail_without_account_id_returns_error(
|
|||
assert count == 0
|
||||
assert error is not None
|
||||
assert "composio" in error.lower()
|
||||
mock_build_creds.assert_not_called()
|
||||
mock_composio_service_cls.assert_not_called()
|
||||
mock_get_access_token.assert_not_called()
|
||||
|
||||
|
||||
@patch(_GET_ACCESS_TOKEN)
|
||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
|
||||
@patch(f"{_INDEXER_MODULE}.ComposioService")
|
||||
@patch(f"{_INDEXER_MODULE}.GoogleGmailConnector")
|
||||
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
|
||||
async def test_native_gmail_does_not_use_composio_credentials(
|
||||
mock_build_creds,
|
||||
async def test_native_gmail_uses_google_gmail_connector(
|
||||
mock_gmail_cls,
|
||||
mock_composio_service_cls,
|
||||
mock_tl_cls,
|
||||
mock_get_access_token,
|
||||
async_engine,
|
||||
native_gmail,
|
||||
):
|
||||
"""Gmail indexer does NOT call build_composio_credentials for a native connector."""
|
||||
"""Native Gmail connector uses GoogleGmailConnector with no Composio path."""
|
||||
from app.tasks.connector_indexers.google_gmail_indexer import (
|
||||
index_google_gmail_messages,
|
||||
)
|
||||
|
|
@ -170,4 +184,6 @@ async def test_native_gmail_does_not_use_composio_credentials(
|
|||
user_id=data["user_id"],
|
||||
)
|
||||
|
||||
mock_build_creds.assert_not_called()
|
||||
mock_gmail_cls.assert_called_once()
|
||||
mock_composio_service_cls.assert_not_called()
|
||||
mock_get_access_token.assert_not_called()
|
||||
|
|
|
|||
|
|
@ -200,7 +200,7 @@ async def test_reindex_sets_status_ready(db_session, db_search_space, db_user, m
|
|||
async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, mocker):
|
||||
"""Reindexing replaces old chunks with new content rather than appending."""
|
||||
mocker.patch(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.chunk_text",
|
||||
"app.indexing_pipeline.indexing_pipeline_service.chunk_text_hybrid",
|
||||
side_effect=[["Original chunk."], ["Updated chunk."]],
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -37,7 +37,12 @@ def _make_orm_doc(connector_doc, doc_id):
|
|||
async def test_index_calls_embed_and_chunk_via_to_thread(
|
||||
pipeline, make_connector_document, monkeypatch
|
||||
):
|
||||
"""index() runs embed_texts and the chunker via asyncio.to_thread, not blocking the loop."""
|
||||
"""index() runs the chunker and embed_texts via asyncio.to_thread, not blocking the loop.
|
||||
|
||||
Routing between ``chunk_text`` (code path) and ``chunk_text_hybrid`` (default
|
||||
path, see issue #1334) is verified separately in
|
||||
``test_non_code_documents_use_hybrid_chunker``.
|
||||
"""
|
||||
to_thread_calls = []
|
||||
original_to_thread = asyncio.to_thread
|
||||
|
||||
|
|
@ -51,12 +56,6 @@ async def test_index_calls_embed_and_chunk_via_to_thread(
|
|||
"app.indexing_pipeline.indexing_pipeline_service.summarize_document",
|
||||
AsyncMock(return_value="Summary."),
|
||||
)
|
||||
mock_chunk = MagicMock(return_value=["chunk1"])
|
||||
mock_chunk.__name__ = "chunk_text"
|
||||
monkeypatch.setattr(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.chunk_text",
|
||||
mock_chunk,
|
||||
)
|
||||
mock_chunk_hybrid = MagicMock(return_value=["chunk1"])
|
||||
mock_chunk_hybrid.__name__ = "chunk_text_hybrid"
|
||||
monkeypatch.setattr(
|
||||
|
|
@ -71,6 +70,11 @@ async def test_index_calls_embed_and_chunk_via_to_thread(
|
|||
"app.indexing_pipeline.indexing_pipeline_service.embed_texts",
|
||||
mock_embed,
|
||||
)
|
||||
# Bypass set_committed_value, which requires a real ORM instance (not MagicMock).
|
||||
monkeypatch.setattr(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.attach_chunks_to_document",
|
||||
MagicMock(),
|
||||
)
|
||||
|
||||
connector_doc = make_connector_document(
|
||||
document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR,
|
||||
|
|
@ -83,11 +87,62 @@ async def test_index_calls_embed_and_chunk_via_to_thread(
|
|||
|
||||
await pipeline.index(document, connector_doc, llm=MagicMock())
|
||||
|
||||
# Non-code documents now route through the table-aware hybrid chunker
|
||||
# (see commit 2f3a33c9). Either chunker entry point satisfies the
|
||||
# "chunking runs off the event loop" contract this test guards.
|
||||
# Either chunker entry point satisfies the "chunking runs off the event
|
||||
# loop" contract this test guards. Routing between the two is verified
|
||||
# in test_non_code_documents_use_hybrid_chunker.
|
||||
assert {"chunk_text", "chunk_text_hybrid"} & set(to_thread_calls)
|
||||
assert "embed_texts" in to_thread_calls
|
||||
assert document.status == DocumentStatus.ready()
|
||||
|
||||
|
||||
async def test_non_code_documents_use_hybrid_chunker(
|
||||
pipeline, make_connector_document, monkeypatch
|
||||
):
|
||||
"""Non-code documents route through ``chunk_text_hybrid`` (issue #1334).
|
||||
|
||||
The hybrid chunker preserves Markdown table integrity by avoiding splits
|
||||
mid-row. Only documents flagged with ``should_use_code_chunker=True``
|
||||
should take the ``chunk_text`` path.
|
||||
"""
|
||||
monkeypatch.setattr(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.summarize_document",
|
||||
AsyncMock(return_value="Summary."),
|
||||
)
|
||||
mock_chunk_hybrid = MagicMock(return_value=["chunk1"])
|
||||
mock_chunk_hybrid.__name__ = "chunk_text_hybrid"
|
||||
monkeypatch.setattr(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.chunk_text_hybrid",
|
||||
mock_chunk_hybrid,
|
||||
)
|
||||
mock_chunk_code = MagicMock(return_value=["chunk1"])
|
||||
mock_chunk_code.__name__ = "chunk_text"
|
||||
monkeypatch.setattr(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.chunk_text",
|
||||
mock_chunk_code,
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.embed_texts",
|
||||
MagicMock(side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts]),
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.attach_chunks_to_document",
|
||||
MagicMock(),
|
||||
)
|
||||
|
||||
connector_doc = make_connector_document(
|
||||
document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR,
|
||||
unique_id="msg-1",
|
||||
search_space_id=1,
|
||||
should_use_code_chunker=False,
|
||||
)
|
||||
document = MagicMock(spec=Document)
|
||||
document.id = 1
|
||||
document.status = DocumentStatus.pending()
|
||||
|
||||
await pipeline.index(document, connector_doc, llm=MagicMock())
|
||||
|
||||
mock_chunk_hybrid.assert_called_once()
|
||||
mock_chunk_code.assert_not_called()
|
||||
|
||||
|
||||
def _mock_session_factory(orm_docs_by_id):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue