mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-06 20:15:17 +02:00
refactor(tests): Update tests to remove summary references and adjust for embedding errors
This commit is contained in:
parent
e4d7b01b09
commit
e588782a9b
17 changed files with 69 additions and 148 deletions
|
|
@ -1,7 +1,7 @@
|
||||||
import importlib
|
import importlib
|
||||||
import sys
|
import sys
|
||||||
import uuid
|
import uuid
|
||||||
from unittest.mock import AsyncMock, MagicMock
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
|
|
@ -123,18 +123,6 @@ async def db_search_space(db_session: AsyncSession, db_user: User) -> SearchSpac
|
||||||
return space
|
return space
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def patched_summarize(monkeypatch) -> AsyncMock:
|
|
||||||
mock = AsyncMock(return_value="Mocked summary.")
|
|
||||||
return mock
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def patched_summarize_raises(monkeypatch) -> AsyncMock:
|
|
||||||
mock = AsyncMock(side_effect=RuntimeError("LLM unavailable"))
|
|
||||||
return mock
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def patched_embed_texts(monkeypatch) -> MagicMock:
|
def patched_embed_texts(monkeypatch) -> MagicMock:
|
||||||
mock = MagicMock(side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts])
|
mock = MagicMock(side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts])
|
||||||
|
|
@ -145,6 +133,16 @@ def patched_embed_texts(monkeypatch) -> MagicMock:
|
||||||
return mock
|
return mock
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def patched_embed_texts_raises(monkeypatch) -> MagicMock:
|
||||||
|
mock = MagicMock(side_effect=RuntimeError("Embedding unavailable"))
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.indexing_pipeline.indexing_pipeline_service.embed_texts",
|
||||||
|
mock,
|
||||||
|
)
|
||||||
|
return mock
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def patched_chunk_text(monkeypatch) -> MagicMock:
|
def patched_chunk_text(monkeypatch) -> MagicMock:
|
||||||
mock = MagicMock(return_value=["Test chunk content."])
|
mock = MagicMock(return_value=["Test chunk content."])
|
||||||
|
|
|
||||||
|
|
@ -32,8 +32,8 @@ async def test_sets_status_ready(db_session, db_search_space, db_user, mocker):
|
||||||
@pytest.mark.usefixtures(
|
@pytest.mark.usefixtures(
|
||||||
"patched_embed_texts", "patched_chunk_text"
|
"patched_embed_texts", "patched_chunk_text"
|
||||||
)
|
)
|
||||||
async def test_content_is_summary(db_session, db_search_space, db_user, mocker):
|
async def test_content_is_source_markdown(db_session, db_search_space, db_user, mocker):
|
||||||
"""Document content is set to the LLM-generated summary."""
|
"""Document content is set to the extracted source markdown."""
|
||||||
adapter = UploadDocumentAdapter(db_session)
|
adapter = UploadDocumentAdapter(db_session)
|
||||||
await adapter.index(
|
await adapter.index(
|
||||||
markdown_content="## Hello\n\nSome content.",
|
markdown_content="## Hello\n\nSome content.",
|
||||||
|
|
@ -48,7 +48,7 @@ async def test_content_is_summary(db_session, db_search_space, db_user, mocker):
|
||||||
)
|
)
|
||||||
document = result.scalars().first()
|
document = result.scalars().first()
|
||||||
|
|
||||||
assert document.content == "Mocked summary."
|
assert document.content == "## Hello\n\nSome content."
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures(
|
@pytest.mark.usefixtures(
|
||||||
|
|
@ -79,9 +79,7 @@ async def test_chunks_written_to_db(db_session, db_search_space, db_user, mocker
|
||||||
assert chunks[0].content == "Test chunk content."
|
assert chunks[0].content == "Test chunk content."
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures(
|
@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text")
|
||||||
"patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
|
|
||||||
)
|
|
||||||
async def test_raises_on_indexing_failure(db_session, db_search_space, db_user, mocker):
|
async def test_raises_on_indexing_failure(db_session, db_search_space, db_user, mocker):
|
||||||
"""RuntimeError is raised when the indexing step fails so the caller can fire a failure notification."""
|
"""RuntimeError is raised when the indexing step fails so the caller can fire a failure notification."""
|
||||||
adapter = UploadDocumentAdapter(db_session)
|
adapter = UploadDocumentAdapter(db_session)
|
||||||
|
|
@ -92,7 +90,7 @@ async def test_raises_on_indexing_failure(db_session, db_search_space, db_user,
|
||||||
etl_service="UNSTRUCTURED",
|
etl_service="UNSTRUCTURED",
|
||||||
search_space_id=db_search_space.id,
|
search_space_id=db_search_space.id,
|
||||||
user_id=str(db_user.id),
|
user_id=str(db_user.id),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -104,7 +102,7 @@ async def test_raises_on_indexing_failure(db_session, db_search_space, db_user,
|
||||||
"patched_embed_texts", "patched_chunk_text"
|
"patched_embed_texts", "patched_chunk_text"
|
||||||
)
|
)
|
||||||
async def test_reindex_updates_content(db_session, db_search_space, db_user, mocker):
|
async def test_reindex_updates_content(db_session, db_search_space, db_user, mocker):
|
||||||
"""Document content is updated to the new summary after reindexing."""
|
"""Document content is updated to the new source markdown after reindexing."""
|
||||||
adapter = UploadDocumentAdapter(db_session)
|
adapter = UploadDocumentAdapter(db_session)
|
||||||
await adapter.index(
|
await adapter.index(
|
||||||
markdown_content="## Original\n\nOriginal content.",
|
markdown_content="## Original\n\nOriginal content.",
|
||||||
|
|
@ -125,7 +123,7 @@ async def test_reindex_updates_content(db_session, db_search_space, db_user, moc
|
||||||
await adapter.reindex(document=document)
|
await adapter.reindex(document=document)
|
||||||
|
|
||||||
await db_session.refresh(document)
|
await db_session.refresh(document)
|
||||||
assert document.content == "Mocked summary."
|
assert document.content == "## Edited\n\nNew content after user edit."
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures(
|
@pytest.mark.usefixtures(
|
||||||
|
|
@ -256,7 +254,9 @@ async def test_reindex_clears_reindexing_flag(
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
|
@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
|
||||||
async def test_reindex_raises_on_failure(db_session, db_search_space, db_user, mocker):
|
async def test_reindex_raises_on_failure(
|
||||||
|
db_session, db_search_space, db_user, patched_embed_texts, mocker
|
||||||
|
):
|
||||||
"""RuntimeError is raised when reindexing fails so the caller can handle it."""
|
"""RuntimeError is raised when reindexing fails so the caller can handle it."""
|
||||||
|
|
||||||
adapter = UploadDocumentAdapter(db_session)
|
adapter = UploadDocumentAdapter(db_session)
|
||||||
|
|
@ -276,6 +276,8 @@ async def test_reindex_raises_on_failure(db_session, db_search_space, db_user, m
|
||||||
document.source_markdown = "## Edited\n\nNew content after user edit."
|
document.source_markdown = "## Edited\n\nNew content after user edit."
|
||||||
await db_session.flush()
|
await db_session.flush()
|
||||||
|
|
||||||
|
patched_embed_texts.side_effect = RuntimeError("Embedding unavailable")
|
||||||
|
|
||||||
with pytest.raises(RuntimeError, match=r"Embedding failed|Reindexing failed"):
|
with pytest.raises(RuntimeError, match=r"Embedding failed|Reindexing failed"):
|
||||||
await adapter.reindex(document=document)
|
await adapter.reindex(document=document)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -259,16 +259,14 @@ async def test_reindex_replaces_old_chunks(
|
||||||
assert len(chunks) == 1
|
assert len(chunks) == 1
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures(
|
@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text")
|
||||||
"patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
|
async def test_embedding_error_sets_status_failed(
|
||||||
)
|
|
||||||
async def test_llm_error_sets_status_failed(
|
|
||||||
db_session,
|
db_session,
|
||||||
db_search_space,
|
db_search_space,
|
||||||
make_connector_document,
|
make_connector_document,
|
||||||
mocker,
|
mocker,
|
||||||
):
|
):
|
||||||
"""Document status is FAILED when the LLM raises during indexing."""
|
"""Document status is FAILED when embedding raises during indexing."""
|
||||||
connector_doc = make_connector_document(search_space_id=db_search_space.id)
|
connector_doc = make_connector_document(search_space_id=db_search_space.id)
|
||||||
service = IndexingPipelineService(session=db_session)
|
service = IndexingPipelineService(session=db_session)
|
||||||
|
|
||||||
|
|
@ -286,10 +284,8 @@ async def test_llm_error_sets_status_failed(
|
||||||
assert DocumentStatus.is_state(reloaded.status, DocumentStatus.FAILED)
|
assert DocumentStatus.is_state(reloaded.status, DocumentStatus.FAILED)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures(
|
@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text")
|
||||||
"patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
|
async def test_embedding_error_leaves_no_partial_data(
|
||||||
)
|
|
||||||
async def test_llm_error_leaves_no_partial_data(
|
|
||||||
db_session,
|
db_session,
|
||||||
db_search_space,
|
db_search_space,
|
||||||
make_connector_document,
|
make_connector_document,
|
||||||
|
|
|
||||||
|
|
@ -1271,7 +1271,7 @@ class TestIndexingProgressFlag:
|
||||||
original_index = IndexingPipelineService.index
|
original_index = IndexingPipelineService.index
|
||||||
flag_observed = []
|
flag_observed = []
|
||||||
|
|
||||||
async def patched_index(self_pipe, document, connector_doc, llm):
|
async def patched_index(self_pipe, document, connector_doc):
|
||||||
folder = (
|
folder = (
|
||||||
await db_session.execute(
|
await db_session.execute(
|
||||||
select(Folder).where(
|
select(Folder).where(
|
||||||
|
|
@ -1283,7 +1283,7 @@ class TestIndexingProgressFlag:
|
||||||
if folder:
|
if folder:
|
||||||
meta = folder.folder_metadata or {}
|
meta = folder.folder_metadata or {}
|
||||||
flag_observed.append(meta.get("indexing_in_progress", False))
|
flag_observed.append(meta.get("indexing_in_progress", False))
|
||||||
return await original_index(self_pipe, document, connector_doc, llm)
|
return await original_index(self_pipe, document, connector_doc)
|
||||||
|
|
||||||
IndexingPipelineService.index = patched_index
|
IndexingPipelineService.index = patched_index
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -338,9 +338,7 @@ async def test_same_content_from_different_source_is_skipped(
|
||||||
assert len(result.scalars().all()) == 1
|
assert len(result.scalars().all()) == 1
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures(
|
@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text")
|
||||||
"patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
|
|
||||||
)
|
|
||||||
async def test_failed_document_with_unchanged_content_is_requeued(
|
async def test_failed_document_with_unchanged_content_is_requeued(
|
||||||
db_session,
|
db_session,
|
||||||
db_search_space,
|
db_search_space,
|
||||||
|
|
@ -351,7 +349,7 @@ async def test_failed_document_with_unchanged_content_is_requeued(
|
||||||
doc = make_connector_document(search_space_id=db_search_space.id)
|
doc = make_connector_document(search_space_id=db_search_space.id)
|
||||||
service = IndexingPipelineService(session=db_session)
|
service = IndexingPipelineService(session=db_session)
|
||||||
|
|
||||||
# First run: document is created and indexing crashes → status = failed
|
# First run: document is created and indexing crashes, so status becomes failed.
|
||||||
prepared = await service.prepare_for_indexing([doc])
|
prepared = await service.prepare_for_indexing([doc])
|
||||||
document_id = prepared[0].id
|
document_id = prepared[0].id
|
||||||
await service.index(prepared[0], doc)
|
await service.index(prepared[0], doc)
|
||||||
|
|
|
||||||
|
|
@ -87,18 +87,6 @@ async def test_build_connector_doc_produces_correct_fields():
|
||||||
assert doc.metadata["connector_id"] == _CONNECTOR_ID
|
assert doc.metadata["connector_id"] == _CONNECTOR_ID
|
||||||
assert doc.metadata["document_type"] == "Confluence Page"
|
assert doc.metadata["document_type"] == "Confluence Page"
|
||||||
assert doc.metadata["connector_type"] == "Confluence"
|
assert doc.metadata["connector_type"] == "Confluence"
|
||||||
assert "Engineering Handbook" in doc.deterministic_preview
|
|
||||||
assert markdown in doc.deterministic_preview
|
|
||||||
|
|
||||||
|
|
||||||
async def test_build_connector_doc_summary_disabled():
|
|
||||||
doc = _build_connector_doc(
|
|
||||||
_make_page(),
|
|
||||||
_to_markdown(_make_page()),
|
|
||||||
connector_id=_CONNECTOR_ID,
|
|
||||||
search_space_id=_SEARCH_SPACE_ID,
|
|
||||||
user_id=_USER_ID,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -294,12 +294,6 @@ def full_scan_mocks(mock_drive_client, monkeypatch):
|
||||||
MagicMock(return_value=pipeline_mock),
|
MagicMock(return_value=pipeline_mock),
|
||||||
)
|
)
|
||||||
|
|
||||||
monkeypatch.setattr(
|
|
||||||
_mod,
|
|
||||||
"get_agent_llm",
|
|
||||||
AsyncMock(return_value=MagicMock()),
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"drive_client": mock_drive_client,
|
"drive_client": mock_drive_client,
|
||||||
"session": mock_session,
|
"session": mock_session,
|
||||||
|
|
@ -480,12 +474,6 @@ async def test_delta_sync_removals_serial_rest_parallel(monkeypatch):
|
||||||
"IndexingPipelineService",
|
"IndexingPipelineService",
|
||||||
MagicMock(return_value=pipeline_mock),
|
MagicMock(return_value=pipeline_mock),
|
||||||
)
|
)
|
||||||
monkeypatch.setattr(
|
|
||||||
_mod,
|
|
||||||
"get_agent_llm",
|
|
||||||
AsyncMock(return_value=MagicMock()),
|
|
||||||
)
|
|
||||||
|
|
||||||
mock_session, _ = _make_page_limit_session()
|
mock_session, _ = _make_page_limit_session()
|
||||||
mock_task_logger = MagicMock()
|
mock_task_logger = MagicMock()
|
||||||
mock_task_logger.log_task_progress = AsyncMock()
|
mock_task_logger.log_task_progress = AsyncMock()
|
||||||
|
|
|
||||||
|
|
@ -88,20 +88,6 @@ async def test_build_connector_doc_produces_correct_fields():
|
||||||
assert doc.metadata["connector_id"] == _CONNECTOR_ID
|
assert doc.metadata["connector_id"] == _CONNECTOR_ID
|
||||||
assert doc.metadata["document_type"] == "Linear Issue"
|
assert doc.metadata["document_type"] == "Linear Issue"
|
||||||
assert doc.metadata["connector_type"] == "Linear"
|
assert doc.metadata["connector_type"] == "Linear"
|
||||||
assert "ENG-42" in doc.deterministic_preview
|
|
||||||
assert markdown in doc.deterministic_preview
|
|
||||||
|
|
||||||
|
|
||||||
async def test_build_connector_doc_summary_disabled():
|
|
||||||
"""When enable_vision_llm is False, deterministic_content is False."""
|
|
||||||
doc = _build_connector_doc(
|
|
||||||
_make_issue(),
|
|
||||||
_make_formatted_issue(),
|
|
||||||
"# content",
|
|
||||||
connector_id=_CONNECTOR_ID,
|
|
||||||
search_space_id=_SEARCH_SPACE_ID,
|
|
||||||
user_id=_USER_ID,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -55,19 +55,6 @@ async def test_build_connector_doc_produces_correct_fields():
|
||||||
assert doc.metadata["connector_id"] == _CONNECTOR_ID
|
assert doc.metadata["connector_id"] == _CONNECTOR_ID
|
||||||
assert doc.metadata["document_type"] == "Notion Page"
|
assert doc.metadata["document_type"] == "Notion Page"
|
||||||
assert doc.metadata["connector_type"] == "Notion"
|
assert doc.metadata["connector_type"] == "Notion"
|
||||||
assert "My Notion Page" in doc.deterministic_preview
|
|
||||||
assert markdown in doc.deterministic_preview
|
|
||||||
|
|
||||||
|
|
||||||
async def test_build_connector_doc_summary_disabled():
|
|
||||||
"""When enable_vision_llm is False, deterministic_content is False."""
|
|
||||||
doc = _build_connector_doc(
|
|
||||||
_make_page(),
|
|
||||||
"# content",
|
|
||||||
connector_id=_CONNECTOR_ID,
|
|
||||||
search_space_id=_SEARCH_SPACE_ID,
|
|
||||||
user_id=_USER_ID,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -335,10 +335,6 @@ def gdrive_full_scan_mocks(monkeypatch):
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
_mod, "IndexingPipelineService", MagicMock(return_value=pipeline_mock)
|
_mod, "IndexingPipelineService", MagicMock(return_value=pipeline_mock)
|
||||||
)
|
)
|
||||||
monkeypatch.setattr(
|
|
||||||
_mod, "get_agent_llm", AsyncMock(return_value=MagicMock())
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"mod": _mod,
|
"mod": _mod,
|
||||||
"session": session,
|
"session": session,
|
||||||
|
|
@ -452,10 +448,6 @@ async def test_gdrive_delta_sync_skips_over_quota(monkeypatch):
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
_mod, "IndexingPipelineService", MagicMock(return_value=pipeline_mock)
|
_mod, "IndexingPipelineService", MagicMock(return_value=pipeline_mock)
|
||||||
)
|
)
|
||||||
monkeypatch.setattr(
|
|
||||||
_mod, "get_agent_llm", AsyncMock(return_value=MagicMock())
|
|
||||||
)
|
|
||||||
|
|
||||||
mock_task_logger = MagicMock()
|
mock_task_logger = MagicMock()
|
||||||
mock_task_logger.log_task_progress = AsyncMock()
|
mock_task_logger.log_task_progress = AsyncMock()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -69,6 +69,13 @@ def _signed_slack_request(payload: dict, *, secret: str = "signing-secret") -> R
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _enable_slack_gateway(monkeypatch):
|
||||||
|
monkeypatch.setattr(routes.config, "GATEWAY_SLACK_ENABLED", True)
|
||||||
|
monkeypatch.setattr(routes.config, "GATEWAY_SLACK_CLIENT_ID", "client-id")
|
||||||
|
monkeypatch.setattr(routes.config, "GATEWAY_SLACK_CLIENT_SECRET", "client-secret")
|
||||||
|
monkeypatch.setattr(routes.config, "GATEWAY_SLACK_SIGNING_SECRET", "signing-secret")
|
||||||
|
|
||||||
|
|
||||||
async def _call_webhook(*, request: RequestStub, account_id: int, session):
|
async def _call_webhook(*, request: RequestStub, account_id: int, session):
|
||||||
return await routes.telegram_webhook(
|
return await routes.telegram_webhook(
|
||||||
request=request,
|
request=request,
|
||||||
|
|
@ -207,7 +214,7 @@ def test_verify_slack_signature_accepts_valid_signature():
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_slack_webhook_url_verification(monkeypatch, mocker):
|
async def test_slack_webhook_url_verification(monkeypatch, mocker):
|
||||||
monkeypatch.setattr(routes.config, "GATEWAY_SLACK_SIGNING_SECRET", "signing-secret")
|
_enable_slack_gateway(monkeypatch)
|
||||||
request = _signed_slack_request({"type": "url_verification", "challenge": "abc123"})
|
request = _signed_slack_request({"type": "url_verification", "challenge": "abc123"})
|
||||||
|
|
||||||
response = await routes.slack_webhook(request=request, session=mocker.AsyncMock())
|
response = await routes.slack_webhook(request=request, session=mocker.AsyncMock())
|
||||||
|
|
@ -218,7 +225,7 @@ async def test_slack_webhook_url_verification(monkeypatch, mocker):
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_slack_webhook_persists_event(monkeypatch, mocker):
|
async def test_slack_webhook_persists_event(monkeypatch, mocker):
|
||||||
monkeypatch.setattr(routes.config, "GATEWAY_SLACK_SIGNING_SECRET", "signing-secret")
|
_enable_slack_gateway(monkeypatch)
|
||||||
session = mocker.AsyncMock()
|
session = mocker.AsyncMock()
|
||||||
monkeypatch.setattr(routes, "get_slack_account_by_team", mocker.AsyncMock(return_value=_slack_account()))
|
monkeypatch.setattr(routes, "get_slack_account_by_team", mocker.AsyncMock(return_value=_slack_account()))
|
||||||
persist = mocker.AsyncMock(return_value=100)
|
persist = mocker.AsyncMock(return_value=100)
|
||||||
|
|
@ -248,7 +255,7 @@ async def test_slack_webhook_persists_event(monkeypatch, mocker):
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_slack_webhook_ignores_self_event(monkeypatch, mocker):
|
async def test_slack_webhook_ignores_self_event(monkeypatch, mocker):
|
||||||
monkeypatch.setattr(routes.config, "GATEWAY_SLACK_SIGNING_SECRET", "signing-secret")
|
_enable_slack_gateway(monkeypatch)
|
||||||
session = mocker.AsyncMock()
|
session = mocker.AsyncMock()
|
||||||
monkeypatch.setattr(routes, "get_slack_account_by_team", mocker.AsyncMock(return_value=_slack_account()))
|
monkeypatch.setattr(routes, "get_slack_account_by_team", mocker.AsyncMock(return_value=_slack_account()))
|
||||||
persist = mocker.AsyncMock(return_value=100)
|
persist = mocker.AsyncMock(return_value=100)
|
||||||
|
|
@ -275,7 +282,7 @@ async def test_slack_webhook_ignores_self_event(monkeypatch, mocker):
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_discord_gateway_install_returns_oauth_url(monkeypatch):
|
async def test_discord_gateway_install_returns_oauth_url(monkeypatch, mocker):
|
||||||
monkeypatch.setattr(routes.config, "DISCORD_CLIENT_ID", "discord-client")
|
monkeypatch.setattr(routes.config, "DISCORD_CLIENT_ID", "discord-client")
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
routes.config,
|
routes.config,
|
||||||
|
|
@ -283,10 +290,12 @@ async def test_discord_gateway_install_returns_oauth_url(monkeypatch):
|
||||||
"http://localhost:8000/api/v1/gateway/discord/callback",
|
"http://localhost:8000/api/v1/gateway/discord/callback",
|
||||||
)
|
)
|
||||||
monkeypatch.setattr(routes.config, "SECRET_KEY", "test-secret")
|
monkeypatch.setattr(routes.config, "SECRET_KEY", "test-secret")
|
||||||
|
monkeypatch.setattr(routes, "check_search_space_access", mocker.AsyncMock())
|
||||||
|
|
||||||
response = await routes.install_discord_gateway(
|
response = await routes.install_discord_gateway(
|
||||||
search_space_id=123,
|
search_space_id=123,
|
||||||
user=SimpleNamespace(id="00000000-0000-0000-0000-000000000001"),
|
user=SimpleNamespace(id="00000000-0000-0000-0000-000000000001"),
|
||||||
|
session=mocker.AsyncMock(),
|
||||||
)
|
)
|
||||||
|
|
||||||
assert response["auth_url"].startswith("https://discord.com/api/oauth2/authorize?")
|
assert response["auth_url"].startswith("https://discord.com/api/oauth2/authorize?")
|
||||||
|
|
|
||||||
|
|
@ -37,12 +37,10 @@ async def test_calls_prepare_then_index_per_document(pipeline, make_connector_do
|
||||||
orm2 = MagicMock(spec=Document)
|
orm2 = MagicMock(spec=Document)
|
||||||
orm2.unique_identifier_hash = compute_unique_identifier_hash(doc2)
|
orm2.unique_identifier_hash = compute_unique_identifier_hash(doc2)
|
||||||
|
|
||||||
mock_llm = MagicMock()
|
|
||||||
|
|
||||||
pipeline.prepare_for_indexing = AsyncMock(return_value=[orm1, orm2])
|
pipeline.prepare_for_indexing = AsyncMock(return_value=[orm1, orm2])
|
||||||
pipeline.index = AsyncMock(side_effect=lambda doc, cdoc, llm: doc)
|
pipeline.index = AsyncMock(side_effect=lambda doc, cdoc: doc)
|
||||||
|
|
||||||
results = await pipeline.index_batch([doc1, doc2], mock_llm)
|
results = await pipeline.index_batch([doc1, doc2])
|
||||||
|
|
||||||
pipeline.prepare_for_indexing.assert_awaited_once_with([doc1, doc2])
|
pipeline.prepare_for_indexing.assert_awaited_once_with([doc1, doc2])
|
||||||
assert pipeline.index.await_count == 2
|
assert pipeline.index.await_count == 2
|
||||||
|
|
@ -53,7 +51,7 @@ async def test_empty_input_returns_empty(pipeline):
|
||||||
"""Empty connector_docs list returns empty result."""
|
"""Empty connector_docs list returns empty result."""
|
||||||
pipeline.prepare_for_indexing = AsyncMock(return_value=[])
|
pipeline.prepare_for_indexing = AsyncMock(return_value=[])
|
||||||
|
|
||||||
results = await pipeline.index_batch([], MagicMock())
|
results = await pipeline.index_batch([])
|
||||||
|
|
||||||
assert results == []
|
assert results == []
|
||||||
|
|
||||||
|
|
@ -74,7 +72,7 @@ async def test_skips_document_without_matching_connector_doc(
|
||||||
pipeline.prepare_for_indexing = AsyncMock(return_value=[orphan_orm])
|
pipeline.prepare_for_indexing = AsyncMock(return_value=[orphan_orm])
|
||||||
pipeline.index = AsyncMock()
|
pipeline.index = AsyncMock()
|
||||||
|
|
||||||
results = await pipeline.index_batch([doc1], MagicMock())
|
results = await pipeline.index_batch([doc1])
|
||||||
|
|
||||||
pipeline.index.assert_not_awaited()
|
pipeline.index.assert_not_awaited()
|
||||||
assert results == []
|
assert results == []
|
||||||
|
|
|
||||||
|
|
@ -183,19 +183,14 @@ async def test_batch_parallel_indexes_all_documents(
|
||||||
|
|
||||||
index_calls = []
|
index_calls = []
|
||||||
|
|
||||||
async def fake_index(self, document, connector_doc, llm):
|
async def fake_index(self, document, connector_doc):
|
||||||
index_calls.append(document.id)
|
index_calls.append(document.id)
|
||||||
document.status = DocumentStatus.ready()
|
document.status = DocumentStatus.ready()
|
||||||
return document
|
return document
|
||||||
|
|
||||||
monkeypatch.setattr(IndexingPipelineService, "index", fake_index)
|
monkeypatch.setattr(IndexingPipelineService, "index", fake_index)
|
||||||
|
|
||||||
async def mock_get_llm(session):
|
_, indexed, failed = await pipeline.index_batch_parallel(docs, max_concurrency=2)
|
||||||
return MagicMock()
|
|
||||||
|
|
||||||
_, indexed, failed = await pipeline.index_batch_parallel(
|
|
||||||
docs, mock_get_llm, max_concurrency=2
|
|
||||||
)
|
|
||||||
|
|
||||||
assert indexed == 3
|
assert indexed == 3
|
||||||
assert failed == 0
|
assert failed == 0
|
||||||
|
|
@ -224,20 +219,15 @@ async def test_batch_parallel_one_failure_does_not_affect_others(
|
||||||
_mock_session_factory(orm_by_id),
|
_mock_session_factory(orm_by_id),
|
||||||
)
|
)
|
||||||
|
|
||||||
async def failing_index(self, document, connector_doc, llm):
|
async def failing_index(self, document, connector_doc):
|
||||||
if document.id == 2:
|
if document.id == 2:
|
||||||
raise RuntimeError("LLM exploded")
|
raise RuntimeError("Indexing exploded")
|
||||||
document.status = DocumentStatus.ready()
|
document.status = DocumentStatus.ready()
|
||||||
return document
|
return document
|
||||||
|
|
||||||
monkeypatch.setattr(IndexingPipelineService, "index", failing_index)
|
monkeypatch.setattr(IndexingPipelineService, "index", failing_index)
|
||||||
|
|
||||||
async def mock_get_llm(session):
|
_, indexed, failed = await pipeline.index_batch_parallel(docs, max_concurrency=4)
|
||||||
return MagicMock()
|
|
||||||
|
|
||||||
_, indexed, failed = await pipeline.index_batch_parallel(
|
|
||||||
docs, mock_get_llm, max_concurrency=4
|
|
||||||
)
|
|
||||||
|
|
||||||
assert indexed == 2
|
assert indexed == 2
|
||||||
assert failed == 1
|
assert failed == 1
|
||||||
|
|
|
||||||
|
|
@ -137,15 +137,14 @@ Notes:
|
||||||
- `--skip-unanswerable` (run) — drop unanswerable questions
|
- `--skip-unanswerable` (run) — drop unanswerable questions
|
||||||
- `--docs <a.pdf>,<b.pdf>` (run) — scope to specific docs
|
- `--docs <a.pdf>,<b.pdf>` (run) — scope to specific docs
|
||||||
|
|
||||||
## Ingestion knobs (vision LLM, processing mode, summarize)
|
## Ingestion knobs (vision LLM, processing mode)
|
||||||
|
|
||||||
The harness exposes `POST /api/v1/documents/fileupload`'s three knobs on every `ingest` subcommand:
|
The harness exposes `POST /api/v1/documents/fileupload`'s ingest knobs on every `ingest` subcommand:
|
||||||
|
|
||||||
| Flag pair | Effect |
|
| Flag pair | Effect |
|
||||||
|--------------------------------------------|-----------------------------------------------------------------------------------------|
|
|--------------------------------------------|-----------------------------------------------------------------------------------------|
|
||||||
| `--use-vision-llm` / `--no-vision-llm` | Walk every embedded image in the PDF and inline image-derived text at the image's position (see below). |
|
| `--use-vision-llm` / `--no-vision-llm` | Walk every embedded image in the PDF and inline image-derived text at the image's position (see below). |
|
||||||
| `--processing-mode {basic,premium}` | `premium` carries a 10× page multiplier and routes to a stronger ETL (e.g. LlamaCloud). |
|
| `--processing-mode {basic,premium}` | `premium` carries a 10× page multiplier and routes to a stronger ETL (e.g. LlamaCloud). |
|
||||||
| `--should-summarize` / `--no-summarize` | Generate a per-document summary at ingest. |
|
|
||||||
|
|
||||||
The "Default ingest" column in the benchmarks table is what runs if you don't pass any flag. Whatever was actually used is recorded as a `__settings__` header in the doc map (`data/<suite>/maps/<benchmark>_*_map.jsonl`) and as `extra.ingest_settings` in `run_artifact.json`, then surfaced in the report — no need to hunt through CLI history.
|
The "Default ingest" column in the benchmarks table is what runs if you don't pass any flag. Whatever was actually used is recorded as a `__settings__` header in the doc map (`data/<suite>/maps/<benchmark>_*_map.jsonl`) and as `extra.ingest_settings` in `run_artifact.json`, then surfaced in the report — no need to hunt through CLI history.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -173,14 +173,14 @@ def add_ingest_settings_args(
|
||||||
*,
|
*,
|
||||||
defaults: IngestSettings,
|
defaults: IngestSettings,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Attach the three ingest-settings flag pairs to ``parser``.
|
"""Attach ingest-settings flags to ``parser``.
|
||||||
|
|
||||||
Each bool exposes a mutually exclusive ``--foo`` / ``--no-foo``
|
The vision bool exposes a mutually exclusive ``--foo`` / ``--no-foo``
|
||||||
pair so an operator can flip either direction without restating
|
pair so an operator can flip either direction without restating every
|
||||||
every flag. Default is ``None`` so that "operator didn't pass the
|
flag. Default is ``None`` so that "operator didn't pass the flag" is
|
||||||
flag" is distinguishable from "operator explicitly passed false"
|
distinguishable from "operator explicitly passed false" —
|
||||||
— ``IngestSettings.merge`` then folds in the benchmark default
|
``IngestSettings.merge`` then folds in the benchmark default only when
|
||||||
only when the operator was silent.
|
the operator was silent.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
settings_group = parser.add_argument_group(
|
settings_group = parser.add_argument_group(
|
||||||
|
|
@ -276,7 +276,7 @@ def format_ingest_settings_md(settings: Any) -> str:
|
||||||
mode = settings.get("processing_mode") or "basic"
|
mode = settings.get("processing_mode") or "basic"
|
||||||
return (
|
return (
|
||||||
f"- SurfSense ingest settings: vision_llm=`{vision}`, "
|
f"- SurfSense ingest settings: vision_llm=`{vision}`, "
|
||||||
f"processing_mode=`{mode}`, summarize=`{summarize}`"
|
f"processing_mode=`{mode}`"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ Covers:
|
||||||
|
|
||||||
* ``IngestSettings.merge`` honours operator overrides and falls back
|
* ``IngestSettings.merge`` honours operator overrides and falls back
|
||||||
to per-benchmark defaults when the operator is silent.
|
to per-benchmark defaults when the operator is silent.
|
||||||
* ``add_ingest_settings_args`` exposes the three flag pairs and
|
* ``add_ingest_settings_args`` exposes ingest settings flags and
|
||||||
argparse defaults of ``None`` correctly distinguish "not passed"
|
argparse defaults of ``None`` correctly distinguish "not passed"
|
||||||
from "explicitly false".
|
from "explicitly false".
|
||||||
* ``settings_header_line`` / ``read_settings_header`` round-trip
|
* ``settings_header_line`` / ``read_settings_header`` round-trip
|
||||||
|
|
@ -116,12 +116,11 @@ class TestMerge:
|
||||||
assert d == {
|
assert d == {
|
||||||
"use_vision_llm": True,
|
"use_vision_llm": True,
|
||||||
"processing_mode": "premium",
|
"processing_mode": "premium",
|
||||||
"use_vision_llm": False,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def test_render_label_format(self) -> None:
|
def test_render_label_format(self) -> None:
|
||||||
s = IngestSettings(use_vision_llm=True, processing_mode="premium")
|
s = IngestSettings(use_vision_llm=True, processing_mode="premium")
|
||||||
assert s.render_label() == "vision=on, mode=premium, summarize=on"
|
assert s.render_label() == "vision=on, mode=premium"
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -145,7 +144,6 @@ class TestAddArgs:
|
||||||
args = parser.parse_args([])
|
args = parser.parse_args([])
|
||||||
assert args.use_vision_llm is None
|
assert args.use_vision_llm is None
|
||||||
assert args.processing_mode is None
|
assert args.processing_mode is None
|
||||||
assert args.use_vision_llm is None
|
|
||||||
|
|
||||||
def test_use_vision_llm_flag(self, parser: argparse.ArgumentParser) -> None:
|
def test_use_vision_llm_flag(self, parser: argparse.ArgumentParser) -> None:
|
||||||
args = parser.parse_args(["--use-vision-llm"])
|
args = parser.parse_args(["--use-vision-llm"])
|
||||||
|
|
@ -166,12 +164,6 @@ class TestAddArgs:
|
||||||
with pytest.raises(SystemExit):
|
with pytest.raises(SystemExit):
|
||||||
parser.parse_args(["--processing-mode", "exotic"])
|
parser.parse_args(["--processing-mode", "exotic"])
|
||||||
|
|
||||||
def test_summarize_flag_pair(self, parser: argparse.ArgumentParser) -> None:
|
|
||||||
on = parser.parse_args(["--should-summarize"])
|
|
||||||
assert on.use_vision_llm is True
|
|
||||||
off = parser.parse_args(["--no-summarize"])
|
|
||||||
assert off.use_vision_llm is False
|
|
||||||
|
|
||||||
def test_vision_flags_mutually_exclusive(
|
def test_vision_flags_mutually_exclusive(
|
||||||
self, parser: argparse.ArgumentParser
|
self, parser: argparse.ArgumentParser
|
||||||
) -> None:
|
) -> None:
|
||||||
|
|
@ -249,19 +241,17 @@ class TestHeader:
|
||||||
class TestFormatMd:
|
class TestFormatMd:
|
||||||
def test_full_settings(self) -> None:
|
def test_full_settings(self) -> None:
|
||||||
out = format_ingest_settings_md(
|
out = format_ingest_settings_md(
|
||||||
{"use_vision_llm": True, "processing_mode": "premium", "use_vision_llm": True}
|
{"use_vision_llm": True, "processing_mode": "premium"}
|
||||||
)
|
)
|
||||||
assert "vision_llm=`on`" in out
|
assert "vision_llm=`on`" in out
|
||||||
assert "processing_mode=`premium`" in out
|
assert "processing_mode=`premium`" in out
|
||||||
assert "summarize=`on`" in out
|
|
||||||
|
|
||||||
def test_default_off(self) -> None:
|
def test_default_off(self) -> None:
|
||||||
out = format_ingest_settings_md(
|
out = format_ingest_settings_md(
|
||||||
{"use_vision_llm": False, "processing_mode": "basic", "use_vision_llm": False}
|
{"use_vision_llm": False, "processing_mode": "basic"}
|
||||||
)
|
)
|
||||||
assert "vision_llm=`off`" in out
|
assert "vision_llm=`off`" in out
|
||||||
assert "processing_mode=`basic`" in out
|
assert "processing_mode=`basic`" in out
|
||||||
assert "summarize=`off`" in out
|
|
||||||
|
|
||||||
def test_missing_returns_re_ingest_hint(self) -> None:
|
def test_missing_returns_re_ingest_hint(self) -> None:
|
||||||
# Empty dict + None + non-mapping should all degrade gracefully.
|
# Empty dict + None + non-mapping should all degrade gracefully.
|
||||||
|
|
|
||||||
|
|
@ -228,7 +228,7 @@ export function AgentModelManager({ searchSpaceId }: AgentModelManagerProps) {
|
||||||
<h3 className="text-sm md:text-base font-semibold mb-2">No Models Yet</h3>
|
<h3 className="text-sm md:text-base font-semibold mb-2">No Models Yet</h3>
|
||||||
<p className="text-[11px] md:text-xs text-muted-foreground max-w-sm mb-4">
|
<p className="text-[11px] md:text-xs text-muted-foreground max-w-sm mb-4">
|
||||||
{canCreate
|
{canCreate
|
||||||
? "Add your first model to power document summarization, chat, and other agent capabilities"
|
? "Add your first model to power chat, reports, and other agent capabilities"
|
||||||
: "No models have been added to this space yet. Contact a space owner to add one"}
|
: "No models have been added to this space yet. Contact a space owner to add one"}
|
||||||
</p>
|
</p>
|
||||||
</CardContent>
|
</CardContent>
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue