feat: implement and test index happy path

This commit is contained in:
CREDO23 2026-02-25 00:30:11 +02:00
parent 579a9e2cb5
commit 497ed681d5
3 changed files with 100 additions and 5 deletions

View file

@ -12,7 +12,7 @@ from sqlalchemy.pool import NullPool
from app.db import Base, SearchSpace
from app.db import User
_EMBEDDING_DIM = 4 # keep vectors tiny; real model uses 768+
_EMBEDDING_DIM = 1024 # must match the Vector() dimension used in DB column creation
_DEFAULT_TEST_DB = "postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense_test"
TEST_DATABASE_URL = os.environ.get("TEST_DATABASE_URL", _DEFAULT_TEST_DB)
@ -96,9 +96,33 @@ def mock_llm() -> AsyncMock:
@pytest.fixture
def mock_embedding_model() -> MagicMock:
model = MagicMock()
model.embed = MagicMock(
side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts]
def patched_generate_summary(monkeypatch) -> AsyncMock:
mock = AsyncMock(return_value=("Mocked summary.", [0.1] * _EMBEDDING_DIM))
monkeypatch.setattr(
"app.indexing_pipeline.indexing_pipeline_service.generate_document_summary",
mock,
)
return mock
@pytest.fixture
def patched_create_chunks(monkeypatch) -> MagicMock:
from app.db import Chunk
chunk = Chunk(content="Test chunk content.", embedding=[0.1] * _EMBEDDING_DIM)
mock = AsyncMock(return_value=[chunk])
monkeypatch.setattr(
"app.indexing_pipeline.indexing_pipeline_service.create_document_chunks",
mock,
)
return mock
@pytest.fixture
def patched_embedding_model(monkeypatch) -> MagicMock:
from app.config import config
model = MagicMock()
model.embed = MagicMock(return_value=[0.1] * _EMBEDDING_DIM)
monkeypatch.setattr(config, "embedding_model_instance", model)
return model

View file

@ -0,0 +1,26 @@
import pytest
from sqlalchemy import select
from app.db import Document, DocumentStatus
from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService
pytestmark = pytest.mark.integration
async def test_sets_status_ready(
db_session, db_search_space, make_connector_document,
mock_llm, patched_generate_summary, patched_create_chunks,
):
connector_doc = make_connector_document(search_space_id=db_search_space.id)
service = IndexingPipelineService(session=db_session)
prepared = await service.prepare_for_indexing([connector_doc])
document = prepared[0]
document_id = document.id
await service.index(document, connector_doc, mock_llm)
result = await db_session.execute(select(Document).filter(Document.id == document_id))
reloaded = result.scalars().first()
assert DocumentStatus.is_state(reloaded.status, DocumentStatus.READY)