mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 08:46:22 +02:00
feat: add integration and unit tests for local folder indexing and document versioning
This commit is contained in:
parent
96a58d0d30
commit
775dea7894
4 changed files with 894 additions and 0 deletions
184
surfsense_backend/tests/integration/test_document_versioning.py
Normal file
184
surfsense_backend/tests/integration/test_document_versioning.py
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
"""Integration tests for document versioning snapshot + cleanup."""
|
||||
|
||||
from datetime import UTC, datetime, timedelta
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db import Document, DocumentType, DocumentVersion, SearchSpace, User
|
||||
|
||||
pytestmark = pytest.mark.integration
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def db_document(
    db_session: AsyncSession, db_user: User, db_search_space: SearchSpace
) -> Document:
    """Provide a flushed ``LOCAL_FOLDER_FILE`` document for versioning tests.

    The document is attached to ``db_search_space`` and attributed to
    ``db_user``. It is added to ``db_session`` and flushed (not committed)
    before being handed to the test.
    """
    # Baseline state that the snapshot tests compare the first version against.
    fields = {
        "title": "Test Doc",
        "document_type": DocumentType.LOCAL_FOLDER_FILE,
        "document_metadata": {},
        "content": "Summary of test doc.",
        "content_hash": "abc123",
        "unique_identifier_hash": "local_folder:test-folder:test.md",
        "source_markdown": "# Test\n\nOriginal content.",
        "search_space_id": db_search_space.id,
        "created_by_id": db_user.id,
    }
    document = Document(**fields)

    db_session.add(document)
    await db_session.flush()
    return document
|
||||
|
||||
|
||||
async def _version_count(session: AsyncSession, document_id: int) -> int:
    """Return the number of ``DocumentVersion`` rows for *document_id*."""
    count_stmt = (
        select(func.count())
        .select_from(DocumentVersion)
        .where(DocumentVersion.document_id == document_id)
    )
    return (await session.execute(count_stmt)).scalar_one()
|
||||
|
||||
|
||||
async def _get_versions(session: AsyncSession, document_id: int) -> list[DocumentVersion]:
    """Return all versions of *document_id*, ordered by ascending ``version_number``."""
    for_document = select(DocumentVersion).where(
        DocumentVersion.document_id == document_id
    )
    ordered = for_document.order_by(DocumentVersion.version_number)
    rows = await session.execute(ordered)
    return list(rows.scalars().all())
|
||||
|
||||
|
||||
class TestCreateVersionSnapshot:
    """V1-V5: TDD slices for create_version_snapshot.

    Every test snapshots the ``db_document`` fixture through
    ``create_version_snapshot`` and then inspects the stored
    ``DocumentVersion`` rows. V2-V5 replace
    ``app.utils.document_versioning._now`` so that each snapshot is taken at
    a chosen, deterministic moment.

    ``create_version_snapshot`` is imported inside each test rather than at
    module level, so a missing implementation fails the individual test
    instead of breaking collection of the whole module.
    """

    # Dotted path of the clock hook replaced by the time-dependent tests.
    _NOW_TARGET = "app.utils.document_versioning._now"

    def _freeze_now(self, monkeypatch, moment: datetime) -> None:
        """Patch the versioning module's ``_now`` to always return *moment*."""
        monkeypatch.setattr(self._NOW_TARGET, lambda: moment)

    async def test_v1_creates_first_version(self, db_session, db_document):
        """V1: First snapshot creates version 1 with the document's current state."""
        from app.utils.document_versioning import create_version_snapshot

        await create_version_snapshot(db_session, db_document)

        versions = await _get_versions(db_session, db_document.id)
        assert len(versions) == 1
        assert versions[0].version_number == 1
        assert versions[0].source_markdown == "# Test\n\nOriginal content."
        assert versions[0].content_hash == "abc123"
        assert versions[0].title == "Test Doc"
        assert versions[0].document_id == db_document.id

    async def test_v2_creates_version_2_after_30_min(
        self, db_session, db_document, monkeypatch
    ):
        """V2: After 30+ minutes, a new version is created (not overwritten)."""
        from app.utils.document_versioning import create_version_snapshot

        t0 = datetime(2025, 1, 1, 12, 0, 0, tzinfo=UTC)
        self._freeze_now(monkeypatch, t0)
        await create_version_snapshot(db_session, db_document)

        # Simulate content change and time passing
        db_document.source_markdown = "# Test\n\nUpdated content."
        db_document.content_hash = "def456"
        self._freeze_now(monkeypatch, t0 + timedelta(minutes=31))
        await create_version_snapshot(db_session, db_document)

        versions = await _get_versions(db_session, db_document.id)
        assert len(versions) == 2
        assert versions[0].version_number == 1
        assert versions[1].version_number == 2
        assert versions[1].source_markdown == "# Test\n\nUpdated content."

    async def test_v3_overwrites_within_30_min(
        self, db_session, db_document, monkeypatch
    ):
        """V3: Within 30 minutes, the latest version is overwritten."""
        from app.utils.document_versioning import create_version_snapshot

        t0 = datetime(2025, 1, 1, 12, 0, 0, tzinfo=UTC)
        self._freeze_now(monkeypatch, t0)
        await create_version_snapshot(db_session, db_document)
        count_after_first = await _version_count(db_session, db_document.id)
        assert count_after_first == 1

        # Simulate quick edit within 30 minutes
        db_document.source_markdown = "# Test\n\nQuick edit."
        db_document.content_hash = "quick123"
        self._freeze_now(monkeypatch, t0 + timedelta(minutes=10))
        await create_version_snapshot(db_session, db_document)

        count_after_second = await _version_count(db_session, db_document.id)
        assert count_after_second == 1  # still 1, not 2

        versions = await _get_versions(db_session, db_document.id)
        assert versions[0].source_markdown == "# Test\n\nQuick edit."
        assert versions[0].content_hash == "quick123"

    async def test_v4_cleanup_90_day_old_versions(
        self, db_session, db_document, monkeypatch
    ):
        """V4: Versions older than 90 days are cleaned up."""
        from app.utils.document_versioning import create_version_snapshot

        base = datetime(2025, 1, 1, 12, 0, 0, tzinfo=UTC)

        # Create 5 versions spread across time: snapshots 1-3 on days 0-2
        # (old), snapshots 4-5 on days 103-104 (recent).
        for number, day_offset in enumerate((0, 1, 2, 103, 104), start=1):
            db_document.source_markdown = f"Content v{number}"
            db_document.content_hash = f"hash_{number}"
            self._freeze_now(monkeypatch, base + timedelta(days=day_offset))
            await create_version_snapshot(db_session, db_document)

        # Take the final snapshot on day 150: snapshots 1-3 are then 148+ days
        # old, while snapshots 4 and 5 are only 47 and 46 days old. (A later
        # "current" time such as day 200 would push 4 and 5 past the 90-day
        # limit as well, so the recent versions could never survive.)
        now = base + timedelta(days=150)
        self._freeze_now(monkeypatch, now)
        db_document.source_markdown = "Content v6"
        db_document.content_hash = "hash_6"
        await create_version_snapshot(db_session, db_document)

        versions = await _get_versions(db_session, db_document.id)

        # The first 3 (old) should be cleaned up; versions 4, 5, 6 remain.
        # Checking the surviving content keeps the age loop below from passing
        # vacuously when too many (or all) versions were deleted.
        assert [v.source_markdown for v in versions] == [
            "Content v4",
            "Content v5",
            "Content v6",
        ]
        for v in versions:
            age = now - v.created_at.replace(tzinfo=UTC)
            assert age <= timedelta(days=90), f"Version {v.version_number} is too old"

    async def test_v5_cap_at_20_versions(
        self, db_session, db_document, monkeypatch
    ):
        """V5: More than 20 versions triggers cap — oldest gets deleted."""
        from app.utils.document_versioning import create_version_snapshot

        base = datetime(2025, 6, 1, 12, 0, 0, tzinfo=UTC)

        # Create 21 versions (all within 90 days, each 31 min apart)
        for i in range(21):
            db_document.source_markdown = f"Content v{i+1}"
            db_document.content_hash = f"hash_{i+1}"
            self._freeze_now(monkeypatch, base + timedelta(minutes=31 * i))
            await create_version_snapshot(db_session, db_document)

        versions = await _get_versions(db_session, db_document.id)
        assert len(versions) == 20
        # The lowest version_number should be 2 (version 1 was the oldest and got capped)
        assert versions[0].version_number == 2
|
||||
Loading…
Add table
Add a link
Reference in a new issue