feat: add integration and unit tests for local folder indexing and document versioning

This commit is contained in:
Anish Sarkar 2026-04-02 11:12:16 +05:30
parent 96a58d0d30
commit 775dea7894
4 changed files with 894 additions and 0 deletions

View file

@ -0,0 +1,184 @@
"""Integration tests for document versioning snapshot + cleanup."""
from datetime import UTC, datetime, timedelta
import pytest
import pytest_asyncio
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.db import Document, DocumentType, DocumentVersion, SearchSpace, User
# Module-level ``pytestmark``: pytest applies the ``integration`` marker to
# every test collected from this file, so they can be selected/deselected
# with ``-m integration``.
pytestmark = pytest.mark.integration
@pytest_asyncio.fixture
async def db_document(
    db_session: AsyncSession, db_user: User, db_search_space: SearchSpace
) -> Document:
    """Provide a ``LOCAL_FOLDER_FILE`` document that has been added and flushed.

    The document belongs to ``db_search_space`` and is attributed to
    ``db_user``. It is flushed (not committed) on ``db_session`` before being
    returned; the tests below read ``db_document.id`` afterwards.
    """
    fields = {
        "title": "Test Doc",
        "document_type": DocumentType.LOCAL_FOLDER_FILE,
        "document_metadata": {},
        "content": "Summary of test doc.",
        "content_hash": "abc123",
        "unique_identifier_hash": "local_folder:test-folder:test.md",
        "source_markdown": "# Test\n\nOriginal content.",
        "search_space_id": db_search_space.id,
        "created_by_id": db_user.id,
    }
    document = Document(**fields)
    db_session.add(document)
    await db_session.flush()
    return document
async def _version_count(session: AsyncSession, document_id: int) -> int:
    """Return the number of ``DocumentVersion`` rows for ``document_id``."""
    count_stmt = (
        select(func.count())
        .select_from(DocumentVersion)
        .where(DocumentVersion.document_id == document_id)
    )
    outcome = await session.execute(count_stmt)
    return outcome.scalar_one()
async def _get_versions(
    session: AsyncSession, document_id: int
) -> list[DocumentVersion]:
    """Return all versions of ``document_id`` ordered by ascending version number."""
    versions_stmt = select(DocumentVersion)
    versions_stmt = versions_stmt.where(DocumentVersion.document_id == document_id)
    versions_stmt = versions_stmt.order_by(DocumentVersion.version_number)
    outcome = await session.execute(versions_stmt)
    return list(outcome.scalars().all())
class TestCreateVersionSnapshot:
    """V1-V5: TDD slices for create_version_snapshot.

    Each test imports ``create_version_snapshot`` locally and, where time
    matters, replaces the ``_now`` hook in ``app.utils.document_versioning``
    so snapshots can be taken at controlled instants.
    """

    # Dotted path of the clock hook that the time-dependent tests patch.
    _NOW_TARGET = "app.utils.document_versioning._now"

    def _freeze_now(self, monkeypatch, moment: datetime) -> None:
        """Patch the versioning module's ``_now`` hook to return ``moment``.

        ``moment`` is bound as a parameter, so calling this inside a loop does
        not suffer from late-binding closures.
        """
        monkeypatch.setattr(self._NOW_TARGET, lambda: moment)

    async def test_v1_creates_first_version(self, db_session, db_document):
        """V1: First snapshot creates version 1 with the document's current state."""
        from app.utils.document_versioning import create_version_snapshot

        await create_version_snapshot(db_session, db_document)

        versions = await _get_versions(db_session, db_document.id)
        assert len(versions) == 1
        assert versions[0].version_number == 1
        assert versions[0].source_markdown == "# Test\n\nOriginal content."
        assert versions[0].content_hash == "abc123"
        assert versions[0].title == "Test Doc"
        assert versions[0].document_id == db_document.id

    async def test_v2_creates_version_2_after_30_min(
        self, db_session, db_document, monkeypatch
    ):
        """V2: After 30+ minutes, a new version is created (not overwritten)."""
        from app.utils.document_versioning import create_version_snapshot

        t0 = datetime(2025, 1, 1, 12, 0, 0, tzinfo=UTC)
        self._freeze_now(monkeypatch, t0)
        await create_version_snapshot(db_session, db_document)

        # Simulate content change and time passing
        db_document.source_markdown = "# Test\n\nUpdated content."
        db_document.content_hash = "def456"
        self._freeze_now(monkeypatch, t0 + timedelta(minutes=31))
        await create_version_snapshot(db_session, db_document)

        versions = await _get_versions(db_session, db_document.id)
        assert len(versions) == 2
        assert versions[0].version_number == 1
        assert versions[1].version_number == 2
        assert versions[1].source_markdown == "# Test\n\nUpdated content."

    async def test_v3_overwrites_within_30_min(
        self, db_session, db_document, monkeypatch
    ):
        """V3: Within 30 minutes, the latest version is overwritten."""
        from app.utils.document_versioning import create_version_snapshot

        t0 = datetime(2025, 1, 1, 12, 0, 0, tzinfo=UTC)
        self._freeze_now(monkeypatch, t0)
        await create_version_snapshot(db_session, db_document)
        count_after_first = await _version_count(db_session, db_document.id)
        assert count_after_first == 1

        # Simulate quick edit within 30 minutes
        db_document.source_markdown = "# Test\n\nQuick edit."
        db_document.content_hash = "quick123"
        self._freeze_now(monkeypatch, t0 + timedelta(minutes=10))
        await create_version_snapshot(db_session, db_document)

        count_after_second = await _version_count(db_session, db_document.id)
        assert count_after_second == 1  # still 1, not 2
        versions = await _get_versions(db_session, db_document.id)
        assert versions[0].source_markdown == "# Test\n\nQuick edit."
        assert versions[0].content_hash == "quick123"

    async def test_v4_cleanup_90_day_old_versions(
        self, db_session, db_document, monkeypatch
    ):
        """V4: Versions older than 90 days are cleaned up."""
        from app.utils.document_versioning import create_version_snapshot

        base = datetime(2025, 1, 1, 12, 0, 0, tzinfo=UTC)

        # Create 5 versions spread across time: three "old" ones at days 0-2
        # and two "recent" ones at days 103-104.
        for i in range(5):
            db_document.source_markdown = f"Content v{i+1}"
            db_document.content_hash = f"hash_{i+1}"
            offset_days = i if i < 3 else 100 + i
            self._freeze_now(monkeypatch, base + timedelta(days=offset_days))
            await create_version_snapshot(db_session, db_document)

        # Trigger cleanup from day 150: the first 3 versions are 148-150 days
        # old (> 90), while the two recent ones are only 46-47 days old.
        # (Day 200 would put the "recent" versions at 96-97 days, i.e. also
        # outside the 90-day window, contradicting the expectation below.)
        now = base + timedelta(days=150)
        self._freeze_now(monkeypatch, now)
        db_document.source_markdown = "Content v6"
        db_document.content_hash = "hash_6"
        await create_version_snapshot(db_session, db_document)

        versions = await _get_versions(db_session, db_document.id)

        # The first 3 (old) should be cleaned up; versions 4, 5, 6 remain.
        # Checking the surviving contents also prevents the age loop below
        # from passing vacuously on an empty result.
        assert [v.source_markdown for v in versions] == [
            "Content v4",
            "Content v5",
            "Content v6",
        ]
        for v in versions:
            age = now - v.created_at.replace(tzinfo=UTC)
            assert age <= timedelta(days=90), f"Version {v.version_number} is too old"

    async def test_v5_cap_at_20_versions(
        self, db_session, db_document, monkeypatch
    ):
        """V5: More than 20 versions triggers cap — oldest gets deleted."""
        from app.utils.document_versioning import create_version_snapshot

        base = datetime(2025, 6, 1, 12, 0, 0, tzinfo=UTC)

        # Create 21 versions (all within 90 days, each 31 min apart)
        for i in range(21):
            db_document.source_markdown = f"Content v{i+1}"
            db_document.content_hash = f"hash_{i+1}"
            self._freeze_now(monkeypatch, base + timedelta(minutes=31 * i))
            await create_version_snapshot(db_session, db_document)

        versions = await _get_versions(db_session, db_document.id)
        assert len(versions) == 20
        # The lowest version_number should be 2 (version 1 was the oldest and got capped)
        assert versions[0].version_number == 2