2026-04-02 11:11:57 +05:30
|
|
|
"""Document versioning: snapshot creation and cleanup.
|
|
|
|
|
|
|
|
|
|
Rules:
- 30-minute debounce window: if the latest version was created < 30 min ago,
  overwrite it instead of creating a new row.
- Maximum 20 versions per document; the lowest-numbered versions are deleted
  first.
- Versions older than 90 days are cleaned up whenever a new version row is
  created.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
from datetime import UTC, datetime, timedelta
|
|
|
|
|
|
|
|
|
|
from sqlalchemy import delete, func, select
|
|
|
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
|
|
|
|
|
|
from app.db import Document, DocumentVersion
|
|
|
|
|
|
|
|
|
|
# A latest version younger than this many minutes is overwritten in place
# instead of producing a new version row.
DEBOUNCE_MINUTES = 30

# Upper bound on stored versions per document; the lowest version numbers
# are deleted first once the bound is exceeded.
MAX_VERSIONS_PER_DOCUMENT = 20

# Versions whose created_at is older than this many days are deleted.
RETENTION_DAYS = 90
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _now() -> datetime:
|
|
|
|
|
return datetime.now(UTC)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def create_version_snapshot(
    session: AsyncSession,
    document: Document,
) -> DocumentVersion | None:
    """Snapshot the document's current state into a DocumentVersion row.

    If the newest version of the document is younger than
    ``DEBOUNCE_MINUTES``, that row is overwritten in place (content, hash,
    title and timestamp) and no cleanup runs. Otherwise a new row with the
    next version number is added, after which versions older than
    ``RETENTION_DAYS`` are deleted and the document is trimmed to at most
    ``MAX_VERSIONS_PER_DOCUMENT`` rows, lowest version numbers first.

    Changes are flushed to the session but not committed here.

    Args:
        session: Async SQLAlchemy session used for all reads and writes.
        document: Document whose ``source_markdown``, ``content_hash`` and
            ``title`` are copied into the version row.

    Returns:
        The created or updated DocumentVersion. Every current code path
        returns a row; ``None`` remains in the annotation only so existing
        callers' type checks stay valid.
    """
    now = _now()

    latest = (
        await session.execute(
            select(DocumentVersion)
            .where(DocumentVersion.document_id == document.id)
            .order_by(DocumentVersion.version_number.desc())
            .limit(1)
        )
    ).scalar_one_or_none()

    if latest is not None:
        created_at = latest.created_at
        # Only tag naive timestamps as UTC. Calling replace() on an aware
        # value would relabel it without converting, skewing the age by the
        # original UTC offset; aware values subtract correctly as they are.
        if created_at.tzinfo is None:
            created_at = created_at.replace(tzinfo=UTC)

        if now - created_at < timedelta(minutes=DEBOUNCE_MINUTES):
            # Inside the debounce window: refresh the latest row in place.
            latest.source_markdown = document.source_markdown
            latest.content_hash = document.content_hash
            latest.title = document.title
            latest.created_at = now
            await session.flush()
            return latest

    # `latest` was selected by descending version_number, so it already
    # carries the current maximum; no separate MAX() query is needed.
    previous_number = latest.version_number if latest is not None else 0

    version = DocumentVersion(
        document_id=document.id,
        version_number=previous_number + 1,
        source_markdown=document.source_markdown,
        content_hash=document.content_hash,
        title=document.title,
        created_at=now,
    )
    session.add(version)
    # Flush first so the new row is counted by the cleanup queries below.
    await session.flush()

    # Cleanup: remove versions older than RETENTION_DAYS.
    cutoff = now - timedelta(days=RETENTION_DAYS)
    await session.execute(
        delete(DocumentVersion).where(
            DocumentVersion.document_id == document.id,
            DocumentVersion.created_at < cutoff,
        )
    )

    # Cleanup: cap at MAX_VERSIONS_PER_DOCUMENT, dropping the lowest
    # version numbers first.
    count = (
        await session.execute(
            select(func.count())
            .select_from(DocumentVersion)
            .where(DocumentVersion.document_id == document.id)
        )
    ).scalar_one()

    if count > MAX_VERSIONS_PER_DOCUMENT:
        excess = count - MAX_VERSIONS_PER_DOCUMENT
        oldest_ids = (
            await session.execute(
                select(DocumentVersion.id)
                .where(DocumentVersion.document_id == document.id)
                .order_by(DocumentVersion.version_number.asc())
                .limit(excess)
            )
        ).scalars().all()
        if oldest_ids:
            await session.execute(
                delete(DocumentVersion).where(DocumentVersion.id.in_(oldest_ids))
            )

    await session.flush()
    return version
|