mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-26 01:06:23 +02:00
feat: implement local folder indexing and document versioning capabilities
This commit is contained in:
parent
fe6c2e9b52
commit
96a58d0d30
4 changed files with 1127 additions and 1 deletions
107
surfsense_backend/app/utils/document_versioning.py
Normal file
107
surfsense_backend/app/utils/document_versioning.py
Normal file
|
|
@ -0,0 +1,107 @@
|
"""Document versioning: snapshot creation and cleanup.

Rules:
- 30-minute debounce window: if the latest version was created < 30 min ago,
  overwrite it instead of creating a new row.
- Maximum 20 versions per document.
- Versions older than 90 days are cleaned up.
"""
|
||||
|
||||
from datetime import UTC, datetime, timedelta
|
||||
|
||||
from sqlalchemy import delete, func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db import Document, DocumentVersion
|
||||
|
||||
# Upper bound on stored versions per document; the lowest-numbered versions
# are deleted first once this is exceeded.
MAX_VERSIONS_PER_DOCUMENT = 20
# If the latest version is younger than this many minutes, it is overwritten
# in place instead of a new version row being created.
DEBOUNCE_MINUTES = 30
# Versions whose created_at is older than this many days are deleted.
RETENTION_DAYS = 90
|
||||
|
||||
|
||||
def _now() -> datetime:
|
||||
return datetime.now(UTC)
|
||||
|
||||
|
||||
async def create_version_snapshot(
    session: AsyncSession,
    document: Document,
) -> DocumentVersion | None:
    """Snapshot the document's current state into a DocumentVersion row.

    If the newest existing version (highest ``version_number``) is younger
    than ``DEBOUNCE_MINUTES``, that row is overwritten in place with the
    document's current ``source_markdown``, ``content_hash`` and ``title``
    and its ``created_at`` is reset to now. Otherwise a new row is inserted
    with the next version number, after which old/excess versions are pruned.

    Pruning only runs when a new row is inserted, not on a debounced
    overwrite. Changes are flushed to the session but not committed here.

    Args:
        session: Async SQLAlchemy session used for all queries and writes.
        document: Document whose current state is captured.

    Returns:
        The updated or newly created DocumentVersion. Every code path in this
        function returns a version; ``None`` remains in the annotation only
        for interface compatibility.
    """
    now = _now()

    latest = (
        await session.execute(
            select(DocumentVersion)
            .where(DocumentVersion.document_id == document.id)
            .order_by(DocumentVersion.version_number.desc())
            .limit(1)
        )
    ).scalar_one_or_none()

    if latest is not None:
        created_at = latest.created_at
        # Only tag *naive* timestamps as UTC. Calling replace() on an already
        # aware value would discard its real offset and skew the computed age.
        if created_at.tzinfo is None:
            created_at = created_at.replace(tzinfo=UTC)
        if now - created_at < timedelta(minutes=DEBOUNCE_MINUTES):
            # Debounce: refresh the latest snapshot instead of adding a row.
            latest.source_markdown = document.source_markdown
            latest.content_hash = document.content_hash
            latest.title = document.title
            latest.created_at = now
            await session.flush()
            return latest

    # COALESCE makes the first version of a document number 1.
    max_num = (
        await session.execute(
            select(func.coalesce(func.max(DocumentVersion.version_number), 0)).where(
                DocumentVersion.document_id == document.id
            )
        )
    ).scalar_one()

    version = DocumentVersion(
        document_id=document.id,
        version_number=max_num + 1,
        source_markdown=document.source_markdown,
        content_hash=document.content_hash,
        title=document.title,
        created_at=now,
    )
    session.add(version)
    # Flush before pruning so the new row is included in the count below.
    await session.flush()

    await _prune_versions(session, document.id, now)

    await session.flush()
    return version


async def _prune_versions(session: AsyncSession, document_id, now: datetime) -> None:
    """Delete expired versions, then trim the document to the version cap."""
    # Cleanup: remove versions older than RETENTION_DAYS.
    cutoff = now - timedelta(days=RETENTION_DAYS)
    await session.execute(
        delete(DocumentVersion).where(
            DocumentVersion.document_id == document_id,
            DocumentVersion.created_at < cutoff,
        )
    )

    # Cleanup: cap at MAX_VERSIONS_PER_DOCUMENT.
    count = (
        await session.execute(
            select(func.count())
            .select_from(DocumentVersion)
            .where(DocumentVersion.document_id == document_id)
        )
    ).scalar_one()

    excess = count - MAX_VERSIONS_PER_DOCUMENT
    if excess <= 0:
        return

    # Lowest version numbers are dropped first.
    oldest_ids_result = await session.execute(
        select(DocumentVersion.id)
        .where(DocumentVersion.document_id == document_id)
        .order_by(DocumentVersion.version_number.asc())
        .limit(excess)
    )
    oldest_ids = [row[0] for row in oldest_ids_result.all()]
    if oldest_ids:
        await session.execute(
            delete(DocumentVersion).where(DocumentVersion.id.in_(oldest_ids))
        )
|
||||
Loading…
Add table
Add a link
Reference in a new issue