feat: update document tracking to use 'updated_at' timestamp instead of 'last_edited_at'

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2025-12-12 01:32:14 -08:00
parent a313387e0f
commit 8c9aa68faa
28 changed files with 253 additions and 18 deletions

View file

@ -2,6 +2,8 @@
Base functionality and shared imports for document processors.
"""
from datetime import UTC, datetime
from langchain_community.document_transformers import MarkdownifyTransformer
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
@ -12,6 +14,16 @@ from app.db import Document
md = MarkdownifyTransformer()
def get_current_timestamp() -> datetime:
"""
Get the current timestamp with timezone for updated_at field.
Returns:
Current datetime with UTC timezone
"""
return datetime.now(UTC)
async def check_duplicate_document(
session: AsyncSession, content_hash: str
) -> Document | None:

View file

@ -20,6 +20,7 @@ from app.utils.document_converters import (
from .base import (
check_document_by_unique_identifier,
get_current_timestamp,
)
@ -165,6 +166,7 @@ async def add_extension_received_document(
existing_document.document_metadata = content.metadata.model_dump()
existing_document.chunks = chunks
existing_document.blocknote_document = blocknote_json
existing_document.updated_at = get_current_timestamp()
await session.commit()
await session.refresh(existing_document)
@ -182,6 +184,7 @@ async def add_extension_received_document(
content_hash=content_hash,
unique_identifier_hash=unique_identifier_hash,
blocknote_document=blocknote_json,
updated_at=get_current_timestamp(),
)
session.add(document)

View file

@ -27,6 +27,7 @@ from app.utils.document_converters import (
from .base import (
check_document_by_unique_identifier,
get_current_timestamp,
)
from .markdown_processor import add_received_markdown_file_document
@ -123,7 +124,7 @@ async def add_received_file_document_using_unstructured(
existing_document.chunks = chunks
existing_document.blocknote_document = blocknote_json
existing_document.content_needs_reindexing = False
existing_document.last_edited_at = None
existing_document.updated_at = get_current_timestamp()
await session.commit()
await session.refresh(existing_document)
@ -145,7 +146,7 @@ async def add_received_file_document_using_unstructured(
unique_identifier_hash=unique_identifier_hash,
blocknote_document=blocknote_json,
content_needs_reindexing=False,
last_edited_at=None,
updated_at=get_current_timestamp(),
)
session.add(document)
@ -252,7 +253,7 @@ async def add_received_file_document_using_llamacloud(
existing_document.chunks = chunks
existing_document.blocknote_document = blocknote_json
existing_document.content_needs_reindexing = False
existing_document.last_edited_at = None
existing_document.updated_at = get_current_timestamp()
await session.commit()
await session.refresh(existing_document)
@ -274,7 +275,7 @@ async def add_received_file_document_using_llamacloud(
unique_identifier_hash=unique_identifier_hash,
blocknote_document=blocknote_json,
content_needs_reindexing=False,
last_edited_at=None,
updated_at=get_current_timestamp(),
)
session.add(document)
@ -406,7 +407,7 @@ async def add_received_file_document_using_docling(
existing_document.chunks = chunks
existing_document.blocknote_document = blocknote_json
existing_document.content_needs_reindexing = False
existing_document.last_edited_at = None
existing_document.updated_at = get_current_timestamp()
await session.commit()
await session.refresh(existing_document)
@ -428,7 +429,7 @@ async def add_received_file_document_using_docling(
unique_identifier_hash=unique_identifier_hash,
blocknote_document=blocknote_json,
content_needs_reindexing=False,
last_edited_at=None,
updated_at=get_current_timestamp(),
)
session.add(document)

View file

@ -19,6 +19,7 @@ from app.utils.document_converters import (
from .base import (
check_document_by_unique_identifier,
get_current_timestamp,
)
@ -131,6 +132,7 @@ async def add_received_markdown_file_document(
}
existing_document.chunks = chunks
existing_document.blocknote_document = blocknote_json
existing_document.updated_at = get_current_timestamp()
await session.commit()
await session.refresh(existing_document)
@ -150,6 +152,7 @@ async def add_received_markdown_file_document(
content_hash=content_hash,
unique_identifier_hash=unique_identifier_hash,
blocknote_document=blocknote_json,
updated_at=get_current_timestamp(),
)
session.add(document)

View file

@ -22,6 +22,7 @@ from app.utils.document_converters import (
from .base import (
check_document_by_unique_identifier,
get_current_timestamp,
)
@ -325,6 +326,7 @@ async def add_youtube_video_document(
}
existing_document.chunks = chunks
existing_document.blocknote_document = blocknote_json
existing_document.updated_at = get_current_timestamp()
await session.commit()
await session.refresh(existing_document)
@ -354,6 +356,7 @@ async def add_youtube_video_document(
content_hash=content_hash,
unique_identifier_hash=unique_identifier_hash,
blocknote_document=blocknote_json,
updated_at=get_current_timestamp(),
)
session.add(document)