mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-26 01:06:23 +02:00
feat: update document tracking to use 'updated_at' timestamp instead of 'last_edited_at'
This commit is contained in:
parent
a313387e0f
commit
8c9aa68faa
28 changed files with 253 additions and 18 deletions
|
|
@ -2,6 +2,8 @@
|
|||
Base functionality and shared imports for document processors.
|
||||
"""
|
||||
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from langchain_community.document_transformers import MarkdownifyTransformer
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
|
|
@ -12,6 +14,16 @@ from app.db import Document
|
|||
md = MarkdownifyTransformer()
|
||||
|
||||
|
||||
def get_current_timestamp() -> datetime:
|
||||
"""
|
||||
Get the current timestamp with timezone for updated_at field.
|
||||
|
||||
Returns:
|
||||
Current datetime with UTC timezone
|
||||
"""
|
||||
return datetime.now(UTC)
|
||||
|
||||
|
||||
async def check_duplicate_document(
|
||||
session: AsyncSession, content_hash: str
|
||||
) -> Document | None:
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ from app.utils.document_converters import (
|
|||
|
||||
from .base import (
|
||||
check_document_by_unique_identifier,
|
||||
get_current_timestamp,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -165,6 +166,7 @@ async def add_extension_received_document(
|
|||
existing_document.document_metadata = content.metadata.model_dump()
|
||||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
|
|
@ -182,6 +184,7 @@ async def add_extension_received_document(
|
|||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ from app.utils.document_converters import (
|
|||
|
||||
from .base import (
|
||||
check_document_by_unique_identifier,
|
||||
get_current_timestamp,
|
||||
)
|
||||
from .markdown_processor import add_received_markdown_file_document
|
||||
|
||||
|
|
@ -123,7 +124,7 @@ async def add_received_file_document_using_unstructured(
|
|||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
existing_document.content_needs_reindexing = False
|
||||
existing_document.last_edited_at = None
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
|
|
@ -145,7 +146,7 @@ async def add_received_file_document_using_unstructured(
|
|||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
content_needs_reindexing=False,
|
||||
last_edited_at=None,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
@ -252,7 +253,7 @@ async def add_received_file_document_using_llamacloud(
|
|||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
existing_document.content_needs_reindexing = False
|
||||
existing_document.last_edited_at = None
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
|
|
@ -274,7 +275,7 @@ async def add_received_file_document_using_llamacloud(
|
|||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
content_needs_reindexing=False,
|
||||
last_edited_at=None,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
@ -406,7 +407,7 @@ async def add_received_file_document_using_docling(
|
|||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
existing_document.content_needs_reindexing = False
|
||||
existing_document.last_edited_at = None
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
|
|
@ -428,7 +429,7 @@ async def add_received_file_document_using_docling(
|
|||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
content_needs_reindexing=False,
|
||||
last_edited_at=None,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ from app.utils.document_converters import (
|
|||
|
||||
from .base import (
|
||||
check_document_by_unique_identifier,
|
||||
get_current_timestamp,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -131,6 +132,7 @@ async def add_received_markdown_file_document(
|
|||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
|
|
@ -150,6 +152,7 @@ async def add_received_markdown_file_document(
|
|||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ from app.utils.document_converters import (
|
|||
|
||||
from .base import (
|
||||
check_document_by_unique_identifier,
|
||||
get_current_timestamp,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -325,6 +326,7 @@ async def add_youtube_video_document(
|
|||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
|
|
@ -354,6 +356,7 @@ async def add_youtube_video_document(
|
|||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue