mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-28 02:23:53 +02:00
feat: implement two-phase document indexing for Obsidian and Circleback connectors with real-time status updates
This commit is contained in:
parent
0f61a249c0
commit
629f6f9cf5
3 changed files with 394 additions and 192 deletions
|
|
@ -14,6 +14,34 @@ from app.db import Document
|
|||
# Single MarkdownifyTransformer instance created at import time.
# NOTE(review): presumably used to convert HTML to Markdown — confirm against its callers.
md = MarkdownifyTransformer()
|
||||
|
||||
|
||||
def safe_set_chunks(document: Document, chunks: list) -> None:
    """
    Attach ``chunks`` to ``document`` without triggering a lazy load.

    Use this helper in place of ``document.chunks = chunks``. A plain
    assignment makes SQLAlchemy fetch the previously stored chunks first
    (it compares old and new values for orphan detection), and that
    implicit lazy load fails in an async context with the asyncpg driver
    (MissingGreenlet / greenlet_spawn errors). Writing the collection via
    ``set_committed_value`` skips that load, so the helper behaves the same
    whether or not the document was loaded with ``selectinload``.

    NOTE(review): ``set_committed_value`` stores the value with no change
    history, i.e. as if it were loaded state. Presumably callers add new
    chunks to the session themselves — confirm at the call sites.

    Args:
        document: The Document whose ``chunks`` collection is replaced.
        chunks: The Chunk objects to install as the new collection.

    Example:
        # Unsafe in async code: document.chunks = chunks
        safe_set_chunks(document, chunks)
    """
    # Function-scope import: the name is only needed by this helper.
    from sqlalchemy.orm import attributes as orm_attributes

    orm_attributes.set_committed_value(document, "chunks", chunks)
|
||||
|
||||
|
||||
def get_current_timestamp() -> datetime:
|
||||
"""
|
||||
Get the current timestamp with timezone for updated_at field.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue