refactor: update safe_set_chunks function to be asynchronous and modify all connector and document processor files to use the new async implementation

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-03-15 00:44:27 -07:00
parent 49d8f41b09
commit 2b33dfe728
30 changed files with 102 additions and 106 deletions

View file

@ -13,12 +13,32 @@ from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from sqlalchemy import delete as sa_delete
from sqlalchemy.orm.attributes import set_committed_value
from app.config import config
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker
from app.utils.document_converters import embed_text
logger = logging.getLogger(__name__)
async def _safe_set_docs_chunks(
    session: AsyncSession, document: SurfsenseDocsDocument, chunks: list
) -> None:
    """Replace the chunk rows of a Surfsense docs document with ``chunks``.

    Counterpart of ``safe_set_chunks`` for the
    ``SurfsenseDocsDocument`` / ``SurfsenseDocsChunk`` models.

    When the document already has a primary key, every
    ``SurfsenseDocsChunk`` row pointing at it is deleted with a single
    DELETE statement and each new chunk is given the document's id. The
    new list is then stored on ``document.chunks`` through
    ``set_committed_value`` and the chunks are added to the session.

    Args:
        session: Async session used to run the DELETE and to register the
            new chunks.
        document: Document whose ``chunks`` collection is being replaced.
        chunks: New chunk objects for the document.
    """
    doc_id = document.id

    if doc_id is not None:
        # Drop the previously stored chunks for this document in one statement.
        purge_stmt = sa_delete(SurfsenseDocsChunk).where(
            SurfsenseDocsChunk.document_id == doc_id
        )
        await session.execute(purge_stmt)

        # Point each new chunk at the document explicitly.
        for new_chunk in chunks:
            new_chunk.document_id = doc_id

    # NOTE(review): set_committed_value is presumably used instead of plain
    # assignment so the existing collection is not lazy-loaded under the
    # async session -- confirm against the shared safe_set_chunks helper.
    set_committed_value(document, "chunks", chunks)
    session.add_all(chunks)
# Path to docs relative to project root
DOCS_DIR = (
Path(__file__).resolve().parent.parent.parent.parent
@ -156,7 +176,7 @@ async def index_surfsense_docs(session: AsyncSession) -> tuple[int, int, int, in
existing_doc.content = content
existing_doc.content_hash = content_hash
existing_doc.embedding = embed_text(content)
existing_doc.chunks = chunks
await _safe_set_docs_chunks(session, existing_doc, chunks)
existing_doc.updated_at = datetime.now(UTC)
updated += 1