feat: Fixed Document Summary Content across connectors and processors

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2025-08-18 20:51:48 -07:00
parent c6921a4083
commit 1c4c61eb04
19 changed files with 474 additions and 233 deletions

View file

@@ -8,9 +8,7 @@ from datetime import datetime, timedelta
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from app.config import config
from app.db import (
Chunk,
Document,
SearchSourceConnector,
SearchSourceConnectorType,
@@ -39,25 +37,6 @@ async def check_duplicate_document_by_hash(
return existing_doc_result.scalars().first()
async def create_document_chunks(content: str) -> list[Chunk]:
    """
    Split document content into chunks and embed each one.

    The text is passed to ``config.chunker_instance.chunk``; for every piece
    it yields, the piece's ``text`` is embedded with
    ``config.embedding_model_instance.embed`` and wrapped in a ``Chunk``.

    Note: although declared ``async``, nothing is awaited here, so the
    chunking and embedding calls run synchronously inside the coroutine.

    Args:
        content: Document content to chunk

    Returns:
        List of Chunk objects, one per piece produced by the chunker, each
        carrying the piece text and its embedding
    """
    document_chunks = []
    for piece in config.chunker_instance.chunk(content):
        piece_text = piece.text
        # The embedding model is looked up per piece (not hoisted) so that
        # content producing no pieces never touches the embedder at all.
        piece_embedding = config.embedding_model_instance.embed(piece_text)
        document_chunks.append(
            Chunk(content=piece_text, embedding=piece_embedding)
        )
    return document_chunks
async def get_connector_by_id(
session: AsyncSession, connector_id: int, connector_type: SearchSourceConnectorType
) -> SearchSourceConnector | None: