mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-25 19:15:18 +02:00
add fallback document summary
This commit is contained in:
parent
36d1fba75f
commit
ca870cf660
3 changed files with 27 additions and 0 deletions
|
|
@@ -12,6 +12,7 @@ class ConnectorDocument(BaseModel):
|
||||||
search_space_id: int = Field(gt=0)
|
search_space_id: int = Field(gt=0)
|
||||||
should_summarize: bool = True
|
should_summarize: bool = True
|
||||||
should_use_code_chunker: bool = False
|
should_use_code_chunker: bool = False
|
||||||
|
fallback_summary: str | None = None
|
||||||
metadata: dict = {}
|
metadata: dict = {}
|
||||||
connector_id: int = Field(gt=0)
|
connector_id: int = Field(gt=0)
|
||||||
created_by_id: str
|
created_by_id: str
|
||||||
|
|
|
||||||
|
|
@@ -124,6 +124,8 @@ class IndexingPipelineService:
|
||||||
content = await summarize_document(
|
content = await summarize_document(
|
||||||
connector_doc.source_markdown, llm, connector_doc.metadata
|
connector_doc.source_markdown, llm, connector_doc.metadata
|
||||||
)
|
)
|
||||||
|
elif connector_doc.should_summarize and connector_doc.fallback_summary:
|
||||||
|
content = connector_doc.fallback_summary
|
||||||
else:
|
else:
|
||||||
content = connector_doc.source_markdown
|
content = connector_doc.source_markdown
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -156,6 +156,30 @@ async def test_no_llm_falls_back_to_source_markdown(
|
||||||
assert reloaded.content == "## Fallback content"
|
assert reloaded.content == "## Fallback content"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text")
|
||||||
|
async def test_fallback_summary_used_when_llm_unavailable(
|
||||||
|
db_session, db_search_space, make_connector_document,
|
||||||
|
):
|
||||||
|
connector_doc = make_connector_document(
|
||||||
|
search_space_id=db_search_space.id,
|
||||||
|
should_summarize=True,
|
||||||
|
source_markdown="## Full raw content",
|
||||||
|
fallback_summary="Short pre-built summary.",
|
||||||
|
)
|
||||||
|
service = IndexingPipelineService(session=db_session)
|
||||||
|
|
||||||
|
prepared = await service.prepare_for_indexing([connector_doc])
|
||||||
|
document_id = prepared[0].id
|
||||||
|
|
||||||
|
await service.index(prepared[0], connector_doc, llm=None)
|
||||||
|
|
||||||
|
result = await db_session.execute(select(Document).filter(Document.id == document_id))
|
||||||
|
reloaded = result.scalars().first()
|
||||||
|
|
||||||
|
assert DocumentStatus.is_state(reloaded.status, DocumentStatus.READY)
|
||||||
|
assert reloaded.content == "Short pre-built summary."
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text")
|
@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text")
|
||||||
async def test_reindex_replaces_old_chunks(
|
async def test_reindex_replaces_old_chunks(
|
||||||
db_session, db_search_space, make_connector_document, mocker,
|
db_session, db_search_space, make_connector_document, mocker,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue