SurfSense/surfsense_backend/app/indexing_pipeline/connector_document.py

27 lines
835 B
Python
Raw Normal View History

from pydantic import BaseModel, Field, field_validator
from app.db import DocumentType
class ConnectorDocument(BaseModel):
2026-02-25 01:40:30 +02:00
"""Canonical data transfer object produced by connector adapters and consumed by the indexing pipeline."""
title: str
source_markdown: str
unique_id: str
document_type: DocumentType
search_space_id: int = Field(gt=0)
should_summarize: bool = True
should_use_code_chunker: bool = False
2026-02-25 13:47:36 +02:00
fallback_summary: str | None = None
metadata: dict = {}
connector_id: int | None = None
created_by_id: str
@field_validator("title", "source_markdown", "unique_id", "created_by_id")
@classmethod
def not_empty(cls, v: str, info) -> str:
if not v.strip():
raise ValueError(f"{info.field_name} must not be empty or whitespace")
return v