2026-02-25 08:29:53 +02:00
|
|
|
from pydantic import BaseModel, Field, field_validator
|
2026-02-24 22:20:08 +02:00
|
|
|
|
|
|
|
|
from app.db import DocumentType
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ConnectorDocument(BaseModel):
    """Canonical data transfer object produced by connector adapters and consumed by the indexing pipeline.

    Required fields are ``title``, ``source_markdown``, ``unique_id``,
    ``document_type``, ``search_space_id`` and ``created_by_id``; every
    other field carries a default.

    Validation performed by this model:
        * ``search_space_id`` must be strictly greater than zero.
        * ``title``, ``source_markdown``, ``unique_id`` and
          ``created_by_id`` are rejected when they are empty or consist
          solely of whitespace (see :meth:`not_empty`).
    """

    # --- Required content and identity (the three strings must be non-blank) ---
    title: str
    source_markdown: str
    unique_id: str
    document_type: DocumentType

    # Constrained to positive integers via ``gt=0``.
    search_space_id: int = Field(gt=0)

    # --- Optional flags and extras, each with a default ---
    should_summarize: bool = True
    should_use_code_chunker: bool = False
    fallback_summary: str | None = None
    metadata: dict = {}
    connector_id: int | None = None

    # --- Required, declared last; also covered by the non-blank check ---
    created_by_id: str

    @field_validator("title", "source_markdown", "unique_id", "created_by_id")
    @classmethod
    def not_empty(cls, v: str, info) -> str:
        """Reject values that are empty or contain only whitespace.

        Args:
            v: The string value of the field being validated.
            info: Validation info object; its ``field_name`` is used in
                the error message.

        Returns:
            ``v`` exactly as received (it is not stripped).

        Raises:
            ValueError: If ``v.strip()`` is empty.
        """
        has_content = bool(v.strip())
        if has_content:
            return v
        raise ValueError(f"{info.field_name} must not be empty or whitespace")
|