# dograh/api/schemas/knowledge_base.py

"""Pydantic schemas for knowledge base operations."""
from datetime import datetime
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
class DocumentUploadRequestSchema(BaseModel):
    """Request schema for initiating document upload."""

    # Required fields: `...` is passed as the Field default.
    filename: str = Field(..., description="Name of the file to upload")
    mime_type: str = Field(..., description="MIME type of the file")

    # May be omitted by the caller; falls back to None.
    custom_metadata: Optional[Dict[str, Any]] = Field(
        None,
        description="Optional custom metadata",
    )
class DocumentUploadResponseSchema(BaseModel):
    """Response schema containing upload URL and document metadata."""

    # All three fields are required (`...` default) and typed as plain strings.
    upload_url: str = Field(
        ...,
        description="Signed URL for uploading the file",
    )
    document_uuid: str = Field(
        ...,
        description="Unique identifier for the document",
    )
    s3_key: str = Field(
        ...,
        description="S3 key where file should be uploaded",
    )
class ProcessDocumentRequestSchema(BaseModel):
    """Request schema for triggering document processing."""

    # Required identifiers of the document to process.
    document_uuid: str = Field(..., description="Document UUID to process")
    s3_key: str = Field(..., description="S3 key of the uploaded file")

    # Defaults to "chunked". The type is a free-form str, so this schema does
    # not itself restrict the value to the two modes named in the description.
    retrieval_mode: str = Field(
        "chunked",
        description=(
            "Retrieval mode: 'chunked' for vector search "
            "or 'full_document' for full text retrieval"
        ),
    )
class DocumentResponseSchema(BaseModel):
    """Response schema for document metadata."""

    # --- Identity and file information (all required) ---
    id: int
    document_uuid: str
    filename: str
    file_size_bytes: int
    file_hash: str
    mime_type: str

    # --- Processing state ---
    processing_status: str  # pending, processing, completed, failed
    processing_error: Optional[str] = None  # defaults to None when not supplied
    total_chunks: int
    retrieval_mode: str = "chunked"  # same default as the process request schema

    # --- Metadata ---
    # Both dicts are required (no default); their contents are not constrained
    # beyond string keys.
    custom_metadata: Dict[str, Any]
    docling_metadata: Dict[str, Any]
    source_url: Optional[str] = None

    # --- Audit and ownership (all required) ---
    created_at: datetime
    updated_at: datetime
    organization_id: int
    created_by: int
    is_active: bool
class DocumentListResponseSchema(BaseModel):
    """Response schema for list of documents."""

    # The documents carried by this response.
    documents: List[DocumentResponseSchema]

    # Integer fields accompanying the list; all required.
    # NOTE(review): presumably `total` is the overall match count and
    # `limit`/`offset` echo the paging parameters — confirm against the route.
    total: int
    limit: int
    offset: int
class ChunkSearchRequestSchema(BaseModel):
    """Request schema for searching similar chunks."""

    # Required search text.
    query: str = Field(..., description="Search query text")

    # Defaults to 5; validated to lie within 1..50 inclusive.
    limit: int = Field(
        5,
        ge=1,
        le=50,
        description="Maximum number of results",
    )

    # No document filter unless the caller supplies a list of UUIDs.
    document_uuids: Optional[List[str]] = Field(
        None,
        description="Filter by specific document UUIDs",
    )

    # No threshold by default; a supplied value must lie within 0.0..1.0.
    min_similarity: Optional[float] = Field(
        None,
        ge=0.0,
        le=1.0,
        description="Minimum similarity threshold",
    )
class ChunkResponseSchema(BaseModel):
    """Response schema for a document chunk."""

    # --- Chunk identity ---
    id: int
    document_id: int

    # --- Chunk content ---
    chunk_text: str
    # NOTE(review): declared Optional but without a default, unlike the other
    # Optional fields in this file. Under Pydantic v2 that makes the key
    # required (its value may still be None) — confirm this is intended.
    contextualized_text: Optional[str]
    chunk_index: int
    chunk_metadata: Dict[str, Any]

    # --- Parent document details ---
    filename: str
    document_uuid: str

    # --- Search score ---
    similarity: float
class ChunkSearchResponseSchema(BaseModel):
    """Response schema for chunk search results."""

    # The chunks returned for the search.
    chunks: List[ChunkResponseSchema]

    # Query string and result count reported with the chunks; both required.
    query: str
    total_results: int