mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-07 07:55:16 +02:00
102 lines
3 KiB
Python
102 lines
3 KiB
Python
"""Pydantic schemas for knowledge base operations."""
|
|
|
|
from datetime import datetime
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
class DocumentUploadRequestSchema(BaseModel):
    """Payload a client sends to start a document upload.

    ``filename`` and ``mime_type`` are required; ``custom_metadata`` is an
    optional free-form mapping that defaults to ``None``.
    """

    # Required: name of the file about to be uploaded.
    filename: str = Field(..., description="Name of the file to upload")
    # Required: MIME type reported for the file.
    mime_type: str = Field(..., description="MIME type of the file")
    # Optional: arbitrary key/value metadata; omitted means None.
    custom_metadata: Optional[Dict[str, Any]] = Field(
        None,
        description="Optional custom metadata",
    )
|
|
|
|
|
|
class DocumentUploadResponseSchema(BaseModel):
    """Reply to an upload request: where to upload and how to refer to it.

    All three fields are required strings.
    """

    # Signed URL the client uploads the file to.
    upload_url: str = Field(
        ...,
        description="Signed URL for uploading the file",
    )
    # Identifier assigned to the document.
    document_uuid: str = Field(
        ...,
        description="Unique identifier for the document",
    )
    # S3 key the uploaded file is expected to land under.
    s3_key: str = Field(
        ...,
        description="S3 key where file should be uploaded",
    )
|
|
|
|
|
|
class ProcessDocumentRequestSchema(BaseModel):
    """Payload that asks for an uploaded document to be processed.

    ``document_uuid`` and ``s3_key`` are required. ``retrieval_mode`` is a
    plain string defaulting to ``"chunked"``; the type does not restrict it
    to the values named in its description.
    """

    # Required: which document to process.
    document_uuid: str = Field(..., description="Document UUID to process")
    # Required: S3 key of the file that was uploaded.
    s3_key: str = Field(..., description="S3 key of the uploaded file")
    # Adjacent literals below concatenate to the original single-line text.
    retrieval_mode: str = Field(
        "chunked",
        description=(
            "Retrieval mode: 'chunked' for vector search or "
            "'full_document' for full text retrieval"
        ),
    )
|
|
|
|
|
|
class DocumentResponseSchema(BaseModel):
    """Metadata describing a single knowledge base document.

    Every field is required except ``processing_error`` and ``source_url``
    (default ``None``) and ``retrieval_mode`` (default ``"chunked"``).
    """

    # --- identity and file details ---
    id: int
    document_uuid: str
    filename: str
    file_size_bytes: int
    file_hash: str
    mime_type: str

    # --- processing state ---
    # Status values noted by the original author: pending, processing,
    # completed, failed. The annotation is a plain str, so this is not
    # enforced by the schema.
    processing_status: str
    processing_error: Optional[str] = Field(default=None)
    total_chunks: int
    retrieval_mode: str = Field(default="chunked")

    # --- free-form metadata ---
    custom_metadata: Dict[str, Any]
    # NOTE(review): presumably produced by the Docling parsing step -- confirm.
    docling_metadata: Dict[str, Any]
    source_url: Optional[str] = Field(default=None)

    # --- audit and ownership ---
    created_at: datetime
    updated_at: datetime
    organization_id: int
    created_by: int
    is_active: bool
|
|
|
|
|
|
class DocumentListResponseSchema(BaseModel):
    """One page of documents plus the paging values that go with it.

    All fields are required.
    """

    # Document entries contained in this response.
    documents: List[DocumentResponseSchema]
    # Count reported alongside the page.
    # NOTE(review): presumably the overall match count, not len(documents) -- confirm.
    total: int
    # Paging values returned with the result.
    limit: int
    offset: int
|
|
|
|
|
|
class ChunkSearchRequestSchema(BaseModel):
    """Payload for a similarity search over document chunks.

    Only ``query`` is required. ``limit`` defaults to 5 and must lie in
    1..50; ``min_similarity``, when given, must lie in 0.0..1.0.
    """

    # Required: text to search for.
    query: str = Field(..., description="Search query text")
    # Result cap, validated to the inclusive range [1, 50].
    limit: int = Field(
        5,
        ge=1,
        le=50,
        description="Maximum number of results",
    )
    # Optional restriction to a set of documents; None means no filter given.
    document_uuids: Optional[List[str]] = Field(
        None,
        description="Filter by specific document UUIDs",
    )
    # Optional similarity floor, validated to the inclusive range [0.0, 1.0].
    min_similarity: Optional[float] = Field(
        None,
        ge=0.0,
        le=1.0,
        description="Minimum similarity threshold",
    )
|
|
|
|
|
|
class ChunkResponseSchema(BaseModel):
    """A single document chunk as returned in search results.

    All fields are required except ``contextualized_text``, which may be
    ``None`` and defaults to ``None`` when omitted.
    """

    # Identifiers of the chunk and of the document it belongs to.
    id: int
    document_id: int

    # Text of the chunk.
    chunk_text: str
    # Explicit None default: a bare ``Optional[str]`` annotation is still a
    # required field under Pydantic v2, unlike the other Optional fields in
    # this module, which all declare ``None`` as their default.
    contextualized_text: Optional[str] = None

    # Position of the chunk and its free-form metadata.
    chunk_index: int
    chunk_metadata: Dict[str, Any]

    # Details of the parent document, carried on each chunk.
    filename: str
    document_uuid: str

    # Similarity score for this chunk.
    similarity: float
|
|
|
|
|
|
class ChunkSearchResponseSchema(BaseModel):
    """Result envelope for a chunk similarity search.

    All fields are required.
    """

    # Chunks returned for the search.
    chunks: List[ChunkResponseSchema]
    # Query string included in the response.
    query: str
    # Result count reported with the response.
    total_results: int
|