2025-07-24 14:43:48 -07:00
|
|
|
from datetime import datetime
|
2025-10-01 18:50:36 -07:00
|
|
|
from typing import TypeVar
|
2026-02-02 12:32:24 +05:30
|
|
|
from uuid import UUID
|
2025-07-24 14:43:48 -07:00
|
|
|
|
2025-05-07 22:04:57 -07:00
|
|
|
from pydantic import BaseModel, ConfigDict
|
2025-07-24 14:43:48 -07:00
|
|
|
|
2025-03-14 18:53:14 -07:00
|
|
|
from app.db import DocumentType
|
2025-07-24 14:43:48 -07:00
|
|
|
|
2025-08-23 18:48:18 -07:00
|
|
|
from .chunks import ChunkRead
|
|
|
|
|
|
2025-10-01 13:05:22 -07:00
|
|
|
# Module-level generic type variable.
# NOTE(review): PaginatedResponse in this file declares its own type
# parameter with the `class Name[T]` syntax, which does not refer to this
# variable — confirm whether anything else still imports `T` from here.
T = TypeVar("T")
|
|
|
|
|
|
2025-03-14 18:53:14 -07:00
|
|
|
|
|
|
|
|
class ExtensionDocumentMetadata(BaseModel):
    # Metadata record embedded in ExtensionDocumentContent.metadata.
    # All six fields are required and typed as plain strings, including the
    # ISO timestamp and the visit duration in milliseconds.
    # NOTE(review): the PascalCase names (and the "Refferer" spelling) are the
    # literal payload keys this model accepts; presumably they mirror what the
    # sending client emits, so do not rename without confirming.

    BrowsingSessionId: str
    VisitedWebPageURL: str
    VisitedWebPageTitle: str
    VisitedWebPageDateWithTimeInISOString: str
    VisitedWebPageReffererURL: str
    VisitedWebPageVisitDurationInMilliseconds: str
|
|
|
|
|
|
2025-07-24 14:43:48 -07:00
|
|
|
|
2025-03-14 18:53:14 -07:00
|
|
|
class ExtensionDocumentContent(BaseModel):
    # One content entry: a metadata record plus the page text.
    # Used as one of the accepted element types of DocumentBase.content.

    # Structured metadata describing the entry.
    metadata: ExtensionDocumentMetadata
    # Text body; the camelCase key is intentional, hence the lint waiver.
    pageContent: str  # noqa: N815
|
|
|
|
|
|
2025-03-14 18:53:14 -07:00
|
|
|
|
|
|
|
|
class DocumentBase(BaseModel):
    # Shared fields for the document write schemas (DocumentsCreate and
    # DocumentUpdate both subclass this without adding anything).

    # Kind of document, as defined by app.db.DocumentType.
    document_type: DocumentType
    # Accepts three shapes: a list of ExtensionDocumentContent entries,
    # a list of strings, or a single string.
    content: list[ExtensionDocumentContent] | list[str] | str
    # Integer id of the search space the document belongs to.
    search_space_id: int
|
|
|
|
|
|
2025-07-24 14:43:48 -07:00
|
|
|
|
2025-03-14 18:53:14 -07:00
|
|
|
class DocumentsCreate(DocumentBase):
    # Create payload: identical to DocumentBase, no extra fields.
    ...
|
|
|
|
|
|
2025-07-24 14:43:48 -07:00
|
|
|
|
2025-03-14 18:53:14 -07:00
|
|
|
class DocumentUpdate(DocumentBase):
    # Update payload: identical to DocumentBase, no extra fields.
    ...
|
|
|
|
|
|
2025-07-24 14:43:48 -07:00
|
|
|
|
2026-02-05 21:59:31 +05:30
|
|
|
class DocumentStatusSchema(BaseModel):
    """Document processing status."""

    # Free-form string; the values noted by the author are "ready",
    # "processing" and "failed", but the type does not enforce them.
    state: str
    # Optional explanatory text; defaults to None when omitted.
    reason: str | None = None
|
|
|
|
|
|
|
|
|
|
|
2025-03-14 18:53:14 -07:00
|
|
|
class DocumentRead(BaseModel):
    # Full read representation of a document.
    # from_attributes=True lets the model be populated from an object's
    # attributes rather than only from a mapping.
    model_config = ConfigDict(from_attributes=True)

    # --- identity and classification ---
    id: int
    title: str
    document_type: DocumentType
    document_metadata: dict

    # --- body and hashes ---
    # Always a single string on read (kept as str to match the frontend).
    content: str
    content_hash: str
    # Nullable but has no default, so it must still be supplied.
    unique_identifier_hash: str | None

    # --- timestamps and ownership ---
    created_at: datetime
    # Nullable but has no default, so it must still be supplied.
    updated_at: datetime | None
    search_space_id: int

    # --- optional creator details (all default to None) ---
    # User who created/uploaded this document.
    created_by_id: UUID | None = None
    created_by_name: str | None = None
    created_by_email: str | None = None

    # Optional processing status; None when not provided.
    status: DocumentStatusSchema | None = None
|
2025-08-23 18:48:18 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class DocumentWithChunksRead(DocumentRead):
    # DocumentRead plus the document's chunks.
    # The config repeats the same from_attributes=True setting that
    # DocumentRead already declares; it is kept explicit here.
    model_config = ConfigDict(from_attributes=True)

    # Chunk list; an omitted value yields an empty list.
    chunks: list[ChunkRead] = []
|
2025-10-01 13:05:22 -07:00
|
|
|
|
|
|
|
|
|
2025-10-01 18:50:36 -07:00
|
|
|
class PaginatedResponse[T](BaseModel):
|
2025-10-01 13:05:22 -07:00
|
|
|
items: list[T]
|
|
|
|
|
total: int
|
2025-12-17 00:09:43 -08:00
|
|
|
page: int
|
|
|
|
|
page_size: int
|
|
|
|
|
has_more: bool
|
2026-01-17 20:45:10 +05:30
|
|
|
|
|
|
|
|
|
|
|
|
|
class DocumentTitleRead(BaseModel):
    """Lightweight document response for mention picker - only essential fields."""

    # Allows population from an object's attributes.
    model_config = ConfigDict(from_attributes=True)

    # The only three fields exposed: id, title and type.
    id: int
    title: str
    document_type: DocumentType
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class DocumentTitleSearchResponse(BaseModel):
    """Response for document title search - optimized for typeahead."""

    # Matching documents in their lightweight DocumentTitleRead form.
    items: list[DocumentTitleRead]
    # Boolean flag indicating more matches are available.
    has_more: bool
|
2026-02-09 16:46:54 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class DocumentStatusItemRead(BaseModel):
    """Lightweight document status payload for batch status polling."""

    # Allows population from an object's attributes.
    model_config = ConfigDict(from_attributes=True)

    id: int
    title: str
    document_type: DocumentType
    # Required here, unlike DocumentRead.status which may be None.
    status: DocumentStatusSchema
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class DocumentStatusBatchResponse(BaseModel):
    """Batch status response for a set of document IDs."""

    # One DocumentStatusItemRead per returned document.
    items: list[DocumentStatusItemRead]
|