SurfSense/surfsense_backend/app/schemas/documents.py
DESKTOP-RTLN3BA\$punk 62e698d8aa refactor: streamline document upload limits and enhance handling of mentioned documents
- Updated maximum file size limit to 500 MB per file.
- Removed restrictions on the number of files per upload and total upload size.
- Enhanced handling of user-mentioned documents in the knowledge base search middleware.
- Improved document reading and processing logic to accommodate new features and optimizations.
2026-04-02 19:39:10 -07:00

120 lines
2.7 KiB
Python

from datetime import datetime
from typing import TypeVar
from uuid import UUID
from pydantic import BaseModel, ConfigDict
from app.db import DocumentType
from .chunks import ChunkRead
# Module-level type variable.
# NOTE(review): `PaginatedResponse` binds its own `T` through the
# `class PaginatedResponse[T]` type-parameter syntax, so it does not use this
# object, and nothing else visible in this module references it. Confirm there
# are no external importers before removing.
T = TypeVar("T")
class ExtensionDocumentMetadata(BaseModel):
    """Metadata half of an ``ExtensionDocumentContent`` entry.

    Every field is a required string; the timestamp and the visit duration
    are carried as text and are not parsed into ``datetime``/``int`` here.
    Field names double as the payload keys, so their exact spelling
    (including ``Refferer``) must be preserved.
    """

    # Session and page identity.
    BrowsingSessionId: str
    VisitedWebPageURL: str
    VisitedWebPageTitle: str

    # Visit details, all string-typed.
    VisitedWebPageDateWithTimeInISOString: str
    VisitedWebPageReffererURL: str
    VisitedWebPageVisitDurationInMilliseconds: str
class ExtensionDocumentContent(BaseModel):
    """One content entry: a metadata record paired with its page text."""

    metadata: ExtensionDocumentMetadata
    # The camelCase name is kept as-is (hence the lint suppression); the field
    # name is the key expected in the payload.
    pageContent: str  # noqa: N815
class DocumentBase(BaseModel):
    """Common fields inherited by ``DocumentsCreate`` and ``DocumentUpdate``."""

    document_type: DocumentType
    # Accepted shapes, in declaration order: a list of extension content
    # entries, a list of strings, or a single string.
    content: list[ExtensionDocumentContent] | list[str] | str
    search_space_id: int
class DocumentsCreate(DocumentBase):
    """Creation schema; declares no fields beyond ``DocumentBase``."""
class DocumentUpdate(DocumentBase):
    """Update schema; declares no fields beyond ``DocumentBase``."""
class DocumentStatusSchema(BaseModel):
    """Processing status of a document."""

    # Plain string; the values noted alongside the field are "ready",
    # "processing" and "failed". Nothing here restricts it to those.
    state: str
    # Optional explanation; defaults to None.
    reason: str | None = None
class DocumentRead(BaseModel):
    """Full read representation of a document.

    ``from_attributes`` is enabled, so instances can be validated from
    objects exposing these names as attributes as well as from mappings.
    """

    model_config = ConfigDict(from_attributes=True)

    # Identity and classification.
    id: int
    title: str
    document_type: DocumentType
    document_metadata: dict

    # Body text; both default to an empty string when not supplied.
    content: str = ""
    content_preview: str = ""

    # Hashes. ``unique_identifier_hash`` may be None but has no default, so it
    # must always be provided.
    content_hash: str
    unique_identifier_hash: str | None

    # Timestamps. ``updated_at`` is likewise nullable yet required.
    created_at: datetime
    updated_at: datetime | None

    # Placement.
    search_space_id: int
    folder_id: int | None = None

    # Creator details; each defaults to None.
    created_by_id: UUID | None = None
    created_by_name: str | None = None
    created_by_email: str | None = None

    # Processing status; None when not supplied.
    status: DocumentStatusSchema | None = None
class DocumentWithChunksRead(DocumentRead):
    """``DocumentRead`` extended with a list of chunks and chunk counters."""

    model_config = ConfigDict(from_attributes=True)

    # Chunks carried in this response; empty list by default.
    chunks: list[ChunkRead] = []
    # Defaults to 0. NOTE(review): presumably the document's overall chunk
    # count, which may exceed ``len(chunks)`` — confirm against the caller.
    total_chunks: int = 0
    # Defaults to 0. NOTE(review): presumably the position of ``chunks[0]``
    # within the document's full chunk sequence — confirm against the caller.
    chunk_start_index: int = 0
class PaginatedResponse[T](BaseModel):
items: list[T]
total: int
page: int
page_size: int
has_more: bool
class DocumentTitleRead(BaseModel):
    """Slim document representation for the mention picker.

    Carries only the id, title, type and optional folder of a document.
    """

    model_config = ConfigDict(from_attributes=True)

    id: int
    title: str
    document_type: DocumentType
    # Defaults to None when not supplied.
    folder_id: int | None = None
class DocumentTitleSearchResponse(BaseModel):
    """Result envelope for document title search, intended for typeahead."""

    items: list[DocumentTitleRead]
    # Required flag. NOTE(review): presumably true when more matches exist
    # than were returned.
    has_more: bool
class DocumentStatusItemRead(BaseModel):
    """Slim per-document status entry used for batch status polling."""

    model_config = ConfigDict(from_attributes=True)

    id: int
    title: str
    document_type: DocumentType
    # Required here, whereas ``DocumentRead.status`` is optional.
    status: DocumentStatusSchema
class DocumentStatusBatchResponse(BaseModel):
    """Status entries returned for a requested set of document IDs."""

    items: list[DocumentStatusItemRead]