SurfSense/surfsense_backend/app/schemas/documents.py

87 lines
1.9 KiB
Python
Raw Normal View History

from datetime import datetime
2025-10-01 18:50:36 -07:00
from typing import TypeVar
from pydantic import BaseModel, ConfigDict
2025-03-14 18:53:14 -07:00
from app.db import DocumentType
from .chunks import ChunkRead
# Module-level type variable.
# NOTE(review): PaginatedResponse in this module declares its own ``T`` via
# the ``class Name[T]`` syntax, so this binding looks unused within this file.
# Confirm nothing imports it from here before removing.
T = TypeVar("T")
2025-03-14 18:53:14 -07:00
class ExtensionDocumentMetadata(BaseModel):
    """Metadata block of an extension document describing one visited page.

    Every field is required and declared as ``str``, including the
    timestamp and the visit duration.
    """

    BrowsingSessionId: str

    # Location and title of the visited page.
    VisitedWebPageURL: str
    VisitedWebPageTitle: str

    # Visit time; the field name indicates an ISO-formatted string, but the
    # schema itself only enforces ``str``.
    VisitedWebPageDateWithTimeInISOString: str

    # NOTE(review): the "Refferer" spelling is part of the field name and
    # therefore of the accepted payload key; presumably producers send this
    # exact key, so do not rename without checking them.
    VisitedWebPageReffererURL: str

    # Duration is carried as a string, not a number.
    VisitedWebPageVisitDurationInMilliseconds: str
2025-03-14 18:53:14 -07:00
class ExtensionDocumentContent(BaseModel):
    """One extension document entry: page metadata plus the page content."""

    # Structured details about the visited page.
    metadata: ExtensionDocumentMetadata

    # camelCase is kept deliberately (hence the lint suppression).
    pageContent: str  # noqa: N815
2025-03-14 18:53:14 -07:00
class DocumentBase(BaseModel):
    """Fields shared by the document create and update schemas."""

    document_type: DocumentType

    # Updated to allow string content: accepts a list of extension
    # documents, a list of strings, or a single string.
    content: list[ExtensionDocumentContent] | list[str] | str

    search_space_id: int
2025-03-14 18:53:14 -07:00
class DocumentsCreate(DocumentBase):
    """Schema for document creation; adds no fields to ``DocumentBase``."""
2025-03-14 18:53:14 -07:00
class DocumentUpdate(DocumentBase):
    """Schema for document updates; adds no fields to ``DocumentBase``."""
2025-03-14 18:53:14 -07:00
class DocumentRead(BaseModel):
    """Read schema for a document.

    ``from_attributes`` is enabled, so instances can be validated from
    objects exposing these names as attributes rather than only from dicts.
    """

    model_config = ConfigDict(from_attributes=True)

    id: int
    title: str
    document_type: DocumentType
    document_metadata: dict
    content: str  # Changed to string to match frontend
    content_hash: str

    # Required but nullable: the key must be supplied, its value may be None.
    unique_identifier_hash: str | None

    created_at: datetime
    # Required but nullable, same as unique_identifier_hash.
    updated_at: datetime | None

    search_space_id: int
class DocumentWithChunksRead(DocumentRead):
    """``DocumentRead`` extended with the document's chunks."""

    # Re-declared explicitly; the parent class already sets the same option.
    model_config = ConfigDict(from_attributes=True)

    # Defaults to an empty list when no chunks are supplied. A literal ``[]``
    # is acceptable on a Pydantic model because field defaults are copied per
    # instance rather than shared.
    chunks: list[ChunkRead] = []
2025-10-01 18:50:36 -07:00
class PaginatedResponse[T](BaseModel):
items: list[T]
total: int
page: int
page_size: int
has_more: bool
class DocumentTitleRead(BaseModel):
    """Lightweight document response for mention picker - only essential fields."""

    # Allows validation from attribute-bearing objects as well as dicts.
    model_config = ConfigDict(from_attributes=True)

    id: int
    title: str
    document_type: DocumentType
class DocumentTitleSearchResponse(BaseModel):
    """Response for document title search - optimized for typeahead."""

    # Matching documents in their lightweight title-only form.
    items: list[DocumentTitleRead]

    # Required flag alongside the items; no total count is carried here.
    has_more: bool