feat(editor): increase editor byte limit to 5MB and implement UTF-8 truncation for document content

This commit is contained in:
Anish Sarkar 2026-06-05 14:57:52 +05:30
parent cf8a2d0759
commit 640238e1af
4 changed files with 110 additions and 29 deletions

View file

@ -38,7 +38,14 @@ logger = logging.getLogger(__name__)
router = APIRouter()
EDITOR_PLATE_MAX_BYTES = 2 * 1024 * 1024
EDITOR_PLATE_MAX_BYTES = 5 * 1024 * 1024
def _truncate_utf8(text: str, max_bytes: int) -> str:
encoded = text.encode("utf-8")
if len(encoded) <= max_bytes:
return text
return encoded[:max_bytes].decode("utf-8", errors="ignore")
@router.get("/search-spaces/{search_space_id}/documents/{document_id}/editor-content")
@ -46,7 +53,8 @@ async def get_editor_content(
search_space_id: int,
document_id: int,
max_length: int | None = Query(
None, description="Truncate source_markdown to this many characters"
None,
description="Truncate source_markdown to this many UTF-8 bytes. Defaults to the Plate editor byte limit.",
),
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
@ -87,10 +95,11 @@ async def get_editor_content(
def _build_response(md: str) -> dict:
size_bytes = len(md.encode("utf-8"))
viewer_mode = "monaco" if size_bytes > EDITOR_PLATE_MAX_BYTES else "plate"
content_limit = max_length if max_length is not None else EDITOR_PLATE_MAX_BYTES
truncated = False
output_md = md
if max_length is not None and size_bytes > max_length:
output_md = md[:max_length]
if size_bytes > content_limit:
output_md = _truncate_utf8(md, content_limit)
truncated = True
return {
"document_id": document.id,
@ -101,6 +110,7 @@ async def get_editor_content(
"chunk_count": chunk_count,
"truncated": truncated,
"viewer_mode": viewer_mode,
"editor_plate_max_bytes": EDITOR_PLATE_MAX_BYTES,
"updated_at": document.updated_at.isoformat()
if document.updated_at
else None,