Merge remote-tracking branch 'upstream/dev' into feat/api-key

This commit is contained in:
Anish Sarkar 2026-06-23 13:09:53 +05:30
commit 3695e1d5c5
64 changed files with 1043 additions and 1852 deletions

View file

@ -38,7 +38,6 @@ from app.schemas import (
from app.services.task_dispatcher import TaskDispatcher, get_task_dispatcher
from app.users import get_auth_context
from app.utils.rbac import check_permission
from app.utils.text_spans import char_span_to_line_range
try:
asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
@ -977,12 +976,9 @@ async def get_document_by_chunk_id(
session: AsyncSession = Depends(get_async_session),
auth: AuthContext = Depends(get_auth_context),
):
"""Resolve a chunk id to its document plus a window of surrounding chunks.
Returns the cited chunk's 1-based line range (cited_start_line/
cited_end_line) when char spans exist, so callers can anchor the citation
to exact source lines. Uses SQL-level pagination to avoid loading all
chunks into memory.
"""
Retrieves a document based on a chunk ID, including a window of chunks around the cited one.
Uses SQL-level pagination to avoid loading all chunks into memory.
"""
try:
from sqlalchemy import and_, func, or_
@ -1046,17 +1042,6 @@ async def get_document_by_chunk_id(
)
windowed_chunks = windowed_result.scalars().all()
cited_start_line: int | None = None
cited_end_line: int | None = None
if (
chunk.start_char is not None
and chunk.end_char is not None
and document.source_markdown
):
cited_start_line, cited_end_line = char_span_to_line_range(
document.source_markdown, chunk.start_char, chunk.end_char
)
return DocumentWithChunksRead(
id=document.id,
title=document.title,
@ -1071,8 +1056,6 @@ async def get_document_by_chunk_id(
chunks=windowed_chunks,
total_chunks=total_chunks,
chunk_start_index=start,
cited_start_line=cited_start_line,
cited_end_line=cited_end_line,
)
except HTTPException:
raise

View file

@ -43,34 +43,6 @@ EDITOR_PLATE_MAX_BYTES = 1 * 1024 * 1024
EDITOR_PLATE_MAX_LINES = 5000
def _raise_no_canonical_body(document: Document) -> None:
    """Raise the HTTP error that best explains a missing ``source_markdown``.

    The document's ``status`` mapping selects the response:

    * state ``pending`` or ``processing`` -> 409 (still being processed)
    * state ``failed`` -> 422, quoting the stored ``reason``
    * any other state -> 400 (no editable content)

    A ``status`` that is falsy or not a dict is treated as state ``ready``.

    Raises:
        HTTPException: always; this function never returns normally.
    """
    raw_status = document.status
    # A non-dict status (including None) carries no usable state or reason,
    # so it is handled exactly like an empty status mapping.
    status_info = raw_status if isinstance(raw_status, dict) else {}
    state = status_info.get("state", "ready")

    if state == "failed":
        reason = status_info.get("reason", "Unknown error")
        raise HTTPException(
            status_code=422,
            detail=f"Processing failed: {reason}. You can delete this document and re-upload it.",
        )

    if state in ("pending", "processing"):
        raise HTTPException(
            status_code=409,
            detail="This document is still being processed. Please wait a moment and try again.",
        )

    # Processing finished (or the state is unknown) yet there is no body.
    raise HTTPException(
        status_code=400,
        detail="This document has no editable content. It may not have been processed correctly. Try re-indexing or re-uploading it.",
    )
@router.get("/search-spaces/{search_space_id}/documents/{document_id}/editor-content")
async def get_editor_content(
search_space_id: int,
@ -82,9 +54,8 @@ async def get_editor_content(
"""
Get document content for editing.
Returns source_markdown (the canonical body) for the Plate.js editor, with a
one-time migration from legacy blocknote_document. Never reconstructs the
body from chunks.
Returns source_markdown for the Plate.js editor.
Falls back to blocknote_document markdown conversion, then chunk reconstruction.
Requires DOCUMENTS_READ permission.
"""
@ -154,9 +125,52 @@ async def get_editor_content(
await session.commit()
return _build_response(empty_markdown)
# No canonical body. Chunks are an index artifact, never the source of
# truth, so surface the processing state instead of rebuilding from them.
_raise_no_canonical_body(document)
chunk_contents_result = await session.execute(
select(Chunk.content)
.filter(Chunk.document_id == document_id)
.order_by(Chunk.position, Chunk.id)
)
chunk_contents = chunk_contents_result.scalars().all()
if not chunk_contents:
doc_status = document.status or {}
state = (
doc_status.get("state", "ready")
if isinstance(doc_status, dict)
else "ready"
)
if state in ("pending", "processing"):
raise HTTPException(
status_code=409,
detail="This document is still being processed. Please wait a moment and try again.",
)
if state == "failed":
reason = (
doc_status.get("reason", "Unknown error")
if isinstance(doc_status, dict)
else "Unknown error"
)
raise HTTPException(
status_code=422,
detail=f"Processing failed: {reason}. You can delete this document and re-upload it.",
)
raise HTTPException(
status_code=400,
detail="This document has no content. It may not have been processed correctly. Try deleting and re-uploading it.",
)
markdown_content = "\n\n".join(chunk_contents)
if not markdown_content.strip():
raise HTTPException(
status_code=400,
detail="This document appears to be empty. Try re-uploading or editing it to add content.",
)
document.source_markdown = markdown_content
await session.commit()
return _build_response(markdown_content)
@router.get(
@ -170,9 +184,8 @@ async def download_document_markdown(
):
user = auth.user
"""
Download the canonical document body as a .md file.
Serves source_markdown, migrating legacy blocknote_document when present.
Download the full document content as a .md file.
Reconstructs markdown from source_markdown or chunks.
"""
await check_permission(
session,
@ -198,6 +211,15 @@ async def download_document_markdown(
from app.utils.blocknote_to_markdown import blocknote_to_markdown
markdown = blocknote_to_markdown(document.blocknote_document)
if markdown is None:
chunk_contents_result = await session.execute(
select(Chunk.content)
.filter(Chunk.document_id == document_id)
.order_by(Chunk.position, Chunk.id)
)
chunk_contents = chunk_contents_result.scalars().all()
if chunk_contents:
markdown = "\n\n".join(chunk_contents)
if not markdown or not markdown.strip():
raise HTTPException(
@ -340,6 +362,15 @@ async def export_document(
from app.utils.blocknote_to_markdown import blocknote_to_markdown
markdown_content = blocknote_to_markdown(document.blocknote_document)
if markdown_content is None:
chunk_contents_result = await session.execute(
select(Chunk.content)
.filter(Chunk.document_id == document_id)
.order_by(Chunk.position, Chunk.id)
)
chunk_contents = chunk_contents_result.scalars().all()
if chunk_contents:
markdown_content = "\n\n".join(chunk_contents)
if not markdown_content or not markdown_content.strip():
raise HTTPException(status_code=400, detail="Document has no content to export")

View file

@ -214,7 +214,7 @@ async def _execute_image_generation(
)
# Store response
image_gen.response_data = (
response_dict = (
response.model_dump() if hasattr(response, "model_dump") else dict(response)
)
if not image_gen.model and hasattr(response, "_hidden_params"):
@ -222,6 +222,20 @@ async def _execute_image_generation(
if isinstance(hidden, dict) and hidden.get("model"):
image_gen.model = hidden["model"]
# Fix relative URLs in response data (for the serving endpoint)
from urllib.parse import urlparse
images = response_dict.get("data", [])
provider_base_url = resolved_kwargs.get("api_base")
for image in images:
if image.get("url"):
raw_url: str = image["url"]
if raw_url.startswith("/") and provider_base_url:
parsed = urlparse(provider_base_url)
origin = f"{parsed.scheme}://{parsed.netloc}"
image["url"] = f"{origin}{raw_url}"
image_gen.response_data = response_dict
# =============================================================================
# Image Generation Execution + Results CRUD