mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-07-04 22:02:16 +02:00
Merge remote-tracking branch 'upstream/dev' into feat/api-key
This commit is contained in:
commit
3695e1d5c5
64 changed files with 1043 additions and 1852 deletions
|
|
@ -38,7 +38,6 @@ from app.schemas import (
|
|||
from app.services.task_dispatcher import TaskDispatcher, get_task_dispatcher
|
||||
from app.users import get_auth_context
|
||||
from app.utils.rbac import check_permission
|
||||
from app.utils.text_spans import char_span_to_line_range
|
||||
|
||||
try:
|
||||
asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
|
||||
|
|
@ -977,12 +976,9 @@ async def get_document_by_chunk_id(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
auth: AuthContext = Depends(get_auth_context),
|
||||
):
|
||||
"""Resolve a chunk id to its document plus a window of surrounding chunks.
|
||||
|
||||
Returns the cited chunk's 1-based line range (cited_start_line/
|
||||
cited_end_line) when char spans exist, so callers can anchor the citation
|
||||
to exact source lines. Uses SQL-level pagination to avoid loading all
|
||||
chunks into memory.
|
||||
"""
|
||||
Retrieves a document based on a chunk ID, including a window of chunks around the cited one.
|
||||
Uses SQL-level pagination to avoid loading all chunks into memory.
|
||||
"""
|
||||
try:
|
||||
from sqlalchemy import and_, func, or_
|
||||
|
|
@ -1046,17 +1042,6 @@ async def get_document_by_chunk_id(
|
|||
)
|
||||
windowed_chunks = windowed_result.scalars().all()
|
||||
|
||||
cited_start_line: int | None = None
|
||||
cited_end_line: int | None = None
|
||||
if (
|
||||
chunk.start_char is not None
|
||||
and chunk.end_char is not None
|
||||
and document.source_markdown
|
||||
):
|
||||
cited_start_line, cited_end_line = char_span_to_line_range(
|
||||
document.source_markdown, chunk.start_char, chunk.end_char
|
||||
)
|
||||
|
||||
return DocumentWithChunksRead(
|
||||
id=document.id,
|
||||
title=document.title,
|
||||
|
|
@ -1071,8 +1056,6 @@ async def get_document_by_chunk_id(
|
|||
chunks=windowed_chunks,
|
||||
total_chunks=total_chunks,
|
||||
chunk_start_index=start,
|
||||
cited_start_line=cited_start_line,
|
||||
cited_end_line=cited_end_line,
|
||||
)
|
||||
except HTTPException:
|
||||
raise
|
||||
|
|
|
|||
|
|
@ -43,34 +43,6 @@ EDITOR_PLATE_MAX_BYTES = 1 * 1024 * 1024
|
|||
EDITOR_PLATE_MAX_LINES = 5000
|
||||
|
||||
|
||||
def _raise_no_canonical_body(document: Document) -> None:
|
||||
"""Translate a missing source_markdown into a status-aware HTTP error."""
|
||||
doc_status = document.status or {}
|
||||
state = (
|
||||
doc_status.get("state", "ready") if isinstance(doc_status, dict) else "ready"
|
||||
)
|
||||
|
||||
if state in ("pending", "processing"):
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail="This document is still being processed. Please wait a moment and try again.",
|
||||
)
|
||||
if state == "failed":
|
||||
reason = (
|
||||
doc_status.get("reason", "Unknown error")
|
||||
if isinstance(doc_status, dict)
|
||||
else "Unknown error"
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=422,
|
||||
detail=f"Processing failed: {reason}. You can delete this document and re-upload it.",
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="This document has no editable content. It may not have been processed correctly. Try re-indexing or re-uploading it.",
|
||||
)
|
||||
|
||||
|
||||
@router.get("/search-spaces/{search_space_id}/documents/{document_id}/editor-content")
|
||||
async def get_editor_content(
|
||||
search_space_id: int,
|
||||
|
|
@ -82,9 +54,8 @@ async def get_editor_content(
|
|||
"""
|
||||
Get document content for editing.
|
||||
|
||||
Returns source_markdown (the canonical body) for the Plate.js editor, with a
|
||||
one-time migration from legacy blocknote_document. Never reconstructs the
|
||||
body from chunks.
|
||||
Returns source_markdown for the Plate.js editor.
|
||||
Falls back to blocknote_document → markdown conversion, then chunk reconstruction.
|
||||
|
||||
Requires DOCUMENTS_READ permission.
|
||||
"""
|
||||
|
|
@ -154,9 +125,52 @@ async def get_editor_content(
|
|||
await session.commit()
|
||||
return _build_response(empty_markdown)
|
||||
|
||||
# No canonical body. Chunks are an index artifact, never the source of
|
||||
# truth, so surface the processing state instead of rebuilding from them.
|
||||
_raise_no_canonical_body(document)
|
||||
chunk_contents_result = await session.execute(
|
||||
select(Chunk.content)
|
||||
.filter(Chunk.document_id == document_id)
|
||||
.order_by(Chunk.position, Chunk.id)
|
||||
)
|
||||
chunk_contents = chunk_contents_result.scalars().all()
|
||||
|
||||
if not chunk_contents:
|
||||
doc_status = document.status or {}
|
||||
state = (
|
||||
doc_status.get("state", "ready")
|
||||
if isinstance(doc_status, dict)
|
||||
else "ready"
|
||||
)
|
||||
if state in ("pending", "processing"):
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail="This document is still being processed. Please wait a moment and try again.",
|
||||
)
|
||||
if state == "failed":
|
||||
reason = (
|
||||
doc_status.get("reason", "Unknown error")
|
||||
if isinstance(doc_status, dict)
|
||||
else "Unknown error"
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=422,
|
||||
detail=f"Processing failed: {reason}. You can delete this document and re-upload it.",
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="This document has no content. It may not have been processed correctly. Try deleting and re-uploading it.",
|
||||
)
|
||||
|
||||
markdown_content = "\n\n".join(chunk_contents)
|
||||
|
||||
if not markdown_content.strip():
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="This document appears to be empty. Try re-uploading or editing it to add content.",
|
||||
)
|
||||
|
||||
document.source_markdown = markdown_content
|
||||
await session.commit()
|
||||
|
||||
return _build_response(markdown_content)
|
||||
|
||||
|
||||
@router.get(
|
||||
|
|
@ -170,9 +184,8 @@ async def download_document_markdown(
|
|||
):
|
||||
user = auth.user
|
||||
"""
|
||||
Download the canonical document body as a .md file.
|
||||
|
||||
Serves source_markdown, migrating legacy blocknote_document when present.
|
||||
Download the full document content as a .md file.
|
||||
Reconstructs markdown from source_markdown or chunks.
|
||||
"""
|
||||
await check_permission(
|
||||
session,
|
||||
|
|
@ -198,6 +211,15 @@ async def download_document_markdown(
|
|||
from app.utils.blocknote_to_markdown import blocknote_to_markdown
|
||||
|
||||
markdown = blocknote_to_markdown(document.blocknote_document)
|
||||
if markdown is None:
|
||||
chunk_contents_result = await session.execute(
|
||||
select(Chunk.content)
|
||||
.filter(Chunk.document_id == document_id)
|
||||
.order_by(Chunk.position, Chunk.id)
|
||||
)
|
||||
chunk_contents = chunk_contents_result.scalars().all()
|
||||
if chunk_contents:
|
||||
markdown = "\n\n".join(chunk_contents)
|
||||
|
||||
if not markdown or not markdown.strip():
|
||||
raise HTTPException(
|
||||
|
|
@ -340,6 +362,15 @@ async def export_document(
|
|||
from app.utils.blocknote_to_markdown import blocknote_to_markdown
|
||||
|
||||
markdown_content = blocknote_to_markdown(document.blocknote_document)
|
||||
if markdown_content is None:
|
||||
chunk_contents_result = await session.execute(
|
||||
select(Chunk.content)
|
||||
.filter(Chunk.document_id == document_id)
|
||||
.order_by(Chunk.position, Chunk.id)
|
||||
)
|
||||
chunk_contents = chunk_contents_result.scalars().all()
|
||||
if chunk_contents:
|
||||
markdown_content = "\n\n".join(chunk_contents)
|
||||
|
||||
if not markdown_content or not markdown_content.strip():
|
||||
raise HTTPException(status_code=400, detail="Document has no content to export")
|
||||
|
|
|
|||
|
|
@ -214,7 +214,7 @@ async def _execute_image_generation(
|
|||
)
|
||||
|
||||
# Store response
|
||||
image_gen.response_data = (
|
||||
response_dict = (
|
||||
response.model_dump() if hasattr(response, "model_dump") else dict(response)
|
||||
)
|
||||
if not image_gen.model and hasattr(response, "_hidden_params"):
|
||||
|
|
@ -222,6 +222,20 @@ async def _execute_image_generation(
|
|||
if isinstance(hidden, dict) and hidden.get("model"):
|
||||
image_gen.model = hidden["model"]
|
||||
|
||||
# Fix relative URLs in response data (for the serving endpoint)
|
||||
from urllib.parse import urlparse
|
||||
images = response_dict.get("data", [])
|
||||
provider_base_url = resolved_kwargs.get("api_base")
|
||||
for image in images:
|
||||
if image.get("url"):
|
||||
raw_url: str = image["url"]
|
||||
if raw_url.startswith("/") and provider_base_url:
|
||||
parsed = urlparse(provider_base_url)
|
||||
origin = f"{parsed.scheme}://{parsed.netloc}"
|
||||
image["url"] = f"{origin}{raw_url}"
|
||||
|
||||
image_gen.response_data = response_dict
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Image Generation Execution + Results CRUD
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue