mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-20 21:18:13 +02:00
code quality issues fixed
This commit is contained in:
parent
abbaa848f3
commit
3fac196c35
17 changed files with 495 additions and 493 deletions
|
|
@ -4,6 +4,7 @@ Revision ID: 38
|
|||
Revises: 37
|
||||
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
|
@ -12,30 +13,38 @@ from sqlalchemy.dialects import postgresql
|
|||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '38'
|
||||
down_revision: str | None = '37'
|
||||
revision: str = "38"
|
||||
down_revision: str | None = "37"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Upgrade schema - Add BlockNote fields only."""
|
||||
|
||||
|
||||
op.add_column(
|
||||
'documents',
|
||||
sa.Column('blocknote_document', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
|
||||
"documents",
|
||||
sa.Column(
|
||||
"blocknote_document", postgresql.JSONB(astext_type=sa.Text()), nullable=True
|
||||
),
|
||||
)
|
||||
op.add_column(
|
||||
'documents',
|
||||
sa.Column('content_needs_reindexing', sa.Boolean(), nullable=False, server_default=sa.false()),
|
||||
"documents",
|
||||
sa.Column(
|
||||
"content_needs_reindexing",
|
||||
sa.Boolean(),
|
||||
nullable=False,
|
||||
server_default=sa.false(),
|
||||
),
|
||||
)
|
||||
op.add_column(
|
||||
'documents',
|
||||
sa.Column('last_edited_at', sa.TIMESTAMP(timezone=True), nullable=True)
|
||||
"documents",
|
||||
sa.Column("last_edited_at", sa.TIMESTAMP(timezone=True), nullable=True),
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Downgrade schema - Remove BlockNote fields."""
|
||||
op.drop_column('documents', 'last_edited_at')
|
||||
op.drop_column('documents', 'content_needs_reindexing')
|
||||
op.drop_column('documents', 'blocknote_document')
|
||||
op.drop_column("documents", "last_edited_at")
|
||||
op.drop_column("documents", "content_needs_reindexing")
|
||||
op.drop_column("documents", "blocknote_document")
|
||||
|
|
|
|||
|
|
@ -178,15 +178,15 @@ class Document(BaseModel, TimestampMixin):
|
|||
content_hash = Column(String, nullable=False, index=True, unique=True)
|
||||
unique_identifier_hash = Column(String, nullable=True, index=True, unique=True)
|
||||
embedding = Column(Vector(config.embedding_model_instance.dimension))
|
||||
|
||||
|
||||
# BlockNote live editing state (NULL when never edited)
|
||||
blocknote_document = Column(JSONB, nullable=True)
|
||||
|
||||
|
||||
# BlockNote background reindex flag
|
||||
content_needs_reindexing = Column(
|
||||
Boolean, nullable=False, default=False, server_default=text("false")
|
||||
)
|
||||
|
||||
|
||||
# Track when the BlockNote document was last edited
|
||||
last_edited_at = Column(TIMESTAMP(timezone=True), nullable=True)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
"""
|
||||
Editor routes for BlockNote document editing.
|
||||
"""
|
||||
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
|
||||
|
|
@ -26,7 +27,7 @@ async def get_editor_content(
|
|||
):
|
||||
"""
|
||||
Get document content for editing.
|
||||
|
||||
|
||||
Returns BlockNote JSON document. If blocknote_document is NULL,
|
||||
responds with HTTP 400 asking the user to re-upload, because old
|
||||
documents only store summaries that cannot be converted back.
|
||||
|
|
@ -37,24 +38,26 @@ async def get_editor_content(
|
|||
.filter(Document.id == document_id, SearchSpace.user_id == user.id)
|
||||
)
|
||||
document = result.scalars().first()
|
||||
|
||||
|
||||
if not document:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
|
||||
# If blocknote_document exists, return it
|
||||
if document.blocknote_document:
|
||||
return {
|
||||
"document_id": document.id,
|
||||
"title": document.title,
|
||||
"blocknote_document": document.blocknote_document,
|
||||
"last_edited_at": document.last_edited_at.isoformat() if document.last_edited_at else None,
|
||||
"last_edited_at": document.last_edited_at.isoformat()
|
||||
if document.last_edited_at
|
||||
else None,
|
||||
}
|
||||
|
||||
|
||||
# For old documents without blocknote_document, return error
|
||||
# (Can't convert summary back to full document)
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="This document was uploaded before editing was enabled. Please re-upload to enable editing."
|
||||
detail="This document was uploaded before editing was enabled. Please re-upload to enable editing.",
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -75,21 +78,21 @@ async def update_blocknote_content(
|
|||
.filter(Document.id == document_id, SearchSpace.user_id == user.id)
|
||||
)
|
||||
document = result.scalars().first()
|
||||
|
||||
|
||||
if not document:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
|
||||
blocknote_document = data.get("blocknote_document")
|
||||
if not blocknote_document:
|
||||
raise HTTPException(status_code=400, detail="blocknote_document is required")
|
||||
|
||||
|
||||
# Update only blocknote_document and last_edited_at
|
||||
document.blocknote_document = blocknote_document
|
||||
document.last_edited_at = datetime.now(UTC)
|
||||
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(document)
|
||||
|
||||
|
||||
return {"status": "saved", "last_edited_at": document.last_edited_at.isoformat()}
|
||||
|
||||
|
||||
|
|
@ -110,52 +113,51 @@ async def update_blocknote_content(
|
|||
# .filter(Document.id == document_id, SearchSpace.user_id == user.id)
|
||||
# )
|
||||
# document = result.scalars().first()
|
||||
|
||||
|
||||
# if not document:
|
||||
# raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
|
||||
# if not document.blocknote_document:
|
||||
# raise HTTPException(
|
||||
# status_code=400,
|
||||
# detail="Document has no BlockNote content to finalize"
|
||||
# )
|
||||
|
||||
|
||||
# # 1. Convert BlockNote JSON → Markdown
|
||||
# full_markdown = await convert_blocknote_to_markdown(document.blocknote_document)
|
||||
|
||||
|
||||
# if not full_markdown:
|
||||
# raise HTTPException(
|
||||
# status_code=500,
|
||||
# detail="Failed to convert BlockNote document to markdown"
|
||||
# )
|
||||
|
||||
|
||||
# # 2. Generate new summary from full markdown
|
||||
# from app.services.llm_service import get_user_long_context_llm
|
||||
# from app.utils.document_converters import generate_document_summary
|
||||
|
||||
|
||||
# user_llm = await get_user_long_context_llm(session, str(user.id), document.search_space_id)
|
||||
# if not user_llm:
|
||||
# raise HTTPException(
|
||||
# status_code=500,
|
||||
# detail="No LLM configured for summary generation"
|
||||
# )
|
||||
|
||||
|
||||
# document_metadata = document.document_metadata or {}
|
||||
# summary_content, summary_embedding = await generate_document_summary(
|
||||
# full_markdown, user_llm, document_metadata
|
||||
# )
|
||||
|
||||
|
||||
# # 3. Update document fields
|
||||
# document.content = summary_content
|
||||
# document.embedding = summary_embedding
|
||||
# document.content_needs_reindexing = True # Trigger chunk regeneration
|
||||
# document.last_edited_at = datetime.now(UTC)
|
||||
|
||||
|
||||
# await session.commit()
|
||||
|
||||
|
||||
# return {
|
||||
# "status": "finalized",
|
||||
# "message": "Document saved. Summary and chunks will be regenerated in the background.",
|
||||
# "content_needs_reindexing": True,
|
||||
# }
|
||||
|
||||
|
|
@ -144,9 +144,9 @@ async def add_extension_received_document(
|
|||
|
||||
# Process chunks
|
||||
chunks = await create_document_chunks(content.pageContent)
|
||||
|
||||
|
||||
from app.utils.blocknote_converter import convert_markdown_to_blocknote
|
||||
|
||||
|
||||
# Convert markdown to BlockNote JSON
|
||||
blocknote_json = await convert_markdown_to_blocknote(combined_document_string)
|
||||
if not blocknote_json:
|
||||
|
|
|
|||
|
|
@ -99,14 +99,15 @@ async def add_received_file_document_using_unstructured(
|
|||
|
||||
# Process chunks
|
||||
chunks = await create_document_chunks(file_in_markdown)
|
||||
|
||||
|
||||
from app.utils.blocknote_converter import convert_markdown_to_blocknote
|
||||
|
||||
|
||||
# Convert markdown to BlockNote JSON
|
||||
blocknote_json = await convert_markdown_to_blocknote(file_in_markdown)
|
||||
if not blocknote_json:
|
||||
logging.warning(f"Failed to convert {file_name} to BlockNote JSON, document will not be editable")
|
||||
|
||||
logging.warning(
|
||||
f"Failed to convert {file_name} to BlockNote JSON, document will not be editable"
|
||||
)
|
||||
|
||||
# Update or create document
|
||||
if existing_document:
|
||||
|
|
@ -223,14 +224,15 @@ async def add_received_file_document_using_llamacloud(
|
|||
|
||||
# Process chunks
|
||||
chunks = await create_document_chunks(file_in_markdown)
|
||||
|
||||
|
||||
from app.utils.blocknote_converter import convert_markdown_to_blocknote
|
||||
|
||||
# Convert markdown to BlockNote JSON
|
||||
blocknote_json = await convert_markdown_to_blocknote(file_in_markdown)
|
||||
if not blocknote_json:
|
||||
logging.warning(f"Failed to convert {file_name} to BlockNote JSON, document will not be editable")
|
||||
|
||||
logging.warning(
|
||||
f"Failed to convert {file_name} to BlockNote JSON, document will not be editable"
|
||||
)
|
||||
|
||||
# Update or create document
|
||||
if existing_document:
|
||||
|
|
@ -372,14 +374,15 @@ async def add_received_file_document_using_docling(
|
|||
|
||||
# Process chunks
|
||||
chunks = await create_document_chunks(file_in_markdown)
|
||||
|
||||
|
||||
from app.utils.blocknote_converter import convert_markdown_to_blocknote
|
||||
|
||||
|
||||
# Convert markdown to BlockNote JSON
|
||||
blocknote_json = await convert_markdown_to_blocknote(file_in_markdown)
|
||||
if not blocknote_json:
|
||||
logging.warning(f"Failed to convert {file_name} to BlockNote JSON, document will not be editable")
|
||||
|
||||
logging.warning(
|
||||
f"Failed to convert {file_name} to BlockNote JSON, document will not be editable"
|
||||
)
|
||||
|
||||
# Update or create document
|
||||
if existing_document:
|
||||
|
|
|
|||
|
|
@ -109,14 +109,15 @@ async def add_received_markdown_file_document(
|
|||
|
||||
# Process chunks
|
||||
chunks = await create_document_chunks(file_in_markdown)
|
||||
|
||||
|
||||
from app.utils.blocknote_converter import convert_markdown_to_blocknote
|
||||
|
||||
|
||||
# Convert to BlockNote JSON
|
||||
blocknote_json = await convert_markdown_to_blocknote(file_in_markdown)
|
||||
if not blocknote_json:
|
||||
logging.warning(f"Failed to convert {file_name} to BlockNote JSON, document will not be editable")
|
||||
|
||||
logging.warning(
|
||||
f"Failed to convert {file_name} to BlockNote JSON, document will not be editable"
|
||||
)
|
||||
|
||||
# Update or create document
|
||||
if existing_document:
|
||||
|
|
|
|||
|
|
@ -247,7 +247,7 @@ async def add_crawled_url_document(
|
|||
f"Processing content chunks for URL: {url}",
|
||||
{"stage": "chunk_processing"},
|
||||
)
|
||||
|
||||
|
||||
from app.utils.blocknote_converter import convert_markdown_to_blocknote
|
||||
|
||||
# Convert markdown to BlockNote JSON
|
||||
|
|
|
|||
|
|
@ -290,7 +290,7 @@ async def add_youtube_video_document(
|
|||
f"Processing content chunks for video: {video_data.get('title', 'YouTube Video')}",
|
||||
{"stage": "chunk_processing"},
|
||||
)
|
||||
|
||||
|
||||
from app.utils.blocknote_converter import convert_markdown_to_blocknote
|
||||
|
||||
# Convert transcript to BlockNote JSON
|
||||
|
|
|
|||
|
|
@ -11,17 +11,17 @@ logger = logging.getLogger(__name__)
|
|||
async def convert_markdown_to_blocknote(markdown: str) -> dict[str, Any] | None:
|
||||
"""
|
||||
Convert markdown to BlockNote JSON via Next.js API.
|
||||
|
||||
|
||||
Args:
|
||||
markdown: Markdown string to convert
|
||||
|
||||
|
||||
Returns:
|
||||
BlockNote document as dict, or None if conversion fails
|
||||
"""
|
||||
if not markdown or not markdown.strip():
|
||||
logger.warning("Empty markdown provided for conversion")
|
||||
return None
|
||||
|
||||
|
||||
if not markdown or len(markdown) < 10:
|
||||
logger.warning("Markdown became too short after sanitization")
|
||||
# Return a minimal BlockNote document
|
||||
|
|
@ -32,13 +32,13 @@ async def convert_markdown_to_blocknote(markdown: str) -> dict[str, Any] | None:
|
|||
{
|
||||
"type": "text",
|
||||
"text": "Document content could not be converted for editing.",
|
||||
"styles": {}
|
||||
"styles": {},
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
"children": [],
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
try:
|
||||
response = await client.post(
|
||||
|
|
@ -49,19 +49,23 @@ async def convert_markdown_to_blocknote(markdown: str) -> dict[str, Any] | None:
|
|||
response.raise_for_status()
|
||||
data = response.json()
|
||||
blocknote_document = data.get("blocknote_document")
|
||||
|
||||
|
||||
if blocknote_document:
|
||||
logger.info(f"Successfully converted markdown to BlockNote (original: {len(markdown)} chars, sanitized: {len(markdown)} chars)")
|
||||
logger.info(
|
||||
f"Successfully converted markdown to BlockNote (original: {len(markdown)} chars, sanitized: {len(markdown)} chars)"
|
||||
)
|
||||
return blocknote_document
|
||||
else:
|
||||
logger.warning("Next.js API returned empty blocknote_document")
|
||||
return None
|
||||
|
||||
|
||||
except httpx.TimeoutException:
|
||||
logger.error("Timeout converting markdown to BlockNote after 30s")
|
||||
return None
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"HTTP error converting markdown to BlockNote: {e.response.status_code} - {e.response.text}")
|
||||
logger.error(
|
||||
f"HTTP error converting markdown to BlockNote: {e.response.status_code} - {e.response.text}"
|
||||
)
|
||||
# Log first 1000 chars of problematic markdown for debugging
|
||||
logger.debug(f"Problematic markdown sample: {markdown[:1000]}")
|
||||
return None
|
||||
|
|
@ -69,20 +73,23 @@ async def convert_markdown_to_blocknote(markdown: str) -> dict[str, Any] | None:
|
|||
logger.error(f"Failed to convert markdown to BlockNote: {e}", exc_info=True)
|
||||
return None
|
||||
|
||||
async def convert_blocknote_to_markdown(blocknote_document: dict[str, Any] | list[dict[str, Any]]) -> str | None:
|
||||
|
||||
async def convert_blocknote_to_markdown(
|
||||
blocknote_document: dict[str, Any] | list[dict[str, Any]],
|
||||
) -> str | None:
|
||||
"""
|
||||
Convert BlockNote JSON to markdown via Next.js API.
|
||||
|
||||
|
||||
Args:
|
||||
blocknote_document: BlockNote document as dict or list of blocks
|
||||
|
||||
|
||||
Returns:
|
||||
Markdown string, or None if conversion fails
|
||||
"""
|
||||
if not blocknote_document:
|
||||
logger.warning("Empty BlockNote document provided for conversion")
|
||||
return None
|
||||
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
try:
|
||||
response = await client.post(
|
||||
|
|
@ -93,21 +100,24 @@ async def convert_blocknote_to_markdown(blocknote_document: dict[str, Any] | lis
|
|||
response.raise_for_status()
|
||||
data = response.json()
|
||||
markdown = data.get("markdown")
|
||||
|
||||
|
||||
if markdown:
|
||||
logger.info(f"Successfully converted BlockNote to markdown ({len(markdown)} chars)")
|
||||
logger.info(
|
||||
f"Successfully converted BlockNote to markdown ({len(markdown)} chars)"
|
||||
)
|
||||
return markdown
|
||||
else:
|
||||
logger.warning("Next.js API returned empty markdown")
|
||||
return None
|
||||
|
||||
|
||||
except httpx.TimeoutException:
|
||||
logger.error("Timeout converting BlockNote to markdown after 30s")
|
||||
return None
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"HTTP error converting BlockNote to markdown: {e.response.status_code} - {e.response.text}")
|
||||
logger.error(
|
||||
f"HTTP error converting BlockNote to markdown: {e.response.status_code} - {e.response.text}"
|
||||
)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to convert BlockNote to markdown: {e}", exc_info=True)
|
||||
return None
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue