code quality issues fixed

This commit is contained in:
Anish Sarkar 2025-11-23 16:39:23 +05:30
parent abbaa848f3
commit 3fac196c35
17 changed files with 495 additions and 493 deletions

View file

@ -4,6 +4,7 @@ Revision ID: 38
Revises: 37
"""
from collections.abc import Sequence
import sqlalchemy as sa
@ -12,30 +13,38 @@ from sqlalchemy.dialects import postgresql
from alembic import op
# revision identifiers, used by Alembic.
revision: str = '38'
down_revision: str | None = '37'
revision: str = "38"
down_revision: str | None = "37"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
"""Upgrade schema - Add BlockNote fields only."""
op.add_column(
'documents',
sa.Column('blocknote_document', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
"documents",
sa.Column(
"blocknote_document", postgresql.JSONB(astext_type=sa.Text()), nullable=True
),
)
op.add_column(
'documents',
sa.Column('content_needs_reindexing', sa.Boolean(), nullable=False, server_default=sa.false()),
"documents",
sa.Column(
"content_needs_reindexing",
sa.Boolean(),
nullable=False,
server_default=sa.false(),
),
)
op.add_column(
'documents',
sa.Column('last_edited_at', sa.TIMESTAMP(timezone=True), nullable=True)
"documents",
sa.Column("last_edited_at", sa.TIMESTAMP(timezone=True), nullable=True),
)
def downgrade() -> None:
"""Downgrade schema - Remove BlockNote fields."""
op.drop_column('documents', 'last_edited_at')
op.drop_column('documents', 'content_needs_reindexing')
op.drop_column('documents', 'blocknote_document')
op.drop_column("documents", "last_edited_at")
op.drop_column("documents", "content_needs_reindexing")
op.drop_column("documents", "blocknote_document")

View file

@ -178,15 +178,15 @@ class Document(BaseModel, TimestampMixin):
content_hash = Column(String, nullable=False, index=True, unique=True)
unique_identifier_hash = Column(String, nullable=True, index=True, unique=True)
embedding = Column(Vector(config.embedding_model_instance.dimension))
# BlockNote live editing state (NULL when never edited)
blocknote_document = Column(JSONB, nullable=True)
# blocknote background reindex flag
content_needs_reindexing = Column(
Boolean, nullable=False, default=False, server_default=text("false")
)
# Track when blocknote document was last edited
last_edited_at = Column(TIMESTAMP(timezone=True), nullable=True)

View file

@ -1,6 +1,7 @@
"""
Editor routes for BlockNote document editing.
"""
from datetime import UTC, datetime
from typing import Any
@ -26,7 +27,7 @@ async def get_editor_content(
):
"""
Get document content for editing.
Returns BlockNote JSON document. If blocknote_document is NULL,
attempts to convert from `content` - though this won't work well
for old documents that only have summaries.
@ -37,24 +38,26 @@ async def get_editor_content(
.filter(Document.id == document_id, SearchSpace.user_id == user.id)
)
document = result.scalars().first()
if not document:
raise HTTPException(status_code=404, detail="Document not found")
# If blocknote_document exists, return it
if document.blocknote_document:
return {
"document_id": document.id,
"title": document.title,
"blocknote_document": document.blocknote_document,
"last_edited_at": document.last_edited_at.isoformat() if document.last_edited_at else None,
"last_edited_at": document.last_edited_at.isoformat()
if document.last_edited_at
else None,
}
# For old documents without blocknote_document, return error
# (Can't convert summary back to full document)
raise HTTPException(
status_code=400,
detail="This document was uploaded before editing was enabled. Please re-upload to enable editing."
detail="This document was uploaded before editing was enabled. Please re-upload to enable editing.",
)
@ -75,21 +78,21 @@ async def update_blocknote_content(
.filter(Document.id == document_id, SearchSpace.user_id == user.id)
)
document = result.scalars().first()
if not document:
raise HTTPException(status_code=404, detail="Document not found")
blocknote_document = data.get("blocknote_document")
if not blocknote_document:
raise HTTPException(status_code=400, detail="blocknote_document is required")
# Update only blocknote_document and last_edited_at
document.blocknote_document = blocknote_document
document.last_edited_at = datetime.now(UTC)
await session.commit()
await session.refresh(document)
return {"status": "saved", "last_edited_at": document.last_edited_at.isoformat()}
@ -110,52 +113,51 @@ async def update_blocknote_content(
# .filter(Document.id == document_id, SearchSpace.user_id == user.id)
# )
# document = result.scalars().first()
# if not document:
# raise HTTPException(status_code=404, detail="Document not found")
# if not document.blocknote_document:
# raise HTTPException(
# status_code=400,
# detail="Document has no BlockNote content to finalize"
# )
# # 1. Convert BlockNote JSON → Markdown
# full_markdown = await convert_blocknote_to_markdown(document.blocknote_document)
# if not full_markdown:
# raise HTTPException(
# status_code=500,
# detail="Failed to convert BlockNote document to markdown"
# )
# # 2. Generate new summary from full markdown
# from app.services.llm_service import get_user_long_context_llm
# from app.utils.document_converters import generate_document_summary
# user_llm = await get_user_long_context_llm(session, str(user.id), document.search_space_id)
# if not user_llm:
# raise HTTPException(
# status_code=500,
# detail="No LLM configured for summary generation"
# )
# document_metadata = document.document_metadata or {}
# summary_content, summary_embedding = await generate_document_summary(
# full_markdown, user_llm, document_metadata
# )
# # 3. Update document fields
# document.content = summary_content
# document.embedding = summary_embedding
# document.content_needs_reindexing = True # Trigger chunk regeneration
# document.last_edited_at = datetime.now(UTC)
# await session.commit()
# return {
# "status": "finalized",
# "message": "Document saved. Summary and chunks will be regenerated in the background.",
# "content_needs_reindexing": True,
# }

View file

@ -144,9 +144,9 @@ async def add_extension_received_document(
# Process chunks
chunks = await create_document_chunks(content.pageContent)
from app.utils.blocknote_converter import convert_markdown_to_blocknote
# Convert markdown to BlockNote JSON
blocknote_json = await convert_markdown_to_blocknote(combined_document_string)
if not blocknote_json:

View file

@ -99,14 +99,15 @@ async def add_received_file_document_using_unstructured(
# Process chunks
chunks = await create_document_chunks(file_in_markdown)
from app.utils.blocknote_converter import convert_markdown_to_blocknote
# Convert markdown to BlockNote JSON
blocknote_json = await convert_markdown_to_blocknote(file_in_markdown)
if not blocknote_json:
logging.warning(f"Failed to convert {file_name} to BlockNote JSON, document will not be editable")
logging.warning(
f"Failed to convert {file_name} to BlockNote JSON, document will not be editable"
)
# Update or create document
if existing_document:
@ -223,14 +224,15 @@ async def add_received_file_document_using_llamacloud(
# Process chunks
chunks = await create_document_chunks(file_in_markdown)
from app.utils.blocknote_converter import convert_markdown_to_blocknote
# Convert markdown to BlockNote JSON
blocknote_json = await convert_markdown_to_blocknote(file_in_markdown)
if not blocknote_json:
logging.warning(f"Failed to convert {file_name} to BlockNote JSON, document will not be editable")
logging.warning(
f"Failed to convert {file_name} to BlockNote JSON, document will not be editable"
)
# Update or create document
if existing_document:
@ -372,14 +374,15 @@ async def add_received_file_document_using_docling(
# Process chunks
chunks = await create_document_chunks(file_in_markdown)
from app.utils.blocknote_converter import convert_markdown_to_blocknote
# Convert markdown to BlockNote JSON
blocknote_json = await convert_markdown_to_blocknote(file_in_markdown)
if not blocknote_json:
logging.warning(f"Failed to convert {file_name} to BlockNote JSON, document will not be editable")
logging.warning(
f"Failed to convert {file_name} to BlockNote JSON, document will not be editable"
)
# Update or create document
if existing_document:

View file

@ -109,14 +109,15 @@ async def add_received_markdown_file_document(
# Process chunks
chunks = await create_document_chunks(file_in_markdown)
from app.utils.blocknote_converter import convert_markdown_to_blocknote
# Convert to BlockNote JSON
blocknote_json = await convert_markdown_to_blocknote(file_in_markdown)
if not blocknote_json:
logging.warning(f"Failed to convert {file_name} to BlockNote JSON, document will not be editable")
logging.warning(
f"Failed to convert {file_name} to BlockNote JSON, document will not be editable"
)
# Update or create document
if existing_document:

View file

@ -247,7 +247,7 @@ async def add_crawled_url_document(
f"Processing content chunks for URL: {url}",
{"stage": "chunk_processing"},
)
from app.utils.blocknote_converter import convert_markdown_to_blocknote
# Convert markdown to BlockNote JSON

View file

@ -290,7 +290,7 @@ async def add_youtube_video_document(
f"Processing content chunks for video: {video_data.get('title', 'YouTube Video')}",
{"stage": "chunk_processing"},
)
from app.utils.blocknote_converter import convert_markdown_to_blocknote
# Convert transcript to BlockNote JSON

View file

@ -11,17 +11,17 @@ logger = logging.getLogger(__name__)
async def convert_markdown_to_blocknote(markdown: str) -> dict[str, Any] | None:
"""
Convert markdown to BlockNote JSON via Next.js API.
Args:
markdown: Markdown string to convert
Returns:
BlockNote document as dict, or None if conversion fails
"""
if not markdown or not markdown.strip():
logger.warning("Empty markdown provided for conversion")
return None
if not markdown or len(markdown) < 10:
logger.warning("Markdown became too short after sanitization")
# Return a minimal BlockNote document
@ -32,13 +32,13 @@ async def convert_markdown_to_blocknote(markdown: str) -> dict[str, Any] | None:
{
"type": "text",
"text": "Document content could not be converted for editing.",
"styles": {}
"styles": {},
}
],
"children": []
"children": [],
}
]
async with httpx.AsyncClient() as client:
try:
response = await client.post(
@ -49,19 +49,23 @@ async def convert_markdown_to_blocknote(markdown: str) -> dict[str, Any] | None:
response.raise_for_status()
data = response.json()
blocknote_document = data.get("blocknote_document")
if blocknote_document:
logger.info(f"Successfully converted markdown to BlockNote (original: {len(markdown)} chars, sanitized: {len(markdown)} chars)")
logger.info(
f"Successfully converted markdown to BlockNote (original: {len(markdown)} chars, sanitized: {len(markdown)} chars)"
)
return blocknote_document
else:
logger.warning("Next.js API returned empty blocknote_document")
return None
except httpx.TimeoutException:
logger.error("Timeout converting markdown to BlockNote after 30s")
return None
except httpx.HTTPStatusError as e:
logger.error(f"HTTP error converting markdown to BlockNote: {e.response.status_code} - {e.response.text}")
logger.error(
f"HTTP error converting markdown to BlockNote: {e.response.status_code} - {e.response.text}"
)
# Log first 1000 chars of problematic markdown for debugging
logger.debug(f"Problematic markdown sample: {markdown[:1000]}")
return None
@ -69,20 +73,23 @@ async def convert_markdown_to_blocknote(markdown: str) -> dict[str, Any] | None:
logger.error(f"Failed to convert markdown to BlockNote: {e}", exc_info=True)
return None
async def convert_blocknote_to_markdown(blocknote_document: dict[str, Any] | list[dict[str, Any]]) -> str | None:
async def convert_blocknote_to_markdown(
blocknote_document: dict[str, Any] | list[dict[str, Any]],
) -> str | None:
"""
Convert BlockNote JSON to markdown via Next.js API.
Args:
blocknote_document: BlockNote document as dict or list of blocks
Returns:
Markdown string, or None if conversion fails
"""
if not blocknote_document:
logger.warning("Empty BlockNote document provided for conversion")
return None
async with httpx.AsyncClient() as client:
try:
response = await client.post(
@ -93,21 +100,24 @@ async def convert_blocknote_to_markdown(blocknote_document: dict[str, Any] | lis
response.raise_for_status()
data = response.json()
markdown = data.get("markdown")
if markdown:
logger.info(f"Successfully converted BlockNote to markdown ({len(markdown)} chars)")
logger.info(
f"Successfully converted BlockNote to markdown ({len(markdown)} chars)"
)
return markdown
else:
logger.warning("Next.js API returned empty markdown")
return None
except httpx.TimeoutException:
logger.error("Timeout converting BlockNote to markdown after 30s")
return None
except httpx.HTTPStatusError as e:
logger.error(f"HTTP error converting BlockNote to markdown: {e.response.status_code} - {e.response.text}")
logger.error(
f"HTTP error converting BlockNote to markdown: {e.response.status_code} - {e.response.text}"
)
return None
except Exception as e:
logger.error(f"Failed to convert BlockNote to markdown: {e}", exc_info=True)
return None