diff --git a/surfsense_backend/alembic/versions/101_add_source_markdown_to_documents.py b/surfsense_backend/alembic/versions/101_add_source_markdown_to_documents.py new file mode 100644 index 000000000..a5b0728b4 --- /dev/null +++ b/surfsense_backend/alembic/versions/101_add_source_markdown_to_documents.py @@ -0,0 +1,153 @@ +"""101_add_source_markdown_to_documents + +Revision ID: 101 +Revises: 100 +Create Date: 2026-02-17 + +Adds source_markdown column and populates it for existing documents +using a pure-Python BlockNote JSON → Markdown converter. No external +dependencies (no Node.js, no Celery, no HTTP calls). + +Fallback chain per document: + 1. blocknote_document exists → convert to markdown with Python converter + 2. blocknote_document missing/fails → reconstruct from chunks + 3. Neither exists → skip (log warning) +""" + +from __future__ import annotations + +import json +import logging +from collections.abc import Sequence + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "101" +down_revision: str | None = "100" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + +logger = logging.getLogger("alembic.migration.101") + + +def upgrade() -> None: + """Add source_markdown column and populate it for existing documents.""" + + conn = op.get_bind() + existing_columns = [ + col["name"] for col in sa.inspect(conn).get_columns("documents") + ] + + # 1. Add the column + if "source_markdown" not in existing_columns: + op.add_column( + "documents", + sa.Column("source_markdown", sa.Text(), nullable=True), + ) + + # 2. Populate source_markdown for existing documents (inline, synchronous) + _populate_source_markdown(conn) + + +def _populate_source_markdown(conn) -> None: + """Populate source_markdown for all documents where it is NULL. + + Fallback chain: + 1. blocknote_document → pure-Python converter → source_markdown + 2. chunks (ordered by id) → joined text → source_markdown + 3. Neither → skip with warning + """ + # Import the pure-Python converter (no external deps) + from app.utils.blocknote_to_markdown import blocknote_to_markdown + + # Find documents that need migration + result = conn.execute( + sa.text(""" + SELECT id, title, blocknote_document + FROM documents + WHERE source_markdown IS NULL + """) + ) + rows = result.fetchall() + + total = len(rows) + if total == 0: + print("✓ No documents need source_markdown migration") + return + + print(f" Migrating {total} documents to source_markdown...") + + migrated = 0 + from_blocknote = 0 + from_chunks = 0 + skipped = 0 + + for row in rows: + doc_id = row[0] + doc_title = row[1] + blocknote_doc = row[2] + + markdown = None + + # --- Fallback 1: Convert blocknote_document with pure Python --- + if blocknote_doc: + try: + # blocknote_doc may be a JSON string or already parsed + if isinstance(blocknote_doc, str): + blocknote_doc = json.loads(blocknote_doc) + markdown = blocknote_to_markdown(blocknote_doc) + if markdown: + from_blocknote += 1 + except Exception as e: + logger.warning( + f" Doc {doc_id} ({doc_title}): blocknote conversion failed ({e}), " + f"falling back to chunks" + ) + + # --- Fallback 2: Reconstruct from chunks --- + if not markdown: + chunk_result = conn.execute( + sa.text(""" + SELECT content FROM chunks + WHERE document_id = :doc_id + ORDER BY id + """), + {"doc_id": doc_id}, + ) + chunk_rows = chunk_result.fetchall() + if chunk_rows: + chunk_texts = [r[0] for r in chunk_rows if r[0]] + if chunk_texts: + markdown = "\n\n".join(chunk_texts) + from_chunks += 1 + + # --- Fallback 3: Nothing to migrate from --- + if not markdown or not markdown.strip(): + logger.warning( + f" Doc {doc_id} ({doc_title}): no blocknote_document or chunks — skipped" + ) + skipped += 1 + continue + + # Write source_markdown + conn.execute( + sa.text(""" + UPDATE documents SET source_markdown = :md WHERE id = :doc_id + """), + {"md": markdown, "doc_id": doc_id}, + ) + migrated += 1 + + print( + f"✓ source_markdown migration complete: {migrated} migrated " + f"({from_blocknote} from blocknote, {from_chunks} from chunks), " + f"{skipped} skipped out of {total} total" + ) + + +def downgrade() -> None: + """Remove source_markdown column.""" + op.drop_column("documents", "source_markdown") diff --git a/surfsense_backend/app/celery_app.py b/surfsense_backend/app/celery_app.py index a6637b1bd..62414775a 100644 --- a/surfsense_backend/app/celery_app.py +++ b/surfsense_backend/app/celery_app.py @@ -79,7 +79,6 @@ celery_app = Celery( "app.tasks.celery_tasks.podcast_tasks", "app.tasks.celery_tasks.connector_tasks", "app.tasks.celery_tasks.schedule_checker_task", - "app.tasks.celery_tasks.blocknote_migration_tasks", "app.tasks.celery_tasks.document_reindex_tasks", "app.tasks.celery_tasks.stale_notification_cleanup_task", ], diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index e232f0e14..1c9181ed2 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -894,9 +894,15 @@ class Document(BaseModel, TimestampMixin): embedding = Column(Vector(config.embedding_model_instance.dimension)) # BlockNote live editing state (NULL when never edited) + # DEPRECATED: Will be removed in a future migration. Use source_markdown instead. blocknote_document = Column(JSONB, nullable=True) - # blocknote background reindex flag + # Full raw markdown content for the Plate.js editor. + # This is the source of truth for document content in the editor. + # Populated from markdown at ingestion time, or from blocknote_document migration. + source_markdown = Column(Text, nullable=True) + + # Background reindex flag (set when editor content is saved) content_needs_reindexing = Column( Boolean, nullable=False, default=False, server_default=text("false") ) diff --git a/surfsense_backend/app/routes/editor_routes.py b/surfsense_backend/app/routes/editor_routes.py index a0e7b59c1..26a5ca12e 100644 --- a/surfsense_backend/app/routes/editor_routes.py +++ b/surfsense_backend/app/routes/editor_routes.py @@ -1,5 +1,5 @@ """ -Editor routes for BlockNote document editing. +Editor routes for document editing with markdown (Plate.js frontend). """ from datetime import UTC, datetime @@ -27,8 +27,8 @@ async def get_editor_content( """ Get document content for editing. - Returns BlockNote JSON document. If blocknote_document is NULL, - attempts to generate it from chunks (lazy migration). + Returns source_markdown for the Plate.js editor. + Falls back to blocknote_document → markdown conversion, then chunk reconstruction. Requires DOCUMENTS_READ permission. """ @@ -54,54 +54,61 @@ async def get_editor_content( if not document: raise HTTPException(status_code=404, detail="Document not found") - # If blocknote_document exists, return it + # Priority 1: Return source_markdown if it exists (check `is not None` to allow empty strings) + if document.source_markdown is not None: + return { + "document_id": document.id, + "title": document.title, + "document_type": document.document_type.value, + "source_markdown": document.source_markdown, + "updated_at": document.updated_at.isoformat() + if document.updated_at + else None, + } + + # Priority 2: Lazy-migrate from blocknote_document (pure Python, no external deps) if document.blocknote_document: - return { - "document_id": document.id, - "title": document.title, - "document_type": document.document_type.value, - "blocknote_document": document.blocknote_document, - "updated_at": document.updated_at.isoformat() - if document.updated_at - else None, - } + from app.utils.blocknote_to_markdown import blocknote_to_markdown - # For NOTE type documents, return empty BlockNote structure if no content exists - if document.document_type == DocumentType.NOTE: - # Return empty BlockNote structure - empty_blocknote = [ - { - "type": "paragraph", - "content": [], - "children": [], - } - ] - # Save empty structure if not already saved - if not document.blocknote_document: - document.blocknote_document = empty_blocknote + markdown = blocknote_to_markdown(document.blocknote_document) + if markdown: + # Persist the migration so we don't repeat it + document.source_markdown = markdown await session.commit() + return { + "document_id": document.id, + "title": document.title, + "document_type": document.document_type.value, + "source_markdown": markdown, + "updated_at": document.updated_at.isoformat() + if document.updated_at + else None, + } + + # Priority 3: For NOTE type with no content, return empty markdown + if document.document_type == DocumentType.NOTE: + empty_markdown = "" + document.source_markdown = empty_markdown + await session.commit() return { "document_id": document.id, "title": document.title, "document_type": document.document_type.value, - "blocknote_document": empty_blocknote, + "source_markdown": empty_markdown, "updated_at": document.updated_at.isoformat() if document.updated_at else None, } - # Lazy migration: Try to generate blocknote_document from chunks (for other document types) - from app.utils.blocknote_converter import convert_markdown_to_blocknote - + # Priority 4: Reconstruct from chunks chunks = sorted(document.chunks, key=lambda c: c.id) if not chunks: raise HTTPException( status_code=400, - detail="This document has no chunks and cannot be edited. Please re-upload to enable editing.", + detail="This document has no content and cannot be edited. Please re-upload to enable editing.", ) - # Reconstruct markdown from chunks markdown_content = "\n\n".join(chunk.content for chunk in chunks) if not markdown_content.strip(): @@ -110,26 +117,18 @@ async def get_editor_content( detail="This document has empty content and cannot be edited.", ) - # Convert to BlockNote - blocknote_json = await convert_markdown_to_blocknote(markdown_content) - - if not blocknote_json: - raise HTTPException( - status_code=500, - detail="Failed to convert document to editable format. Please try again later.", - ) - - # Save the generated blocknote_document (lazy migration) - document.blocknote_document = blocknote_json - document.content_needs_reindexing = False + # Persist the lazy migration + document.source_markdown = markdown_content await session.commit() return { "document_id": document.id, "title": document.title, "document_type": document.document_type.value, - "blocknote_document": blocknote_json, - "updated_at": document.updated_at.isoformat() if document.updated_at else None, + "source_markdown": markdown_content, + "updated_at": document.updated_at.isoformat() + if document.updated_at + else None, } @@ -142,9 +141,11 @@ async def save_document( user: User = Depends(current_active_user), ): """ - Save BlockNote document and trigger reindexing. + Save document markdown and trigger reindexing. Called when user clicks 'Save & Exit'. + Accepts { "source_markdown": "...", "title": "..." (optional) }. + Requires DOCUMENTS_UPDATE permission. """ from app.tasks.celery_tasks.document_reindex_tasks import reindex_document_task @@ -169,49 +170,40 @@ async def save_document( if not document: raise HTTPException(status_code=404, detail="Document not found") - blocknote_document = data.get("blocknote_document") - if not blocknote_document: - raise HTTPException(status_code=400, detail="blocknote_document is required") + source_markdown = data.get("source_markdown") + if source_markdown is None: + raise HTTPException( + status_code=400, detail="source_markdown is required" + ) - # Add type validation - if not isinstance(blocknote_document, list): - raise HTTPException(status_code=400, detail="blocknote_document must be a list") + if not isinstance(source_markdown, str): + raise HTTPException( + status_code=400, detail="source_markdown must be a string" + ) - # For NOTE type documents, extract title from first block (heading) - if ( - document.document_type == DocumentType.NOTE - and blocknote_document - and len(blocknote_document) > 0 - ): - first_block = blocknote_document[0] - if ( - first_block - and first_block.get("content") - and isinstance(first_block["content"], list) - ): - # Extract text from first block content - # Match the frontend extractTitleFromBlockNote logic exactly - title_parts = [] - for item in first_block["content"]: - if isinstance(item, str): - title_parts.append(item) - elif ( - isinstance(item, dict) - and "text" in item - and isinstance(item["text"], str) - ): - # BlockNote structure: {"type": "text", "text": "...", "styles": {}} - title_parts.append(item["text"]) + # For NOTE type, extract title from first heading line if present + if document.document_type == DocumentType.NOTE: + # If the frontend sends a title, use it; otherwise extract from markdown + new_title = data.get("title") + if not new_title: + # Extract title from the first line of markdown (# Heading) + for line in source_markdown.split("\n"): + stripped = line.strip() + if stripped.startswith("# "): + new_title = stripped[2:].strip() + break + elif stripped: + # First non-empty non-heading line + new_title = stripped[:100] + break - new_title = "".join(title_parts).strip() - if new_title: - document.title = new_title - else: - # Only set to "Untitled" if content exists but is empty - document.title = "Untitled" + if new_title: + document.title = new_title.strip() + else: + document.title = "Untitled" - # Save BlockNote document - document.blocknote_document = blocknote_document + # Save source_markdown + document.source_markdown = source_markdown document.updated_at = datetime.now(UTC) document.content_needs_reindexing = True diff --git a/surfsense_backend/app/routes/notes_routes.py b/surfsense_backend/app/routes/notes_routes.py index 47cf96d04..76518de08 100644 --- a/surfsense_backend/app/routes/notes_routes.py +++ b/surfsense_backend/app/routes/notes_routes.py @@ -1,9 +1,8 @@ """ -Notes routes for creating and managing BlockNote documents. +Notes routes for creating and managing note documents. """ from datetime import UTC, datetime -from typing import Any from fastapi import APIRouter, Depends, HTTPException from pydantic import BaseModel @@ -20,7 +19,7 @@ router = APIRouter() class CreateNoteRequest(BaseModel): title: str - blocknote_document: list[dict[str, Any]] | None = None + source_markdown: str | None = None @router.post("/search-spaces/{search_space_id}/notes", response_model=DocumentRead) @@ -31,7 +30,7 @@ async def create_note( user: User = Depends(current_active_user), ): """ - Create a new note (BlockNote document). + Create a new note document. Requires DOCUMENTS_CREATE permission. """ @@ -47,16 +46,8 @@ async def create_note( if not request.title or not request.title.strip(): raise HTTPException(status_code=400, detail="Title is required") - # Default empty BlockNote structure if not provided - blocknote_document = request.blocknote_document - if blocknote_document is None: - blocknote_document = [ - { - "type": "paragraph", - "content": [], - "children": [], - } - ] + # Default empty markdown if not provided + source_markdown = request.source_markdown if request.source_markdown else "" # Generate content hash (use title for now, will be updated on save) import hashlib @@ -64,14 +55,13 @@ async def create_note( content_hash = hashlib.sha256(request.title.encode()).hexdigest() # Create document with NOTE type - document = Document( search_space_id=search_space_id, title=request.title.strip(), document_type=DocumentType.NOTE, content="", # Empty initially, will be populated on first save/reindex content_hash=content_hash, - blocknote_document=blocknote_document, + source_markdown=source_markdown, content_needs_reindexing=False, # Will be set to True on first save document_metadata={"NOTE": True}, embedding=None, # Will be generated on first reindex diff --git a/surfsense_backend/app/tasks/celery_tasks/blocknote_migration_tasks.py b/surfsense_backend/app/tasks/celery_tasks/blocknote_migration_tasks.py deleted file mode 100644 index c945bcb04..000000000 --- a/surfsense_backend/app/tasks/celery_tasks/blocknote_migration_tasks.py +++ /dev/null @@ -1,168 +0,0 @@ -"""Celery tasks for populating blocknote_document for existing documents.""" - -import logging - -from sqlalchemy import select -from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine -from sqlalchemy.orm import selectinload -from sqlalchemy.pool import NullPool - -from app.celery_app import celery_app -from app.config import config -from app.db import Document -from app.utils.blocknote_converter import convert_markdown_to_blocknote - -logger = logging.getLogger(__name__) - - -def get_celery_session_maker(): - """ - Create a new async session maker for Celery tasks. - This is necessary because Celery tasks run in a new event loop, - and the default session maker is bound to the main app's event loop. - """ - engine = create_async_engine( - config.DATABASE_URL, - poolclass=NullPool, - echo=False, - ) - return async_sessionmaker(engine, expire_on_commit=False) - - -@celery_app.task(name="populate_blocknote_for_documents", bind=True) -def populate_blocknote_for_documents_task( - self, document_ids: list[int] | None = None, batch_size: int = 50 -): - """ - Celery task to populate blocknote_document for existing documents. - - Args: - document_ids: Optional list of specific document IDs to process. - If None, processes all documents with blocknote_document IS NULL. - batch_size: Number of documents to process in each batch (default: 50) - """ - import asyncio - - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - try: - loop.run_until_complete( - _populate_blocknote_for_documents(document_ids, batch_size) - ) - finally: - loop.close() - - -async def _populate_blocknote_for_documents( - document_ids: list[int] | None = None, batch_size: int = 50 -): - """ - Async function to populate blocknote_document for documents. - - Args: - document_ids: Optional list of specific document IDs to process - batch_size: Number of documents to process per batch - """ - async with get_celery_session_maker()() as session: - try: - # Build query for documents that need blocknote_document populated - query = select(Document).where(Document.blocknote_document.is_(None)) - - # If specific document IDs provided, filter by them - if document_ids: - query = query.where(Document.id.in_(document_ids)) - - # Load chunks relationship to avoid N+1 queries - query = query.options(selectinload(Document.chunks)) - - # Execute query - result = await session.execute(query) - documents = result.scalars().all() - - total_documents = len(documents) - logger.info(f"Found {total_documents} documents to process") - - if total_documents == 0: - logger.info("No documents to process") - return - - # Process documents in batches - processed = 0 - failed = 0 - - for i in range(0, total_documents, batch_size): - batch = documents[i : i + batch_size] - logger.info( - f"Processing batch {i // batch_size + 1}: documents {i + 1}-{min(i + batch_size, total_documents)}" - ) - - for document in batch: - try: - # Use preloaded chunks from selectinload - no need to query again - chunks = sorted(document.chunks, key=lambda c: c.id) - - if not chunks: - logger.warning( - f"Document {document.id} ({document.title}) has no chunks, skipping" - ) - failed += 1 - continue - - # Reconstruct markdown by concatenating chunk contents - markdown_content = "\n\n".join( - chunk.content for chunk in chunks - ) - - if not markdown_content or not markdown_content.strip(): - logger.warning( - f"Document {document.id} ({document.title}) has empty markdown content, skipping" - ) - failed += 1 - continue - - # Convert markdown to BlockNote JSON - blocknote_json = await convert_markdown_to_blocknote( - markdown_content - ) - - if not blocknote_json: - logger.warning( - f"Failed to convert markdown to BlockNote for document {document.id} ({document.title})" - ) - failed += 1 - continue - - # Update document with blocknote_document (other fields already have correct defaults) - document.blocknote_document = blocknote_json - - processed += 1 - - # Commit every batch_size documents to avoid long transactions - if processed % batch_size == 0: - await session.commit() - logger.info( - f"Committed batch: {processed} documents processed so far" - ) - - except Exception as e: - logger.error( - f"Error processing document {document.id} ({document.title}): {e}", - exc_info=True, - ) - failed += 1 - # Continue with next document instead of failing entire batch - continue - - # Commit remaining changes in the batch - await session.commit() - logger.info(f"Completed batch {i // batch_size + 1}") - - logger.info( - f"Migration complete: {processed} documents processed, {failed} failed" - ) - - except Exception as e: - await session.rollback() - logger.error(f"Error in blocknote migration task: {e}", exc_info=True) - raise diff --git a/surfsense_backend/app/tasks/celery_tasks/document_reindex_tasks.py b/surfsense_backend/app/tasks/celery_tasks/document_reindex_tasks.py index b9d4c3b95..a2a0d635d 100644 --- a/surfsense_backend/app/tasks/celery_tasks/document_reindex_tasks.py +++ b/surfsense_backend/app/tasks/celery_tasks/document_reindex_tasks.py @@ -13,7 +13,6 @@ from app.config import config from app.db import Document from app.services.llm_service import get_user_long_context_llm from app.services.task_logging_service import TaskLoggingService -from app.utils.blocknote_converter import convert_blocknote_to_markdown from app.utils.document_converters import ( create_document_chunks, generate_document_summary, @@ -84,48 +83,37 @@ async def _reindex_document(document_id: int, user_id: str): ) try: - if not document.blocknote_document: + # Read markdown directly from source_markdown + markdown_content = document.source_markdown + + if not markdown_content: await task_logger.log_task_failure( log_entry, - f"Document {document_id} has no BlockNote content to reindex", - "No BlockNote content", - {"error_type": "NoBlockNoteContent"}, + f"Document {document_id} has no source_markdown to reindex", + "No source_markdown content", + {"error_type": "NoSourceMarkdown"}, ) return logger.info(f"Reindexing document {document_id} ({document.title})") - # 1. Convert BlockNote → Markdown - markdown_content = await convert_blocknote_to_markdown( - document.blocknote_document - ) - - if not markdown_content: - await task_logger.log_task_failure( - log_entry, - f"Failed to convert document {document_id} to markdown", - "Markdown conversion failed", - {"error_type": "ConversionError"}, - ) - return - - # 2. Delete old chunks explicitly + # 1. Delete old chunks explicitly from app.db import Chunk await session.execute(delete(Chunk).where(Chunk.document_id == document_id)) await session.flush() # Ensure old chunks are deleted - # 3. Create new chunks + # 2. Create new chunks from source_markdown new_chunks = await create_document_chunks(markdown_content) - # 4. Add new chunks to session + # 3. Add new chunks to session for chunk in new_chunks: chunk.document_id = document_id session.add(chunk) logger.info(f"Created {len(new_chunks)} chunks for document {document_id}") - # 5. Regenerate summary + # 4. Regenerate summary user_llm = await get_user_long_context_llm( session, user_id, document.search_space_id ) @@ -139,7 +127,7 @@ async def _reindex_document(document_id: int, user_id: str): markdown_content, user_llm, document_metadata ) - # 6. Update document + # 5. Update document document.content = summary_content document.embedding = summary_embedding document.content_needs_reindexing = False diff --git a/surfsense_backend/app/tasks/document_processors/circleback_processor.py b/surfsense_backend/app/tasks/document_processors/circleback_processor.py index a513bcaf0..a86b64499 100644 --- a/surfsense_backend/app/tasks/document_processors/circleback_processor.py +++ b/surfsense_backend/app/tasks/document_processors/circleback_processor.py @@ -208,14 +208,7 @@ async def add_circleback_meeting_document( # Process chunks chunks = await create_document_chunks(markdown_content) - # Convert to BlockNote JSON for editing capability - from app.utils.blocknote_converter import convert_markdown_to_blocknote - - blocknote_json = await convert_markdown_to_blocknote(markdown_content) - if not blocknote_json: - logger.warning( - f"Failed to convert Circleback meeting {meeting_id} to BlockNote JSON, document will not be editable" - ) + # No BlockNote conversion needed — store raw markdown for Plate.js editor # Prepare final document metadata document_metadata = { @@ -235,7 +228,7 @@ async def add_circleback_meeting_document( document.embedding = summary_embedding document.document_metadata = document_metadata safe_set_chunks(document, chunks) - document.blocknote_document = blocknote_json + document.source_markdown = markdown_content document.content_needs_reindexing = False document.updated_at = get_current_timestamp() document.status = DocumentStatus.ready() diff --git a/surfsense_backend/app/tasks/document_processors/extension_processor.py b/surfsense_backend/app/tasks/document_processors/extension_processor.py index 9ddab4ec6..a6e482e15 100644 --- a/surfsense_backend/app/tasks/document_processors/extension_processor.py +++ b/surfsense_backend/app/tasks/document_processors/extension_processor.py @@ -146,16 +146,6 @@ async def add_extension_received_document( # Process chunks chunks = await create_document_chunks(content.pageContent) - from app.utils.blocknote_converter import convert_markdown_to_blocknote - - # Convert markdown to BlockNote JSON - blocknote_json = await convert_markdown_to_blocknote(combined_document_string) - if not blocknote_json: - logging.warning( - f"Failed to convert extension document '{content.metadata.VisitedWebPageTitle}' " - f"to BlockNote JSON, document will not be editable" - ) - # Update or create document if existing_document: # Update existing document @@ -165,7 +155,7 @@ async def add_extension_received_document( existing_document.embedding = summary_embedding existing_document.document_metadata = content.metadata.model_dump() existing_document.chunks = chunks - existing_document.blocknote_document = blocknote_json + existing_document.source_markdown = combined_document_string existing_document.updated_at = get_current_timestamp() await session.commit() @@ -183,7 +173,7 @@ async def add_extension_received_document( chunks=chunks, content_hash=content_hash, unique_identifier_hash=unique_identifier_hash, - blocknote_document=blocknote_json, + source_markdown=combined_document_string, updated_at=get_current_timestamp(), created_by_id=user_id, ) diff --git a/surfsense_backend/app/tasks/document_processors/file_processors.py b/surfsense_backend/app/tasks/document_processors/file_processors.py index 3fa57e998..041053a04 100644 --- a/surfsense_backend/app/tasks/document_processors/file_processors.py +++ b/surfsense_backend/app/tasks/document_processors/file_processors.py @@ -476,15 +476,6 @@ async def add_received_file_document_using_unstructured( # Process chunks chunks = await create_document_chunks(file_in_markdown) - from app.utils.blocknote_converter import convert_markdown_to_blocknote - - # Convert markdown to BlockNote JSON - blocknote_json = await convert_markdown_to_blocknote(file_in_markdown) - if not blocknote_json: - logging.warning( - f"Failed to convert {file_name} to BlockNote JSON, document will not be editable" - ) - # Update or create document if existing_document: # Update existing document @@ -497,7 +488,7 @@ async def add_received_file_document_using_unstructured( "ETL_SERVICE": "UNSTRUCTURED", } existing_document.chunks = chunks - existing_document.blocknote_document = blocknote_json + existing_document.source_markdown = file_in_markdown existing_document.content_needs_reindexing = False existing_document.updated_at = get_current_timestamp() existing_document.status = DocumentStatus.ready() # Mark as ready @@ -525,7 +516,7 @@ async def add_received_file_document_using_unstructured( chunks=chunks, content_hash=content_hash, unique_identifier_hash=primary_hash, - blocknote_document=blocknote_json, + source_markdown=file_in_markdown, content_needs_reindexing=False, updated_at=get_current_timestamp(), created_by_id=user_id, @@ -619,15 +610,6 @@ async def add_received_file_document_using_llamacloud( # Process chunks chunks = await create_document_chunks(file_in_markdown) - from app.utils.blocknote_converter import convert_markdown_to_blocknote - - # Convert markdown to BlockNote JSON - blocknote_json = await convert_markdown_to_blocknote(file_in_markdown) - if not blocknote_json: - logging.warning( - f"Failed to convert {file_name} to BlockNote JSON, document will not be editable" - ) - # Update or create document if existing_document: # Update existing document @@ -640,7 +622,7 @@ async def add_received_file_document_using_llamacloud( "ETL_SERVICE": "LLAMACLOUD", } existing_document.chunks = chunks - existing_document.blocknote_document = blocknote_json + existing_document.source_markdown = file_in_markdown existing_document.content_needs_reindexing = False existing_document.updated_at = get_current_timestamp() existing_document.status = DocumentStatus.ready() # Mark as ready @@ -668,7 +650,7 @@ async def add_received_file_document_using_llamacloud( chunks=chunks, content_hash=content_hash, unique_identifier_hash=primary_hash, - blocknote_document=blocknote_json, + source_markdown=file_in_markdown, content_needs_reindexing=False, updated_at=get_current_timestamp(), created_by_id=user_id, @@ -787,15 +769,6 @@ async def add_received_file_document_using_docling( # Process chunks chunks = await create_document_chunks(file_in_markdown) - from app.utils.blocknote_converter import convert_markdown_to_blocknote - - # Convert markdown to BlockNote JSON - blocknote_json = await convert_markdown_to_blocknote(file_in_markdown) - if not blocknote_json: - logging.warning( - f"Failed to convert {file_name} to BlockNote JSON, document will not be editable" - ) - # Update or create document if existing_document: # Update existing document @@ -808,7 +781,7 @@ async def add_received_file_document_using_docling( "ETL_SERVICE": "DOCLING", } existing_document.chunks = chunks - existing_document.blocknote_document = blocknote_json + existing_document.source_markdown = file_in_markdown existing_document.content_needs_reindexing = False existing_document.updated_at = get_current_timestamp() existing_document.status = DocumentStatus.ready() # Mark as ready @@ -836,7 +809,7 @@ async def add_received_file_document_using_docling( chunks=chunks, content_hash=content_hash, unique_identifier_hash=primary_hash, - blocknote_document=blocknote_json, + source_markdown=file_in_markdown, content_needs_reindexing=False, updated_at=get_current_timestamp(), created_by_id=user_id, @@ -1658,7 +1631,6 @@ async def process_file_in_background_with_document( from app.config import config as app_config from app.services.llm_service import get_user_long_context_llm - from app.utils.blocknote_converter import convert_markdown_to_blocknote try: markdown_content = None @@ -1917,9 +1889,6 @@ async def process_file_in_background_with_document( chunks = await create_document_chunks(markdown_content) - # Convert to BlockNote for editing - blocknote_json = await convert_markdown_to_blocknote(markdown_content) - # ===== STEP 4: Update document to READY ===== from sqlalchemy.orm.attributes import flag_modified @@ -1937,7 +1906,7 @@ async def process_file_in_background_with_document( # Use safe_set_chunks to avoid async issues safe_set_chunks(document, chunks) - document.blocknote_document = blocknote_json + document.source_markdown = markdown_content document.content_needs_reindexing = False document.updated_at = get_current_timestamp() document.status = DocumentStatus.ready() # Shows checkmark in UI diff --git a/surfsense_backend/app/tasks/document_processors/markdown_processor.py b/surfsense_backend/app/tasks/document_processors/markdown_processor.py index 8ecbb1370..a8d20c062 100644 --- a/surfsense_backend/app/tasks/document_processors/markdown_processor.py +++ b/surfsense_backend/app/tasks/document_processors/markdown_processor.py @@ -248,15 +248,6 @@ async def add_received_markdown_file_document( # Process chunks chunks = await create_document_chunks(file_in_markdown) - from app.utils.blocknote_converter import convert_markdown_to_blocknote - - # Convert to BlockNote JSON - blocknote_json = await convert_markdown_to_blocknote(file_in_markdown) - if not blocknote_json: - logging.warning( - f"Failed to convert {file_name} to BlockNote JSON, document will not be editable" - ) - # Update or create document if existing_document: # Update existing document @@ -268,7 +259,7 @@ async def add_received_markdown_file_document( "FILE_NAME": file_name, } existing_document.chunks = chunks - existing_document.blocknote_document = blocknote_json + existing_document.source_markdown = file_in_markdown existing_document.updated_at = get_current_timestamp() existing_document.status = DocumentStatus.ready() # Mark as ready @@ -294,7 +285,7 @@ async def add_received_markdown_file_document( chunks=chunks, content_hash=content_hash, unique_identifier_hash=primary_hash, - blocknote_document=blocknote_json, + source_markdown=file_in_markdown, updated_at=get_current_timestamp(), created_by_id=user_id, connector_id=connector.get("connector_id") if connector else None, diff --git a/surfsense_backend/app/tasks/document_processors/youtube_processor.py b/surfsense_backend/app/tasks/document_processors/youtube_processor.py index 80cdaae4d..13b969fb6 100644 --- a/surfsense_backend/app/tasks/document_processors/youtube_processor.py +++ b/surfsense_backend/app/tasks/document_processors/youtube_processor.py @@ -397,16 +397,6 @@ async def add_youtube_video_document( {"stage": "chunk_processing"}, ) - from app.utils.blocknote_converter import convert_markdown_to_blocknote - - # Convert transcript to BlockNote JSON - blocknote_json = await convert_markdown_to_blocknote(combined_document_string) - if not blocknote_json: - logging.warning( - f"Failed to convert YouTube video '{video_id}' to BlockNote JSON, " - "document will not be editable" - ) - chunks = await create_document_chunks(combined_document_string) # ======================================================================= @@ -430,7 +420,7 @@ async def add_youtube_video_document( "thumbnail": video_data.get("thumbnail_url", ""), } safe_set_chunks(document, chunks) - document.blocknote_document = blocknote_json + document.source_markdown = combined_document_string document.status = DocumentStatus.ready() # READY status - fully processed document.updated_at = get_current_timestamp() diff --git a/surfsense_backend/app/utils/blocknote_converter.py b/surfsense_backend/app/utils/blocknote_converter.py deleted file mode 100644 index b57a82996..000000000 --- a/surfsense_backend/app/utils/blocknote_converter.py +++ /dev/null @@ -1,123 +0,0 @@ -import logging -from typing import Any - -import httpx - -from app.config import config - -logger = logging.getLogger(__name__) - - -async def convert_markdown_to_blocknote(markdown: str) -> dict[str, Any] | None: - """ - Convert markdown to BlockNote JSON via Next.js API. - - Args: - markdown: Markdown string to convert - - Returns: - BlockNote document as dict, or None if conversion fails - """ - if not markdown or not markdown.strip(): - logger.warning("Empty markdown provided for conversion") - return None - - if not markdown or len(markdown) < 10: - logger.warning("Markdown became too short after sanitization") - # Return a minimal BlockNote document - return [ - { - "type": "paragraph", - "content": [ - { - "type": "text", - "text": "Document content could not be converted for editing.", - "styles": {}, - } - ], - "children": [], - } - ] - - async with httpx.AsyncClient() as client: - try: - response = await client.post( - f"{config.NEXT_FRONTEND_URL}/api/convert-to-blocknote", - json={"markdown": markdown}, - timeout=30.0, - ) - response.raise_for_status() - data = response.json() - blocknote_document = data.get("blocknote_document") - - if blocknote_document: - logger.info( - f"Successfully converted markdown to BlockNote (original: {len(markdown)} chars, sanitized: {len(markdown)} chars)" - ) - return blocknote_document - else: - logger.warning("Next.js API returned empty blocknote_document") - return None - - except httpx.TimeoutException: - logger.error("Timeout converting markdown to BlockNote after 30s") - return None - except httpx.HTTPStatusError as e: - logger.error( - f"HTTP error converting markdown to BlockNote: {e.response.status_code} - {e.response.text}" - ) - # Log first 1000 chars of problematic markdown for debugging - logger.debug(f"Problematic markdown sample: {markdown[:1000]}") - return None - except Exception as e: - logger.error(f"Failed to convert markdown to BlockNote: {e}", exc_info=True) - return None - - -async def convert_blocknote_to_markdown( - blocknote_document: dict[str, Any] | list[dict[str, Any]], -) -> str | None: - """ - Convert BlockNote JSON to markdown via Next.js API. - - Args: - blocknote_document: BlockNote document as dict or list of blocks - - Returns: - Markdown string, or None if conversion fails - """ - if not blocknote_document: - logger.warning("Empty BlockNote document provided for conversion") - return None - - async with httpx.AsyncClient() as client: - try: - response = await client.post( - f"{config.NEXT_FRONTEND_URL}/api/convert-to-markdown", - json={"blocknote_document": blocknote_document}, - timeout=30.0, - ) - response.raise_for_status() - data = response.json() - markdown = data.get("markdown") - - if markdown: - logger.info( - f"Successfully converted BlockNote to markdown ({len(markdown)} chars)" - ) - return markdown - else: - logger.warning("Next.js API returned empty markdown") - return None - - except httpx.TimeoutException: - logger.error("Timeout converting BlockNote to markdown after 30s") - return None - except httpx.HTTPStatusError as e: - logger.error( - f"HTTP error converting BlockNote to markdown: {e.response.status_code} - {e.response.text}" - ) - return None - except Exception as e: - logger.error(f"Failed to convert BlockNote to markdown: {e}", exc_info=True) - return None diff --git a/surfsense_backend/app/utils/blocknote_to_markdown.py b/surfsense_backend/app/utils/blocknote_to_markdown.py new file mode 100644 index 000000000..0556df705 --- /dev/null +++ b/surfsense_backend/app/utils/blocknote_to_markdown.py @@ -0,0 +1,281 @@ +"""Pure-Python converter: BlockNote JSON → Markdown. + +No external dependencies (no Node.js, no npm packages, no HTTP calls). +Handles all standard BlockNote block types. Produces output equivalent to +BlockNote's own ``blocksToMarkdownLossy()``. + +Usage: + from app.utils.blocknote_to_markdown import blocknote_to_markdown + + markdown = blocknote_to_markdown(blocknote_json) +""" + +from __future__ import annotations + +import logging +from typing import Any + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Inline content → markdown text +# --------------------------------------------------------------------------- + +def _render_inline_content(content: list[dict[str, Any]] | None) -> str: + """Convert BlockNote inline content array to a markdown string.""" + if not content: + return "" + + parts: list[str] = [] + for item in content: + if not isinstance(item, dict): + continue + + item_type = item.get("type", "text") + + if item_type == "text": + text = item.get("text", "") + styles: dict[str, Any] = item.get("styles", {}) + + # Apply inline styles (order: code first so nested marks don't break it) + if styles.get("code"): + text = f"`{text}`" + else: + if styles.get("bold"): + text = f"**{text}**" + if styles.get("italic"): + text = f"*{text}*" + if styles.get("strikethrough"): + text = f"~~{text}~~" + # underline has no markdown equivalent — keep as plain text (lossy) + + parts.append(text) + + elif item_type == "link": + href = item.get("href", "") + link_content = item.get("content", []) + link_text = _render_inline_content(link_content) if link_content else href + parts.append(f"[{link_text}]({href})") + + else: + # Unknown inline type — extract text if possible + text = item.get("text", "") + if text: + parts.append(text) + + return "".join(parts) + + +# --------------------------------------------------------------------------- +# Block → markdown lines +# --------------------------------------------------------------------------- + +# Track numbered list state for consecutive numberedListItem blocks +_numbered_list_counter: int = 0 + + +def _render_block(block: dict[str, Any], indent: int = 0) -> list[str]: + """Convert a single BlockNote block (and its children) to markdown lines. + + Args: + block: A BlockNote block dict. + indent: Current indentation level (for nested children). + + Returns: + A list of markdown lines (without trailing newlines). + """ + global _numbered_list_counter + + block_type = block.get("type", "paragraph") + props: dict[str, Any] = block.get("props", {}) + content = block.get("content") + children: list[dict[str, Any]] = block.get("children", []) + prefix = " " * indent # 2-space indent per nesting level + + lines: list[str] = [] + + # --- Block type handlers --- + + if block_type == "paragraph": + text = _render_inline_content(content) if content else "" + lines.append(f"{prefix}{text}") + + elif block_type == "heading": + level = props.get("level", 1) + hashes = "#" * min(max(level, 1), 6) + text = _render_inline_content(content) if content else "" + lines.append(f"{prefix}{hashes} {text}") + + elif block_type == "bulletListItem": + text = _render_inline_content(content) if content else "" + lines.append(f"{prefix}- {text}") + + elif block_type == "numberedListItem": + # Use props.start if present, otherwise increment counter + start = props.get("start") + if start is not None: + _numbered_list_counter = int(start) + else: + _numbered_list_counter += 1 + text = _render_inline_content(content) if content else "" + lines.append(f"{prefix}{_numbered_list_counter}. {text}") + + elif block_type == "checkListItem": + checked = props.get("checked", False) + marker = "[x]" if checked else "[ ]" + text = _render_inline_content(content) if content else "" + lines.append(f"{prefix}- {marker} {text}") + + elif block_type == "codeBlock": + language = props.get("language", "") + # Code blocks store content as a single text item + code_text = _render_inline_content(content) if content else "" + lines.append(f"{prefix}```{language}") + for code_line in code_text.split("\n"): + lines.append(f"{prefix}{code_line}") + lines.append(f"{prefix}```") + + elif block_type == "table": + # Table content is a nested structure: content.rows[].cells[][] + table_content = block.get("content", {}) + rows: list[dict[str, Any]] = [] + + if isinstance(table_content, dict): + rows = table_content.get("rows", []) + elif isinstance(table_content, list): + # Some versions store rows directly as a list + rows = table_content + + if rows: + for row_idx, row in enumerate(rows): + cells = row.get("cells", []) if isinstance(row, dict) else row + cell_texts: list[str] = [] + for cell in cells: + if isinstance(cell, list): + # Cell is a list of inline content + cell_texts.append(_render_inline_content(cell)) + elif isinstance(cell, str): + cell_texts.append(cell) + else: + cell_texts.append(str(cell)) + lines.append(f"{prefix}| {' | '.join(cell_texts)} |") + # Add header separator after first row + if row_idx == 0: + lines.append(f"{prefix}| {' | '.join('---' for _ in cell_texts)} |") + + elif block_type == "image": + url = props.get("url", "") + caption = props.get("caption", "") or props.get("name", "") + if url: + lines.append(f"{prefix}![{caption}]({url})") + + elif block_type == "video": + url = props.get("url", "") + caption = props.get("caption", "") or "video" + if url: + lines.append(f"{prefix}[{caption}]({url})") + + elif block_type == "audio": + url = props.get("url", "") + caption = props.get("caption", "") or "audio" + if url: + lines.append(f"{prefix}[{caption}]({url})") + + elif block_type == "file": + url = props.get("url", "") + name = props.get("name", "") or props.get("caption", "") or "file" + if url: + lines.append(f"{prefix}[{name}]({url})") + + else: + # Unknown block type — extract text content if possible, skip otherwise + if content: + text = _render_inline_content(content) if isinstance(content, list) else "" + if text: + lines.append(f"{prefix}{text}") + # If no content at all, silently skip (lossy) + + # --- Render nested children (indented) --- + if children: + for child in children: + lines.extend(_render_block(child, indent=indent + 1)) + + return lines + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +def blocknote_to_markdown( + blocks: list[dict[str, Any]] | dict[str, Any] | None, +) -> str | None: + """Convert a BlockNote document (list of blocks) to a markdown string. + + Args: + blocks: BlockNote JSON — either a list of block dicts, or a single + block dict, or None. + + Returns: + Markdown string, or None if input is empty / unconvertible. + + Examples: + >>> blocknote_to_markdown([ + ... {"type": "heading", "props": {"level": 2}, + ... "content": [{"type": "text", "text": "Hello", "styles": {}}], + ... "children": []}, + ... {"type": "paragraph", + ... "content": [{"type": "text", "text": "World", "styles": {"bold": True}}], + ... "children": []}, + ... ]) + '## Hello\\n\\nWorld' + """ + global _numbered_list_counter + + if not blocks: + return None + + # Normalise: accept a single block as well as a list + if isinstance(blocks, dict): + blocks = [blocks] + + if not isinstance(blocks, list): + logger.warning(f"blocknote_to_markdown received unexpected type: {type(blocks)}") + return None + + all_lines: list[str] = [] + prev_type: str | None = None + + # Reset numbered list counter for each document + _numbered_list_counter = 0 + + for block in blocks: + if not isinstance(block, dict): + continue + + block_type = block.get("type", "paragraph") + + # Reset numbered list counter when we leave a numbered list run + if block_type != "numberedListItem" and prev_type == "numberedListItem": + _numbered_list_counter = 0 + + block_lines = _render_block(block) + + # Add a blank line between blocks (standard markdown spacing) + # Exception: consecutive list items of the same type don't get extra blank lines + if all_lines and block_lines: + same_list = ( + (block_type == prev_type and block_type in ( + "bulletListItem", "numberedListItem", "checkListItem" + )) + ) + if not same_list: + all_lines.append("") + + all_lines.extend(block_lines) + prev_type = block_type + + result = "\n".join(all_lines).strip() + return result if result else None + diff --git a/surfsense_web/app/api/convert-to-blocknote/route.ts b/surfsense_web/app/api/convert-to-blocknote/route.ts deleted file mode 100644 index e11c9cb47..000000000 --- a/surfsense_web/app/api/convert-to-blocknote/route.ts +++ /dev/null @@ -1,40 +0,0 @@ -import { ServerBlockNoteEditor } from "@blocknote/server-util"; -import { type NextRequest, NextResponse } from "next/server"; - -export async function POST(request: NextRequest) { - try { - const { markdown } = await request.json(); - - if (!markdown || typeof markdown !== "string") { - return NextResponse.json({ error: "Markdown string is required" }, { status: 400 }); - } - - // Log raw markdown input before conversion - // console.log(`\n${"=".repeat(80)}`); - // console.log("RAW MARKDOWN INPUT (BEFORE CONVERSION):"); - // console.log("=".repeat(80)); - // console.log(markdown); - // console.log(`${"=".repeat(80)}\n`); - - // Create server-side editor instance - const editor = ServerBlockNoteEditor.create(); - - // Convert markdown directly to BlockNote blocks - const blocks = await editor.tryParseMarkdownToBlocks(markdown); - - if (!blocks || blocks.length === 0) { - throw new Error("Markdown parsing returned no blocks"); - } - - return NextResponse.json({ blocknote_document: blocks }); - } catch (error: any) { - console.error("Failed to convert markdown to BlockNote:", error); - return NextResponse.json( - { - error: "Failed to convert markdown to BlockNote blocks", - details: error.message, - }, - { status: 500 } - ); - } -} diff --git a/surfsense_web/app/api/convert-to-markdown/route.ts b/surfsense_web/app/api/convert-to-markdown/route.ts deleted file mode 100644 index 7005a800f..000000000 --- a/surfsense_web/app/api/convert-to-markdown/route.ts +++ /dev/null @@ -1,28 +0,0 @@ -import { ServerBlockNoteEditor } from "@blocknote/server-util"; -import { type NextRequest, NextResponse } from "next/server"; - -export async function POST(request: NextRequest) { - try { - const { blocknote_document } = await request.json(); - - if (!blocknote_document || !Array.isArray(blocknote_document)) { - return NextResponse.json({ error: "BlockNote document array is required" }, { status: 400 }); - } - - // Create server-side editor instance - const editor = ServerBlockNoteEditor.create(); - - // Convert BlockNote blocks to markdown - const markdown = await editor.blocksToMarkdownLossy(blocknote_document); - - return NextResponse.json({ - markdown, - }); - } catch (error) { - console.error("Failed to convert BlockNote to markdown:", error); - return NextResponse.json( - { error: "Failed to convert BlockNote blocks to markdown" }, - { status: 500 } - ); - } -} diff --git a/surfsense_web/app/dashboard/[search_space_id]/editor/[documentId]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/editor/[documentId]/page.tsx index 74104f450..7acc595e7 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/editor/[documentId]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/editor/[documentId]/page.tsx @@ -3,11 +3,11 @@ import { useAtom } from "jotai"; import { AlertCircle, ArrowLeft, FileText, Save } from "lucide-react"; import { motion } from "motion/react"; +import dynamic from "next/dynamic"; import { useParams, useRouter } from "next/navigation"; -import { useEffect, useMemo, useState } from "react"; +import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import { toast } from "sonner"; import { hasUnsavedEditorChangesAtom, pendingEditorNavigationAtom } from "@/atoms/editor/ui.atoms"; -import { BlockNoteEditor } from "@/components/DynamicBlockNoteEditor"; import { AlertDialog, AlertDialogAction, @@ -24,54 +24,28 @@ import { Spinner } from "@/components/ui/spinner"; import { notesApiService } from "@/lib/apis/notes-api.service"; import { authenticatedFetch, getBearerToken, redirectToLogin } from "@/lib/auth-utils"; -// BlockNote types -type BlockNoteInlineContent = - | string - | { text?: string; type?: string; styles?: Record }; - -interface BlockNoteBlock { - type: string; - content?: BlockNoteInlineContent[]; - children?: BlockNoteBlock[]; - props?: Record; -} - -type BlockNoteDocument = BlockNoteBlock[] | null | undefined; +// Dynamically import PlateEditor (uses 'use client' internally) +const PlateEditor = dynamic( + () => import("@/components/editor/plate-editor").then((mod) => ({ default: mod.PlateEditor })), + { ssr: false, loading: () =>
} +); interface EditorContent { document_id: number; title: string; document_type?: string; - blocknote_document: BlockNoteDocument; + source_markdown: string; updated_at: string | null; } -// Helper function to extract title from BlockNote document -// Takes the text content from the first block (should be a heading for notes) -function extractTitleFromBlockNote(blocknoteDocument: BlockNoteDocument): string { - if (!blocknoteDocument || !Array.isArray(blocknoteDocument) || blocknoteDocument.length === 0) { - return "Untitled"; +/** Extract title from markdown: first # heading, or first non-empty line. */ +function extractTitleFromMarkdown(markdown: string | null | undefined): string { + if (!markdown) return "Untitled"; + for (const line of markdown.split("\n")) { + const trimmed = line.trim(); + if (trimmed.startsWith("# ")) return trimmed.slice(2).trim() || "Untitled"; + if (trimmed) return trimmed.slice(0, 100); } - - const firstBlock = blocknoteDocument[0]; - if (!firstBlock) { - return "Untitled"; - } - - // Extract text from block content - // BlockNote blocks have a content array with inline content - if (firstBlock.content && Array.isArray(firstBlock.content)) { - const textContent = firstBlock.content - .map((item: BlockNoteInlineContent) => { - if (typeof item === "string") return item; - if (typeof item === "object" && item?.text) return item.text; - return ""; - }) - .join("") - .trim(); - return textContent || "Untitled"; - } - return "Untitled"; } @@ -85,11 +59,14 @@ export default function EditorPage() { const [document, setDocument] = useState(null); const [loading, setLoading] = useState(true); const [saving, setSaving] = useState(false); - const [editorContent, setEditorContent] = useState(null); const [error, setError] = useState(null); const [hasUnsavedChanges, setHasUnsavedChanges] = useState(false); const [showUnsavedDialog, setShowUnsavedDialog] = useState(false); + // Store the latest markdown from the editor + const markdownRef = useRef(""); + const initialLoadDone = useRef(false); + // Global state for cross-component communication const [, setGlobalHasUnsavedChanges] = useAtom(hasUnsavedEditorChangesAtom); const [pendingNavigation, setPendingNavigation] = useAtom(pendingEditorNavigationAtom); @@ -107,51 +84,46 @@ export default function EditorPage() { }; }, [setGlobalHasUnsavedChanges, setPendingNavigation]); - // Handle pending navigation from sidebar (e.g., when user clicks "+" to create new note) + // Handle pending navigation from sidebar useEffect(() => { if (pendingNavigation) { if (hasUnsavedChanges) { - // Show dialog to confirm navigation setShowUnsavedDialog(true); } else { - // No unsaved changes, navigate immediately router.push(pendingNavigation); setPendingNavigation(null); } } }, [pendingNavigation, hasUnsavedChanges, router, setPendingNavigation]); - // Reset state when documentId changes (e.g., navigating from existing note to new note) + // Reset state when documentId changes useEffect(() => { setDocument(null); - setEditorContent(null); setError(null); setHasUnsavedChanges(false); setLoading(true); - }, []); + initialLoadDone.current = false; + }, [documentId]); - // Fetch document content - DIRECT CALL TO FASTAPI - // Skip fetching if this is a new note + // Fetch document content useEffect(() => { async function fetchDocument() { - // For new notes, initialize with empty state if (isNewNote) { + markdownRef.current = ""; setDocument({ document_id: 0, title: "Untitled", document_type: "NOTE", - blocknote_document: null, + source_markdown: "", updated_at: null, }); - setEditorContent(null); setLoading(false); + initialLoadDone.current = true; return; } const token = getBearerToken(); if (!token) { - console.error("No auth token found"); - // Redirect to login with current path saved redirectToLogin(); return; } @@ -166,29 +138,28 @@ export default function EditorPage() { const errorData = await response .json() .catch(() => ({ detail: "Failed to fetch document" })); - const errorMessage = errorData.detail || "Failed to fetch document"; - throw new Error(errorMessage); + throw new Error(errorData.detail || "Failed to fetch document"); } const data = await response.json(); - // Check if blocknote_document exists - if (!data.blocknote_document) { - const errorMsg = - "This document does not have BlockNote content. Please re-upload the document to enable editing."; - setError(errorMsg); + if (data.source_markdown === undefined || data.source_markdown === null) { + setError( + "This document does not have editable content. Please re-upload to enable editing." + ); setLoading(false); return; } + markdownRef.current = data.source_markdown; setDocument(data); - setEditorContent(data.blocknote_document); setError(null); + initialLoadDone.current = true; } catch (error) { console.error("Error fetching document:", error); - const errorMessage = - error instanceof Error ? error.message : "Failed to fetch document. Please try again."; - setError(errorMessage); + setError( + error instanceof Error ? error.message : "Failed to fetch document. Please try again." + ); } finally { setLoading(false); } @@ -199,29 +170,30 @@ export default function EditorPage() { } }, [documentId, params.search_space_id, isNewNote]); - // Track changes to mark as unsaved - useEffect(() => { - if (editorContent && document) { - setHasUnsavedChanges(true); - } - }, [editorContent, document]); - - // Check if this is a NOTE type document const isNote = isNewNote || document?.document_type === "NOTE"; - // Extract title dynamically from editor content for notes, otherwise use document title + // Extract title dynamically from current markdown for notes const displayTitle = useMemo(() => { - if (isNote && editorContent) { - return extractTitleFromBlockNote(editorContent); + if (isNote) { + return extractTitleFromMarkdown(markdownRef.current || document?.source_markdown); } return document?.title || "Untitled"; - }, [isNote, editorContent, document?.title]); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [isNote, document?.title, document?.source_markdown, hasUnsavedChanges]); - // TODO: Maybe add Auto-save every 30 seconds - DIRECT CALL TO FASTAPI + // Handle markdown changes from the Plate editor + const handleMarkdownChange = useCallback( + (md: string) => { + markdownRef.current = md; + if (initialLoadDone.current) { + setHasUnsavedChanges(true); + } + }, + [] + ); - // Save and exit - DIRECT CALL TO FASTAPI - // For new notes, create the note first, then save - const handleSave = async () => { + // Save handler + const handleSave = useCallback(async () => { const token = getBearerToken(); if (!token) { toast.error("Please login to save"); @@ -233,25 +205,26 @@ export default function EditorPage() { setError(null); try { - // If this is a new note, create it first - if (isNewNote) { - const title = extractTitleFromBlockNote(editorContent); + const currentMarkdown = markdownRef.current; - // Create the note first + if (isNewNote) { + const title = extractTitleFromMarkdown(currentMarkdown); + + // Create the note const note = await notesApiService.createNote({ search_space_id: searchSpaceId, - title: title, - blocknote_document: editorContent || undefined, + title, + source_markdown: currentMarkdown || undefined, }); - // If there's content, save it properly and trigger reindexing - if (editorContent) { + // If there's content, save & trigger reindexing + if (currentMarkdown) { const response = await authenticatedFetch( `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${note.id}/save`, { method: "POST", headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ blocknote_document: editorContent }), + body: JSON.stringify({ source_markdown: currentMarkdown }), } ); @@ -265,24 +238,15 @@ export default function EditorPage() { setHasUnsavedChanges(false); toast.success("Note created successfully! Reindexing in background..."); - - // Redirect to documents page after successful save router.push(`/dashboard/${searchSpaceId}/documents`); } else { - // Existing document - save normally - if (!editorContent) { - toast.error("No content to save"); - setSaving(false); - return; - } - - // Save blocknote_document and trigger reindexing in background + // Existing document — save const response = await authenticatedFetch( `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${params.search_space_id}/documents/${documentId}/save`, { method: "POST", headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ blocknote_document: editorContent }), + body: JSON.stringify({ source_markdown: currentMarkdown }), } ); @@ -295,8 +259,6 @@ export default function EditorPage() { setHasUnsavedChanges(false); toast.success("Document saved! Reindexing in background..."); - - // Redirect to documents page after successful save router.push(`/dashboard/${searchSpaceId}/documents`); } } catch (error) { @@ -312,7 +274,7 @@ export default function EditorPage() { } finally { setSaving(false); } - }; + }, [isNewNote, searchSpaceId, documentId, params.search_space_id, router]); const handleBack = () => { if (hasUnsavedChanges) { @@ -324,11 +286,9 @@ export default function EditorPage() { const handleConfirmLeave = () => { setShowUnsavedDialog(false); - // Clear global unsaved state setGlobalHasUnsavedChanges(false); setHasUnsavedChanges(false); - // If there's a pending navigation (from sidebar), use that; otherwise go back to documents if (pendingNavigation) { router.push(pendingNavigation); setPendingNavigation(null); @@ -339,7 +299,6 @@ export default function EditorPage() { const handleCancelLeave = () => { setShowUnsavedDialog(false); - // Clear pending navigation if user cancels setPendingNavigation(null); }; @@ -356,7 +315,7 @@ export default function EditorPage() { ); } - if (error) { + if (error && !document) { return (
)}
-
diff --git a/surfsense_web/components/BlockNoteEditor.tsx b/surfsense_web/components/BlockNoteEditor.tsx deleted file mode 100644 index 440c63625..000000000 --- a/surfsense_web/components/BlockNoteEditor.tsx +++ /dev/null @@ -1,213 +0,0 @@ -"use client"; - -import { useTheme } from "next-themes"; -import { useEffect, useMemo, useRef } from "react"; -import "@blocknote/core/fonts/inter.css"; -import "@blocknote/mantine/style.css"; -import { BlockNoteView } from "@blocknote/mantine"; -import { useCreateBlockNote } from "@blocknote/react"; - -interface BlockNoteEditorProps { - initialContent?: any; - onChange?: (content: any) => void; - useTitleBlock?: boolean; // Whether to use first block as title (Notion-style) -} - -// Helper to ensure first block is a heading for title -function ensureTitleBlock(content: any[] | undefined): any[] { - if (!content || content.length === 0) { - // Return empty heading block for new notes - return [ - { - type: "heading", - props: { level: 1 }, - content: [], - children: [], - }, - ]; - } - - // If first block is not a heading, convert it to one - const firstBlock = content[0]; - if (firstBlock?.type !== "heading") { - // Extract text from first block - let titleText = ""; - if (firstBlock?.content && Array.isArray(firstBlock.content)) { - titleText = firstBlock.content - .map((item: any) => { - if (typeof item === "string") return item; - if (item?.text) return item.text; - return ""; - }) - .join("") - .trim(); - } - - // Create heading block with extracted text - const titleBlock = { - type: "heading", - props: { level: 1 }, - content: titleText - ? [ - { - type: "text", - text: titleText, - styles: {}, - }, - ] - : [], - children: [], - }; - - // Replace first block with heading, keep rest - return [titleBlock, ...content.slice(1)]; - } - - return content; -} - -export default function BlockNoteEditor({ - initialContent, - onChange, - useTitleBlock = false, -}: BlockNoteEditorProps) { - const { resolvedTheme } = useTheme(); - - // Track the initial content to prevent re-initialization - const initialContentRef = useRef(null); - const isInitializedRef = useRef(false); - - // Prepare initial content - ensure first block is a heading if useTitleBlock is true - const preparedInitialContent = useMemo(() => { - if (initialContentRef.current !== null) { - return undefined; // Already initialized - } - if (initialContent === undefined) { - // New note - create empty heading block - return useTitleBlock - ? [ - { - type: "heading", - props: { level: 1 }, - content: [], - children: [], - }, - ] - : undefined; - } - // Existing note - ensure first block is heading - return useTitleBlock ? ensureTitleBlock(initialContent) : initialContent; - }, [initialContent, useTitleBlock]); - - // Creates a new editor instance - only use initialContent on first render - const editor = useCreateBlockNote({ - initialContent: initialContentRef.current === null ? preparedInitialContent : undefined, - }); - - // Store initial content on first render only - useEffect(() => { - if (preparedInitialContent !== undefined && initialContentRef.current === null) { - initialContentRef.current = preparedInitialContent; - isInitializedRef.current = true; - } else if (preparedInitialContent === undefined && initialContentRef.current === null) { - // Mark as initialized even when initialContent is undefined (for new notes) - isInitializedRef.current = true; - } - }, [preparedInitialContent]); - - // Call onChange when document changes (but don't update from props) - useEffect(() => { - if (!onChange || !editor) return; - - // For new notes (no initialContent), we need to wait for editor to be ready - // Use a small delay to ensure editor is fully initialized - if (!isInitializedRef.current) { - const timer = setTimeout(() => { - isInitializedRef.current = true; - }, 100); - return () => clearTimeout(timer); - } - - const handleChange = () => { - onChange(editor.document); - }; - - // Subscribe to document changes - const unsubscribe = editor.onChange(handleChange); - - // Also call onChange once with current document to capture initial state - // This ensures we capture content even if user doesn't make changes - if (editor.document) { - onChange(editor.document); - } - - return () => { - unsubscribe(); - }; - }, [editor, onChange]); - - // Determine theme for BlockNote with custom dark mode background - const blockNoteTheme = useMemo(() => { - if (resolvedTheme === "dark") { - // Custom dark theme - only override editor background, let BlockNote handle the rest - return { - colors: { - editor: { - background: "#0A0A0A", // Custom dark background - }, - }, - }; - } - return "light" as const; - }, [resolvedTheme]); - - // Renders the editor instance - return ( -
- - -
- ); -} diff --git a/surfsense_web/components/DynamicBlockNoteEditor.tsx b/surfsense_web/components/DynamicBlockNoteEditor.tsx deleted file mode 100644 index 60fc6b11c..000000000 --- a/surfsense_web/components/DynamicBlockNoteEditor.tsx +++ /dev/null @@ -1,6 +0,0 @@ -"use client"; - -import dynamic from "next/dynamic"; - -// Dynamically import BlockNote editor with SSR disabled -export const BlockNoteEditor = dynamic(() => import("./BlockNoteEditor"), { ssr: false }); diff --git a/surfsense_web/lib/apis/notes-api.service.ts b/surfsense_web/lib/apis/notes-api.service.ts index caef9e1a5..eac3d96ed 100644 --- a/surfsense_web/lib/apis/notes-api.service.ts +++ b/surfsense_web/lib/apis/notes-api.service.ts @@ -6,7 +6,7 @@ import { baseApiService } from "./base-api.service"; const createNoteRequest = z.object({ search_space_id: z.number(), title: z.string().min(1), - blocknote_document: z.array(z.any()).optional(), + source_markdown: z.string().optional(), }); const createNoteResponse = z.object({ @@ -82,12 +82,12 @@ class NotesApiService { throw new ValidationError(`Invalid request: ${errorMessage}`); } - const { search_space_id, title, blocknote_document } = parsedRequest.data; + const { search_space_id, title, source_markdown } = parsedRequest.data; - // Send both title and blocknote_document in request body + // Send both title and source_markdown in request body const body = { title, - ...(blocknote_document && { blocknote_document }), + ...(source_markdown !== undefined && { source_markdown }), }; return baseApiService.post( diff --git a/surfsense_web/next.config.ts b/surfsense_web/next.config.ts index 3278b9f3d..2b8c31062 100644 --- a/surfsense_web/next.config.ts +++ b/surfsense_web/next.config.ts @@ -7,7 +7,6 @@ const withNextIntl = createNextIntlPlugin("./i18n/request.ts"); const nextConfig: NextConfig = { output: "standalone", - // Disable StrictMode for BlockNote compatibility with React 19/Next 15 reactStrictMode: false, typescript: { ignoreBuildErrors: true, @@ -20,9 +19,6 @@ const nextConfig: NextConfig = { }, ], }, - // Mark BlockNote server packages as external - serverExternalPackages: ["@blocknote/server-util"], - // Turbopack config (used during `next dev --turbopack`) turbopack: { rules: { @@ -33,13 +29,8 @@ const nextConfig: NextConfig = { }, }, - // Configure webpack to handle blocknote packages + SVGR - webpack: (config, { isServer }) => { - if (isServer) { - // Don't bundle these packages on the server - config.externals = [...(config.externals || []), "@blocknote/server-util"]; - } - + // Configure webpack (SVGR) + webpack: (config) => { // SVGR: import *.svg as React components const fileLoaderRule = config.module.rules.find((rule: any) => rule.test?.test?.(".svg")); config.module.rules.push( diff --git a/surfsense_web/package.json b/surfsense_web/package.json index d98ce2338..5505c614f 100644 --- a/surfsense_web/package.json +++ b/surfsense_web/package.json @@ -26,10 +26,6 @@ "@assistant-ui/react": "^0.11.53", "@assistant-ui/react-ai-sdk": "^1.1.20", "@assistant-ui/react-markdown": "^0.11.9", - "@blocknote/core": "^0.45.0", - "@blocknote/mantine": "^0.45.0", - "@blocknote/react": "^0.45.0", - "@blocknote/server-util": "^0.45.0", "@electric-sql/client": "^1.4.0", "@electric-sql/pglite": "^0.3.14", "@electric-sql/pglite-sync": "^0.4.0",