mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-26 17:26:23 +02:00
introduced blocknote editor
This commit is contained in:
parent
70f3381d7e
commit
e68286f22e
23 changed files with 2158 additions and 14 deletions
|
|
@ -144,6 +144,16 @@ async def add_extension_received_document(
|
|||
|
||||
# Process chunks
|
||||
chunks = await create_document_chunks(content.pageContent)
|
||||
|
||||
from app.utils.blocknote_converter import convert_markdown_to_blocknote
|
||||
|
||||
# Convert markdown to BlockNote JSON
|
||||
blocknote_json = await convert_markdown_to_blocknote(combined_document_string)
|
||||
if not blocknote_json:
|
||||
logging.warning(
|
||||
f"Failed to convert extension document '{content.metadata.VisitedWebPageTitle}' "
|
||||
f"to BlockNote JSON, document will not be editable"
|
||||
)
|
||||
|
||||
# Update or create document
|
||||
if existing_document:
|
||||
|
|
@ -154,6 +164,7 @@ async def add_extension_received_document(
|
|||
existing_document.embedding = summary_embedding
|
||||
existing_document.document_metadata = content.metadata.model_dump()
|
||||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
|
|
@ -170,6 +181,7 @@ async def add_extension_received_document(
|
|||
chunks=chunks,
|
||||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -99,6 +99,14 @@ async def add_received_file_document_using_unstructured(
|
|||
|
||||
# Process chunks
|
||||
chunks = await create_document_chunks(file_in_markdown)
|
||||
|
||||
from app.utils.blocknote_converter import convert_markdown_to_blocknote
|
||||
|
||||
# Convert markdown to BlockNote JSON
|
||||
blocknote_json = await convert_markdown_to_blocknote(file_in_markdown)
|
||||
if not blocknote_json:
|
||||
logging.warning(f"Failed to convert {file_name} to BlockNote JSON, document will not be editable")
|
||||
|
||||
|
||||
# Update or create document
|
||||
if existing_document:
|
||||
|
|
@ -112,6 +120,7 @@ async def add_received_file_document_using_unstructured(
|
|||
"ETL_SERVICE": "UNSTRUCTURED",
|
||||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
|
|
@ -131,6 +140,7 @@ async def add_received_file_document_using_unstructured(
|
|||
chunks=chunks,
|
||||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
@ -213,6 +223,14 @@ async def add_received_file_document_using_llamacloud(
|
|||
|
||||
# Process chunks
|
||||
chunks = await create_document_chunks(file_in_markdown)
|
||||
|
||||
from app.utils.blocknote_converter import convert_markdown_to_blocknote
|
||||
|
||||
# Convert markdown to BlockNote JSON
|
||||
blocknote_json = await convert_markdown_to_blocknote(file_in_markdown)
|
||||
if not blocknote_json:
|
||||
logging.warning(f"Failed to convert {file_name} to BlockNote JSON, document will not be editable")
|
||||
|
||||
|
||||
# Update or create document
|
||||
if existing_document:
|
||||
|
|
@ -226,6 +244,7 @@ async def add_received_file_document_using_llamacloud(
|
|||
"ETL_SERVICE": "LLAMACLOUD",
|
||||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
|
|
@ -245,6 +264,7 @@ async def add_received_file_document_using_llamacloud(
|
|||
chunks=chunks,
|
||||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
@ -352,6 +372,14 @@ async def add_received_file_document_using_docling(
|
|||
|
||||
# Process chunks
|
||||
chunks = await create_document_chunks(file_in_markdown)
|
||||
|
||||
from app.utils.blocknote_converter import convert_markdown_to_blocknote
|
||||
|
||||
# Convert markdown to BlockNote JSON
|
||||
blocknote_json = await convert_markdown_to_blocknote(file_in_markdown)
|
||||
if not blocknote_json:
|
||||
logging.warning(f"Failed to convert {file_name} to BlockNote JSON, document will not be editable")
|
||||
|
||||
|
||||
# Update or create document
|
||||
if existing_document:
|
||||
|
|
@ -365,6 +393,7 @@ async def add_received_file_document_using_docling(
|
|||
"ETL_SERVICE": "DOCLING",
|
||||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
|
|
@ -384,6 +413,7 @@ async def add_received_file_document_using_docling(
|
|||
chunks=chunks,
|
||||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -109,6 +109,14 @@ async def add_received_markdown_file_document(
|
|||
|
||||
# Process chunks
|
||||
chunks = await create_document_chunks(file_in_markdown)
|
||||
|
||||
from app.utils.blocknote_converter import convert_markdown_to_blocknote
|
||||
|
||||
# Convert to BlockNote JSON
|
||||
blocknote_json = await convert_markdown_to_blocknote(file_in_markdown)
|
||||
if not blocknote_json:
|
||||
logging.warning(f"Failed to convert {file_name} to BlockNote JSON, document will not be editable")
|
||||
|
||||
|
||||
# Update or create document
|
||||
if existing_document:
|
||||
|
|
@ -121,6 +129,7 @@ async def add_received_markdown_file_document(
|
|||
"FILE_NAME": file_name,
|
||||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
|
|
@ -139,6 +148,7 @@ async def add_received_markdown_file_document(
|
|||
chunks=chunks,
|
||||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -247,6 +247,16 @@ async def add_crawled_url_document(
|
|||
f"Processing content chunks for URL: {url}",
|
||||
{"stage": "chunk_processing"},
|
||||
)
|
||||
|
||||
from app.utils.blocknote_converter import convert_markdown_to_blocknote
|
||||
|
||||
# Convert markdown to BlockNote JSON
|
||||
blocknote_json = await convert_markdown_to_blocknote(combined_document_string)
|
||||
if not blocknote_json:
|
||||
logging.warning(
|
||||
f"Failed to convert crawled URL '{url}' to BlockNote JSON, "
|
||||
"document will not be editable"
|
||||
)
|
||||
|
||||
chunks = await create_document_chunks(content_in_markdown)
|
||||
|
||||
|
|
@ -267,6 +277,7 @@ async def add_crawled_url_document(
|
|||
existing_document.embedding = summary_embedding
|
||||
existing_document.document_metadata = url_crawled[0].metadata
|
||||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
|
||||
document = existing_document
|
||||
else:
|
||||
|
|
@ -289,6 +300,7 @@ async def add_crawled_url_document(
|
|||
chunks=chunks,
|
||||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -290,6 +290,16 @@ async def add_youtube_video_document(
|
|||
f"Processing content chunks for video: {video_data.get('title', 'YouTube Video')}",
|
||||
{"stage": "chunk_processing"},
|
||||
)
|
||||
|
||||
from app.utils.blocknote_converter import convert_markdown_to_blocknote
|
||||
|
||||
# Convert transcript to BlockNote JSON
|
||||
blocknote_json = await convert_markdown_to_blocknote(combined_document_string)
|
||||
if not blocknote_json:
|
||||
logging.warning(
|
||||
f"Failed to convert YouTube video '{video_id}' to BlockNote JSON, "
|
||||
"document will not be editable"
|
||||
)
|
||||
|
||||
chunks = await create_document_chunks(combined_document_string)
|
||||
|
||||
|
|
@ -314,6 +324,7 @@ async def add_youtube_video_document(
|
|||
"thumbnail": video_data.get("thumbnail_url", ""),
|
||||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
|
|
@ -342,6 +353,7 @@ async def add_youtube_video_document(
|
|||
search_space_id=search_space_id,
|
||||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue