mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-07 14:52:39 +02:00
chore: Added direct handling for markdown files.
- Fixed podcast imports.
This commit is contained in:
parent
d716347531
commit
9ccc147bb0
10 changed files with 118 additions and 59 deletions
|
|
@ -194,6 +194,52 @@ async def add_extension_received_document(
|
|||
await session.rollback()
|
||||
raise RuntimeError(f"Failed to process extension document: {str(e)}")
|
||||
|
||||
async def add_received_markdown_file_document(
|
||||
session: AsyncSession,
|
||||
file_name: str,
|
||||
file_in_markdown: str,
|
||||
search_space_id: int
|
||||
) -> Optional[Document]:
|
||||
try:
|
||||
|
||||
# Generate summary
|
||||
summary_chain = SUMMARY_PROMPT_TEMPLATE | config.long_context_llm_instance
|
||||
summary_result = await summary_chain.ainvoke({"document": file_in_markdown})
|
||||
summary_content = summary_result.content
|
||||
summary_embedding = config.embedding_model_instance.embed(
|
||||
summary_content)
|
||||
|
||||
# Process chunks
|
||||
chunks = [
|
||||
Chunk(content=chunk.text, embedding=config.embedding_model_instance.embed(chunk.text))
|
||||
for chunk in config.chunker_instance.chunk(file_in_markdown)
|
||||
]
|
||||
|
||||
# Create and store document
|
||||
document = Document(
|
||||
search_space_id=search_space_id,
|
||||
title=file_name,
|
||||
document_type=DocumentType.FILE,
|
||||
document_metadata={
|
||||
"FILE_NAME": file_name,
|
||||
"SAVED_AT": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
},
|
||||
content=summary_content,
|
||||
embedding=summary_embedding,
|
||||
chunks=chunks
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
await session.commit()
|
||||
await session.refresh(document)
|
||||
|
||||
return document
|
||||
except SQLAlchemyError as db_error:
|
||||
await session.rollback()
|
||||
raise db_error
|
||||
except Exception as e:
|
||||
await session.rollback()
|
||||
raise RuntimeError(f"Failed to process file document: {str(e)}")
|
||||
|
||||
async def add_received_file_document(
|
||||
session: AsyncSession,
|
||||
|
|
|
|||
|
|
@ -1,10 +1,9 @@
|
|||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from app.schemas import PodcastGenerateRequest
|
||||
from typing import List
|
||||
from sqlalchemy import select
|
||||
from app.db import Chat, Podcast
|
||||
|
||||
from app.agents.podcaster.graph import graph as podcaster_graph
|
||||
from surfsense_backend.app.agents.podcaster.state import State
|
||||
from app.agents.podcaster.state import State
|
||||
from app.db import Chat, Podcast
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
|
||||
async def generate_document_podcast(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue