mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 08:46:22 +02:00
feat: added circleback connector
This commit is contained in:
parent
23870042f3
commit
c19d300c9d
27 changed files with 1153 additions and 97 deletions
|
|
@ -0,0 +1,183 @@
|
|||
"""
|
||||
Circleback meeting document processor.
|
||||
|
||||
This module processes meeting data received from Circleback webhooks
|
||||
and stores it as searchable documents in the database.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db import Document, DocumentType
|
||||
from app.services.llm_service import get_document_summary_llm
|
||||
from app.utils.document_converters import (
|
||||
create_document_chunks,
|
||||
generate_content_hash,
|
||||
generate_document_summary,
|
||||
generate_unique_identifier_hash,
|
||||
)
|
||||
|
||||
from .base import (
|
||||
check_document_by_unique_identifier,
|
||||
get_current_timestamp,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def add_circleback_meeting_document(
|
||||
session: AsyncSession,
|
||||
meeting_id: int,
|
||||
meeting_name: str,
|
||||
markdown_content: str,
|
||||
metadata: dict[str, Any],
|
||||
search_space_id: int,
|
||||
) -> Document | None:
|
||||
"""
|
||||
Process and store a Circleback meeting document.
|
||||
|
||||
Args:
|
||||
session: Database session
|
||||
meeting_id: Circleback meeting ID
|
||||
meeting_name: Name of the meeting
|
||||
markdown_content: Meeting content formatted as markdown
|
||||
metadata: Meeting metadata dictionary
|
||||
search_space_id: ID of the search space
|
||||
|
||||
Returns:
|
||||
Document object if successful, None if failed or duplicate
|
||||
"""
|
||||
try:
|
||||
# Generate unique identifier hash using Circleback meeting ID
|
||||
unique_identifier = f"circleback_{meeting_id}"
|
||||
unique_identifier_hash = generate_unique_identifier_hash(
|
||||
DocumentType.CIRCLEBACK, unique_identifier, search_space_id
|
||||
)
|
||||
|
||||
# Generate content hash
|
||||
content_hash = generate_content_hash(markdown_content, search_space_id)
|
||||
|
||||
# Check if document with this unique identifier already exists
|
||||
existing_document = await check_document_by_unique_identifier(
|
||||
session, unique_identifier_hash
|
||||
)
|
||||
|
||||
if existing_document:
|
||||
# Document exists - check if content has changed
|
||||
if existing_document.content_hash == content_hash:
|
||||
logger.info(f"Circleback meeting {meeting_id} unchanged. Skipping.")
|
||||
return existing_document
|
||||
else:
|
||||
# Content has changed - update the existing document
|
||||
logger.info(
|
||||
f"Content changed for Circleback meeting {meeting_id}. Updating document."
|
||||
)
|
||||
|
||||
# Get LLM for generating summary
|
||||
llm = await get_document_summary_llm(session, search_space_id)
|
||||
if not llm:
|
||||
logger.warning(
|
||||
f"No LLM configured for search space {search_space_id}. Using content as summary."
|
||||
)
|
||||
# Use first 1000 chars as summary if no LLM available
|
||||
summary_content = (
|
||||
markdown_content[:1000] + "..."
|
||||
if len(markdown_content) > 1000
|
||||
else markdown_content
|
||||
)
|
||||
summary_embedding = None
|
||||
else:
|
||||
# Generate summary with metadata
|
||||
document_metadata = {
|
||||
"meeting_name": meeting_name,
|
||||
"meeting_id": meeting_id,
|
||||
"document_type": "Circleback Meeting",
|
||||
**{
|
||||
k: v
|
||||
for k, v in metadata.items()
|
||||
if isinstance(v, str | int | float | bool)
|
||||
},
|
||||
}
|
||||
summary_content, summary_embedding = await generate_document_summary(
|
||||
markdown_content, llm, document_metadata
|
||||
)
|
||||
|
||||
# Process chunks
|
||||
chunks = await create_document_chunks(markdown_content)
|
||||
|
||||
# Convert to BlockNote JSON for editing capability
|
||||
from app.utils.blocknote_converter import convert_markdown_to_blocknote
|
||||
|
||||
blocknote_json = await convert_markdown_to_blocknote(markdown_content)
|
||||
if not blocknote_json:
|
||||
logger.warning(
|
||||
f"Failed to convert Circleback meeting {meeting_id} to BlockNote JSON, document will not be editable"
|
||||
)
|
||||
|
||||
# Prepare document metadata
|
||||
document_metadata = {
|
||||
"CIRCLEBACK_MEETING_ID": meeting_id,
|
||||
"MEETING_NAME": meeting_name,
|
||||
"SOURCE": "CIRCLEBACK_WEBHOOK",
|
||||
**metadata,
|
||||
}
|
||||
|
||||
# Update or create document
|
||||
if existing_document:
|
||||
# Update existing document
|
||||
existing_document.title = meeting_name
|
||||
existing_document.content = summary_content
|
||||
existing_document.content_hash = content_hash
|
||||
if summary_embedding is not None:
|
||||
existing_document.embedding = summary_embedding
|
||||
existing_document.document_metadata = document_metadata
|
||||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
existing_document.content_needs_reindexing = False
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
document = existing_document
|
||||
logger.info(
|
||||
f"Updated Circleback meeting document {meeting_id} in search space {search_space_id}"
|
||||
)
|
||||
else:
|
||||
# Create new document
|
||||
document = Document(
|
||||
search_space_id=search_space_id,
|
||||
title=meeting_name,
|
||||
document_type=DocumentType.CIRCLEBACK,
|
||||
document_metadata=document_metadata,
|
||||
content=summary_content,
|
||||
embedding=summary_embedding,
|
||||
chunks=chunks,
|
||||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
content_needs_reindexing=False,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
await session.commit()
|
||||
await session.refresh(document)
|
||||
logger.info(
|
||||
f"Created new Circleback meeting document {meeting_id} in search space {search_space_id}"
|
||||
)
|
||||
|
||||
return document
|
||||
|
||||
except SQLAlchemyError as db_error:
|
||||
await session.rollback()
|
||||
logger.error(
|
||||
f"Database error processing Circleback meeting {meeting_id}: {db_error}"
|
||||
)
|
||||
raise db_error
|
||||
except Exception as e:
|
||||
await session.rollback()
|
||||
logger.error(f"Failed to process Circleback meeting {meeting_id}: {e!s}")
|
||||
raise RuntimeError(f"Failed to process Circleback meeting: {e!s}") from e
|
||||
|
|
@ -473,7 +473,8 @@ async def process_file_in_background(
|
|||
session: AsyncSession,
|
||||
task_logger: TaskLoggingService,
|
||||
log_entry: Log,
|
||||
connector: dict | None = None, # Optional: {"type": "GOOGLE_DRIVE_FILE", "metadata": {...}}
|
||||
connector: dict
|
||||
| None = None, # Optional: {"type": "GOOGLE_DRIVE_FILE", "metadata": {...}}
|
||||
):
|
||||
try:
|
||||
# Check if the file is a markdown or text file
|
||||
|
|
@ -926,7 +927,9 @@ async def process_file_in_background(
|
|||
)
|
||||
|
||||
if connector:
|
||||
await _update_document_from_connector(last_created_doc, connector, session)
|
||||
await _update_document_from_connector(
|
||||
last_created_doc, connector, session
|
||||
)
|
||||
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
|
|
@ -1053,7 +1056,9 @@ async def process_file_in_background(
|
|||
)
|
||||
|
||||
if connector:
|
||||
await _update_document_from_connector(doc_result, connector, session)
|
||||
await _update_document_from_connector(
|
||||
doc_result, connector, session
|
||||
)
|
||||
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue