chore: ran linting

This commit is contained in:
Anish Sarkar 2026-02-06 05:35:15 +05:30
parent 00a617ef17
commit aa66928154
44 changed files with 2025 additions and 1658 deletions

View file

@ -13,8 +13,6 @@ Changes:
from collections.abc import Sequence from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op from alembic import op
# revision identifiers, used by Alembic. # revision identifiers, used by Alembic.
@ -77,4 +75,3 @@ def downgrade() -> None:
END$$; END$$;
""" """
) )

View file

@ -285,24 +285,28 @@ async def _analyze_gmail_messages_phase1(
if existing_document: if existing_document:
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
messages_to_process.append({ messages_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'markdown_content': markdown_content, "is_new": False,
'content_hash': content_hash, "markdown_content": markdown_content,
'message_id': message_id, "content_hash": content_hash,
'thread_id': thread_id, "message_id": message_id,
'subject': subject, "thread_id": thread_id,
'sender': sender, "subject": subject,
'date_str': date_str, "sender": sender,
'label_ids': label_ids, "date_str": date_str,
}) "label_ids": label_ids,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -350,18 +354,20 @@ async def _analyze_gmail_messages_phase1(
) )
session.add(document) session.add(document)
messages_to_process.append({ messages_to_process.append(
'document': document, {
'is_new': True, "document": document,
'markdown_content': markdown_content, "is_new": True,
'content_hash': content_hash, "markdown_content": markdown_content,
'message_id': message_id, "content_hash": content_hash,
'thread_id': thread_id, "message_id": message_id,
'subject': subject, "thread_id": thread_id,
'sender': sender, "subject": subject,
'date_str': date_str, "sender": sender,
'label_ids': label_ids, "date_str": date_str,
}) "label_ids": label_ids,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True)
@ -398,7 +404,7 @@ async def _process_gmail_messages_phase2(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -411,37 +417,35 @@ async def _process_gmail_messages_phase2(
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"message_id": item['message_id'], "message_id": item["message_id"],
"thread_id": item['thread_id'], "thread_id": item["thread_id"],
"subject": item['subject'], "subject": item["subject"],
"sender": item['sender'], "sender": item["sender"],
"document_type": "Gmail Message (Composio)", "document_type": "Gmail Message (Composio)",
} }
summary_content, summary_embedding = await generate_document_summary( summary_content, summary_embedding = await generate_document_summary(
item['markdown_content'], user_llm, document_metadata_for_summary item["markdown_content"], user_llm, document_metadata_for_summary
) )
else: else:
summary_content = ( summary_content = f"Gmail: {item['subject']}\n\nFrom: {item['sender']}\nDate: {item['date_str']}"
f"Gmail: {item['subject']}\n\nFrom: {item['sender']}\nDate: {item['date_str']}"
)
summary_embedding = config.embedding_model_instance.embed( summary_embedding = config.embedding_model_instance.embed(
summary_content summary_content
) )
chunks = await create_document_chunks(item['markdown_content']) chunks = await create_document_chunks(item["markdown_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['subject'] document.title = item["subject"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"message_id": item['message_id'], "message_id": item["message_id"],
"thread_id": item['thread_id'], "thread_id": item["thread_id"],
"subject": item['subject'], "subject": item["subject"],
"sender": item['sender'], "sender": item["sender"],
"date": item['date_str'], "date": item["date_str"],
"labels": item['label_ids'], "labels": item["label_ids"],
"connector_id": connector_id, "connector_id": connector_id,
"source": "composio", "source": "composio",
} }
@ -465,7 +469,9 @@ async def _process_gmail_messages_phase2(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue
@ -571,7 +577,9 @@ async def index_composio_gmail(
) )
all_messages.extend(messages) all_messages.extend(messages)
logger.info(f"Fetched {len(messages)} messages (total: {len(all_messages)})") logger.info(
f"Fetched {len(messages)} messages (total: {len(all_messages)})"
)
if not next_token or len(messages) < current_batch_size: if not next_token or len(messages) < current_batch_size:
break break
@ -616,7 +624,7 @@ async def index_composio_gmail(
) )
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
new_documents_count = len([m for m in messages_to_process if m['is_new']]) new_documents_count = len([m for m in messages_to_process if m["is_new"]])
if new_documents_count > 0: if new_documents_count > 0:
logger.info(f"Phase 1: Committing {new_documents_count} pending documents") logger.info(f"Phase 1: Committing {new_documents_count} pending documents")
await session.commit() await session.commit()
@ -645,9 +653,7 @@ async def index_composio_gmail(
await update_connector_last_indexed(session, connector, update_last_indexed) await update_connector_last_indexed(session, connector, update_last_indexed)
# Final commit to ensure all documents are persisted # Final commit to ensure all documents are persisted
logger.info( logger.info(f"Final commit: Total {documents_indexed} Gmail messages processed")
f"Final commit: Total {documents_indexed} Gmail messages processed"
)
try: try:
await session.commit() await session.commit()
logger.info( logger.info(

View file

@ -268,7 +268,9 @@ async def index_composio_google_calendar(
documents_indexed = 0 documents_indexed = 0
documents_skipped = 0 documents_skipped = 0
documents_failed = 0 # Track events that failed processing documents_failed = 0 # Track events that failed processing
duplicate_content_count = 0 # Track events skipped due to duplicate content_hash duplicate_content_count = (
0 # Track events skipped due to duplicate content_hash
)
last_heartbeat_time = time.time() last_heartbeat_time = time.time()
# ======================================================================= # =======================================================================
@ -317,23 +319,27 @@ async def index_composio_google_calendar(
if existing_document: if existing_document:
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
events_to_process.append({ events_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'markdown_content': markdown_content, "is_new": False,
'content_hash': content_hash, "markdown_content": markdown_content,
'event_id': event_id, "content_hash": content_hash,
'summary': summary, "event_id": event_id,
'start_time': start_time, "summary": summary,
'end_time': end_time, "start_time": start_time,
'location': location, "end_time": end_time,
}) "location": location,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -383,17 +389,19 @@ async def index_composio_google_calendar(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
events_to_process.append({ events_to_process.append(
'document': document, {
'is_new': True, "document": document,
'markdown_content': markdown_content, "is_new": True,
'content_hash': content_hash, "markdown_content": markdown_content,
'event_id': event_id, "content_hash": content_hash,
'summary': summary, "event_id": event_id,
'start_time': start_time, "summary": summary,
'end_time': end_time, "start_time": start_time,
'location': location, "end_time": end_time,
}) "location": location,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
@ -402,7 +410,9 @@ async def index_composio_google_calendar(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -419,7 +429,7 @@ async def index_composio_google_calendar(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -432,35 +442,40 @@ async def index_composio_google_calendar(
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"event_id": item['event_id'], "event_id": item["event_id"],
"summary": item['summary'], "summary": item["summary"],
"start_time": item['start_time'], "start_time": item["start_time"],
"document_type": "Google Calendar Event (Composio)", "document_type": "Google Calendar Event (Composio)",
} }
summary_content, summary_embedding = await generate_document_summary( (
item['markdown_content'], user_llm, document_metadata_for_summary summary_content,
summary_embedding,
) = await generate_document_summary(
item["markdown_content"],
user_llm,
document_metadata_for_summary,
) )
else: else:
summary_content = f"Calendar: {item['summary']}\n\nStart: {item['start_time']}\nEnd: {item['end_time']}" summary_content = f"Calendar: {item['summary']}\n\nStart: {item['start_time']}\nEnd: {item['end_time']}"
if item['location']: if item["location"]:
summary_content += f"\nLocation: {item['location']}" summary_content += f"\nLocation: {item['location']}"
summary_embedding = config.embedding_model_instance.embed( summary_embedding = config.embedding_model_instance.embed(
summary_content summary_content
) )
chunks = await create_document_chunks(item['markdown_content']) chunks = await create_document_chunks(item["markdown_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['summary'] document.title = item["summary"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"event_id": item['event_id'], "event_id": item["event_id"],
"summary": item['summary'], "summary": item["summary"],
"start_time": item['start_time'], "start_time": item["start_time"],
"end_time": item['end_time'], "end_time": item["end_time"],
"location": item['location'], "location": item["location"],
"connector_id": connector_id, "connector_id": connector_id,
"source": "composio", "source": "composio",
} }
@ -484,7 +499,9 @@ async def index_composio_google_calendar(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue

View file

@ -938,13 +938,15 @@ async def _index_composio_drive_delta_sync(
if existing_document: if existing_document:
# Queue existing document for update # Queue existing document for update
files_to_process.append({ files_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'file_id': file_id, "is_new": False,
'file_name': file_name, "file_id": file_id,
'mime_type': mime_type, "file_name": file_name,
}) "mime_type": mime_type,
}
)
continue continue
# Create new document with PENDING status # Create new document with PENDING status
@ -974,13 +976,15 @@ async def _index_composio_drive_delta_sync(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
files_to_process.append({ files_to_process.append(
'document': document, {
'is_new': True, "document": document,
'file_id': file_id, "is_new": True,
'file_name': file_name, "file_id": file_id,
'mime_type': mime_type, "file_name": file_name,
}) "mime_type": mime_type,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for change: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for change: {e!s}", exc_info=True)
@ -989,7 +993,9 @@ async def _index_composio_drive_delta_sync(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -1005,7 +1011,7 @@ async def _index_composio_drive_delta_sync(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit # Set to PROCESSING and commit
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -1013,11 +1019,13 @@ async def _index_composio_drive_delta_sync(
# Get file content # Get file content
content, content_error = await composio_connector.get_drive_file_content( content, content_error = await composio_connector.get_drive_file_content(
item['file_id'], original_mime_type=item['mime_type'] item["file_id"], original_mime_type=item["mime_type"]
) )
if content_error or not content: if content_error or not content:
logger.warning(f"Could not get content for file {item['file_name']}: {content_error}") logger.warning(
f"Could not get content for file {item['file_name']}: {content_error}"
)
markdown_content = f"# {item['file_name']}\n\n" markdown_content = f"# {item['file_name']}\n\n"
markdown_content += f"**File ID:** {item['file_id']}\n" markdown_content += f"**File ID:** {item['file_id']}\n"
markdown_content += f"**Type:** {item['mime_type']}\n" markdown_content += f"**Type:** {item['mime_type']}\n"
@ -1031,9 +1039,9 @@ async def _index_composio_drive_delta_sync(
else: else:
markdown_content = await _process_file_content( markdown_content = await _process_file_content(
content=content, content=content,
file_name=item['file_name'], file_name=item["file_name"],
file_id=item['file_id'], file_id=item["file_id"],
mime_type=item['mime_type'], mime_type=item["mime_type"],
search_space_id=search_space_id, search_space_id=search_space_id,
user_id=user_id, user_id=user_id,
session=session, session=session,
@ -1045,14 +1053,14 @@ async def _index_composio_drive_delta_sync(
content_hash = generate_content_hash(markdown_content, search_space_id) content_hash = generate_content_hash(markdown_content, search_space_id)
# For existing documents, check if content changed # For existing documents, check if content changed
if not item['is_new'] and document.content_hash == content_hash: if not item["is_new"] and document.content_hash == content_hash:
if not DocumentStatus.is_state(document.status, DocumentStatus.READY): if not DocumentStatus.is_state(document.status, DocumentStatus.READY):
document.status = DocumentStatus.ready() document.status = DocumentStatus.ready()
documents_skipped += 1 documents_skipped += 1
continue continue
# Check for duplicate content hash (for new documents) # Check for duplicate content hash (for new documents)
if item['is_new']: if item["is_new"]:
with session.no_autoflush: with session.no_autoflush:
duplicate_by_content = await check_duplicate_document_by_hash( duplicate_by_content = await check_duplicate_document_by_hash(
session, content_hash session, content_hash
@ -1067,13 +1075,15 @@ async def _index_composio_drive_delta_sync(
continue continue
# Heavy processing (LLM, embeddings, chunks) # Heavy processing (LLM, embeddings, chunks)
user_llm = await get_user_long_context_llm(session, user_id, search_space_id) user_llm = await get_user_long_context_llm(
session, user_id, search_space_id
)
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"file_id": item['file_id'], "file_id": item["file_id"],
"file_name": item['file_name'], "file_name": item["file_name"],
"mime_type": item['mime_type'], "mime_type": item["mime_type"],
"document_type": "Google Drive File (Composio)", "document_type": "Google Drive File (Composio)",
} }
summary_content, summary_embedding = await generate_document_summary( summary_content, summary_embedding = await generate_document_summary(
@ -1081,20 +1091,22 @@ async def _index_composio_drive_delta_sync(
) )
else: else:
summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}" summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}"
summary_embedding = config.embedding_model_instance.embed(summary_content) summary_embedding = config.embedding_model_instance.embed(
summary_content
)
chunks = await create_document_chunks(markdown_content) chunks = await create_document_chunks(markdown_content)
# Update document to READY # Update document to READY
document.title = item['file_name'] document.title = item["file_name"]
document.content = summary_content document.content = summary_content
document.content_hash = content_hash document.content_hash = content_hash
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"file_id": item['file_id'], "file_id": item["file_id"],
"file_name": item['file_name'], "file_name": item["file_name"],
"FILE_NAME": item['file_name'], "FILE_NAME": item["file_name"],
"mime_type": item['mime_type'], "mime_type": item["mime_type"],
"connector_id": connector_id, "connector_id": connector_id,
"source": "composio", "source": "composio",
} }
@ -1117,7 +1129,9 @@ async def _index_composio_drive_delta_sync(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue
@ -1329,13 +1343,15 @@ async def _index_composio_drive_full_scan(
if existing_document: if existing_document:
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
files_to_process.append({ files_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'file_id': file_id, "is_new": False,
'file_name': file_name, "file_id": file_id,
'mime_type': mime_type, "file_name": file_name,
}) "mime_type": mime_type,
}
)
continue continue
# Create new document with PENDING status (visible in UI immediately) # Create new document with PENDING status (visible in UI immediately)
@ -1365,13 +1381,15 @@ async def _index_composio_drive_full_scan(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
files_to_process.append({ files_to_process.append(
'document': document, {
'is_new': True, "document": document,
'file_id': file_id, "is_new": True,
'file_name': file_name, "file_id": file_id,
'mime_type': mime_type, "file_name": file_name,
}) "mime_type": mime_type,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for file: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for file: {e!s}", exc_info=True)
@ -1380,7 +1398,9 @@ async def _index_composio_drive_full_scan(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -1397,7 +1417,7 @@ async def _index_composio_drive_full_scan(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -1405,11 +1425,13 @@ async def _index_composio_drive_full_scan(
# Get file content (pass mime_type for Google Workspace export handling) # Get file content (pass mime_type for Google Workspace export handling)
content, content_error = await composio_connector.get_drive_file_content( content, content_error = await composio_connector.get_drive_file_content(
item['file_id'], original_mime_type=item['mime_type'] item["file_id"], original_mime_type=item["mime_type"]
) )
if content_error or not content: if content_error or not content:
logger.warning(f"Could not get content for file {item['file_name']}: {content_error}") logger.warning(
f"Could not get content for file {item['file_name']}: {content_error}"
)
markdown_content = f"# {item['file_name']}\n\n" markdown_content = f"# {item['file_name']}\n\n"
markdown_content += f"**File ID:** {item['file_id']}\n" markdown_content += f"**File ID:** {item['file_id']}\n"
markdown_content += f"**Type:** {item['mime_type']}\n" markdown_content += f"**Type:** {item['mime_type']}\n"
@ -1424,9 +1446,9 @@ async def _index_composio_drive_full_scan(
# Process content based on file type # Process content based on file type
markdown_content = await _process_file_content( markdown_content = await _process_file_content(
content=content, content=content,
file_name=item['file_name'], file_name=item["file_name"],
file_id=item['file_id'], file_id=item["file_id"],
mime_type=item['mime_type'], mime_type=item["mime_type"],
search_space_id=search_space_id, search_space_id=search_space_id,
user_id=user_id, user_id=user_id,
session=session, session=session,
@ -1438,7 +1460,7 @@ async def _index_composio_drive_full_scan(
content_hash = generate_content_hash(markdown_content, search_space_id) content_hash = generate_content_hash(markdown_content, search_space_id)
# For existing documents, check if content changed # For existing documents, check if content changed
if not item['is_new'] and document.content_hash == content_hash: if not item["is_new"] and document.content_hash == content_hash:
# Ensure status is ready # Ensure status is ready
if not DocumentStatus.is_state(document.status, DocumentStatus.READY): if not DocumentStatus.is_state(document.status, DocumentStatus.READY):
document.status = DocumentStatus.ready() document.status = DocumentStatus.ready()
@ -1446,7 +1468,7 @@ async def _index_composio_drive_full_scan(
continue continue
# Check for duplicate content hash (for new documents) # Check for duplicate content hash (for new documents)
if item['is_new']: if item["is_new"]:
with session.no_autoflush: with session.no_autoflush:
duplicate_by_content = await check_duplicate_document_by_hash( duplicate_by_content = await check_duplicate_document_by_hash(
session, content_hash session, content_hash
@ -1462,13 +1484,15 @@ async def _index_composio_drive_full_scan(
continue continue
# Heavy processing (LLM, embeddings, chunks) # Heavy processing (LLM, embeddings, chunks)
user_llm = await get_user_long_context_llm(session, user_id, search_space_id) user_llm = await get_user_long_context_llm(
session, user_id, search_space_id
)
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"file_id": item['file_id'], "file_id": item["file_id"],
"file_name": item['file_name'], "file_name": item["file_name"],
"mime_type": item['mime_type'], "mime_type": item["mime_type"],
"document_type": "Google Drive File (Composio)", "document_type": "Google Drive File (Composio)",
} }
summary_content, summary_embedding = await generate_document_summary( summary_content, summary_embedding = await generate_document_summary(
@ -1476,20 +1500,22 @@ async def _index_composio_drive_full_scan(
) )
else: else:
summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}" summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}"
summary_embedding = config.embedding_model_instance.embed(summary_content) summary_embedding = config.embedding_model_instance.embed(
summary_content
)
chunks = await create_document_chunks(markdown_content) chunks = await create_document_chunks(markdown_content)
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['file_name'] document.title = item["file_name"]
document.content = summary_content document.content = summary_content
document.content_hash = content_hash document.content_hash = content_hash
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"file_id": item['file_id'], "file_id": item["file_id"],
"file_name": item['file_name'], "file_name": item["file_name"],
"FILE_NAME": item['file_name'], "FILE_NAME": item["file_name"],
"mime_type": item['mime_type'], "mime_type": item["mime_type"],
"connector_id": connector_id, "connector_id": connector_id,
"source": "composio", "source": "composio",
} }
@ -1515,7 +1541,9 @@ async def _index_composio_drive_full_scan(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue

View file

@ -103,67 +103,70 @@ class PodcastStatus(str, Enum):
class DocumentStatus: class DocumentStatus:
""" """
Helper class for document processing status (stored as JSONB). Helper class for document processing status (stored as JSONB).
Status values: Status values:
- {"state": "ready"} - Document is fully processed and searchable - {"state": "ready"} - Document is fully processed and searchable
- {"state": "pending"} - Document is queued, waiting to be processed - {"state": "pending"} - Document is queued, waiting to be processed
- {"state": "processing"} - Document is currently being processed (only 1 at a time) - {"state": "processing"} - Document is currently being processed (only 1 at a time)
- {"state": "failed", "reason": "..."} - Processing failed with reason - {"state": "failed", "reason": "..."} - Processing failed with reason
Usage: Usage:
document.status = DocumentStatus.pending() document.status = DocumentStatus.pending()
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
document.status = DocumentStatus.ready() document.status = DocumentStatus.ready()
document.status = DocumentStatus.failed("LLM rate limit exceeded") document.status = DocumentStatus.failed("LLM rate limit exceeded")
""" """
# State constants # State constants
READY = "ready" READY = "ready"
PENDING = "pending" PENDING = "pending"
PROCESSING = "processing" PROCESSING = "processing"
FAILED = "failed" FAILED = "failed"
@staticmethod @staticmethod
def ready() -> dict: def ready() -> dict:
"""Return status dict for a ready/searchable document.""" """Return status dict for a ready/searchable document."""
return {"state": DocumentStatus.READY} return {"state": DocumentStatus.READY}
@staticmethod @staticmethod
def pending() -> dict: def pending() -> dict:
"""Return status dict for a document waiting to be processed.""" """Return status dict for a document waiting to be processed."""
return {"state": DocumentStatus.PENDING} return {"state": DocumentStatus.PENDING}
@staticmethod @staticmethod
def processing() -> dict: def processing() -> dict:
"""Return status dict for a document being processed.""" """Return status dict for a document being processed."""
return {"state": DocumentStatus.PROCESSING} return {"state": DocumentStatus.PROCESSING}
@staticmethod @staticmethod
def failed(reason: str, **extra_details) -> dict: def failed(reason: str, **extra_details) -> dict:
""" """
Return status dict for a failed document. Return status dict for a failed document.
Args: Args:
reason: Human-readable failure reason reason: Human-readable failure reason
**extra_details: Optional additional details (duplicate_of, error_code, etc.) **extra_details: Optional additional details (duplicate_of, error_code, etc.)
""" """
status = {"state": DocumentStatus.FAILED, "reason": reason[:500]} # Truncate long reasons status = {
"state": DocumentStatus.FAILED,
"reason": reason[:500],
} # Truncate long reasons
if extra_details: if extra_details:
status.update(extra_details) status.update(extra_details)
return status return status
@staticmethod @staticmethod
def get_state(status: dict | None) -> str | None: def get_state(status: dict | None) -> str | None:
"""Extract state from status dict, returns None if invalid.""" """Extract state from status dict, returns None if invalid."""
if status is None: if status is None:
return None return None
return status.get("state") if isinstance(status, dict) else None return status.get("state") if isinstance(status, dict) else None
@staticmethod @staticmethod
def is_state(status: dict | None, state: str) -> bool: def is_state(status: dict | None, state: str) -> bool:
"""Check if status matches a given state.""" """Check if status matches a given state."""
return DocumentStatus.get_state(status) == state return DocumentStatus.get_state(status) == state
@staticmethod @staticmethod
def get_failure_reason(status: dict | None) -> str | None: def get_failure_reason(status: dict | None) -> str | None:
"""Extract failure reason from status dict.""" """Extract failure reason from status dict."""
@ -866,7 +869,7 @@ class Document(BaseModel, TimestampMixin):
JSONB, JSONB,
nullable=False, nullable=False,
default=DocumentStatus.ready, default=DocumentStatus.ready,
server_default=text("'{\"state\": \"ready\"}'::jsonb"), server_default=text('\'{"state": "ready"}\'::jsonb'),
index=True, index=True,
) )

View file

@ -114,11 +114,11 @@ async def create_documents_file_upload(
): ):
""" """
Upload files as documents with real-time status tracking. Upload files as documents with real-time status tracking.
Implements 2-phase document status updates for real-time UI feedback: Implements 2-phase document status updates for real-time UI feedback:
- Phase 1: Create all documents with 'pending' status (visible in UI immediately via ElectricSQL) - Phase 1: Create all documents with 'pending' status (visible in UI immediately via ElectricSQL)
- Phase 2: Celery processes each file: pending → processing → ready/failed - Phase 2: Celery processes each file: pending → processing → ready/failed
Requires DOCUMENTS_CREATE permission. Requires DOCUMENTS_CREATE permission.
""" """
from datetime import datetime from datetime import datetime
@ -144,7 +144,9 @@ async def create_documents_file_upload(
raise HTTPException(status_code=400, detail="No files provided") raise HTTPException(status_code=400, detail="No files provided")
created_documents: list[Document] = [] created_documents: list[Document] = []
files_to_process: list[tuple[Document, str, str]] = [] # (document, temp_path, filename) files_to_process: list[
tuple[Document, str, str]
] = [] # (document, temp_path, filename)
skipped_duplicates = 0 skipped_duplicates = 0
# ===== PHASE 1: Create pending documents for all files ===== # ===== PHASE 1: Create pending documents for all files =====
@ -201,7 +203,9 @@ async def create_documents_file_upload(
) )
session.add(document) session.add(document)
created_documents.append(document) created_documents.append(document)
files_to_process.append((document, temp_path, file.filename or "unknown")) files_to_process.append(
(document, temp_path, file.filename or "unknown")
)
except Exception as e: except Exception as e:
raise HTTPException( raise HTTPException(
@ -348,15 +352,15 @@ async def read_documents(
created_by_name = None created_by_name = None
if doc.created_by: if doc.created_by:
created_by_name = doc.created_by.display_name or doc.created_by.email created_by_name = doc.created_by.display_name or doc.created_by.email
# Parse status from JSONB # Parse status from JSONB
status_data = None status_data = None
if hasattr(doc, 'status') and doc.status: if hasattr(doc, "status") and doc.status:
status_data = DocumentStatusSchema( status_data = DocumentStatusSchema(
state=doc.status.get("state", "ready"), state=doc.status.get("state", "ready"),
reason=doc.status.get("reason"), reason=doc.status.get("reason"),
) )
api_documents.append( api_documents.append(
DocumentRead( DocumentRead(
id=doc.id, id=doc.id,
@ -503,15 +507,15 @@ async def search_documents(
created_by_name = None created_by_name = None
if doc.created_by: if doc.created_by:
created_by_name = doc.created_by.display_name or doc.created_by.email created_by_name = doc.created_by.display_name or doc.created_by.email
# Parse status from JSONB # Parse status from JSONB
status_data = None status_data = None
if hasattr(doc, 'status') and doc.status: if hasattr(doc, "status") and doc.status:
status_data = DocumentStatusSchema( status_data = DocumentStatusSchema(
state=doc.status.get("state", "ready"), state=doc.status.get("state", "ready"),
reason=doc.status.get("reason"), reason=doc.status.get("reason"),
) )
api_documents.append( api_documents.append(
DocumentRead( DocumentRead(
id=doc.id, id=doc.id,

View file

@ -43,6 +43,7 @@ class DocumentUpdate(DocumentBase):
class DocumentStatusSchema(BaseModel): class DocumentStatusSchema(BaseModel):
"""Document processing status.""" """Document processing status."""
state: str # "ready", "processing", "failed" state: str # "ready", "processing", "failed"
reason: str | None = None reason: str | None = None
@ -59,8 +60,12 @@ class DocumentRead(BaseModel):
updated_at: datetime | None updated_at: datetime | None
search_space_id: int search_space_id: int
created_by_id: UUID | None = None # User who created/uploaded this document created_by_id: UUID | None = None # User who created/uploaded this document
created_by_name: str | None = None # Display name or email of the user who created this document created_by_name: str | None = (
status: DocumentStatusSchema | None = None # Processing status (ready, processing, failed) None # Display name or email of the user who created this document
)
status: DocumentStatusSchema | None = (
None # Processing status (ready, processing, failed)
)
model_config = ConfigDict(from_attributes=True) model_config = ConfigDict(from_attributes=True)

View file

@ -1465,11 +1465,7 @@ class ConnectorService:
issue_key = metadata.get("issue_key", "") issue_key = metadata.get("issue_key", "")
issue_title = metadata.get("issue_title", "Untitled Issue") issue_title = metadata.get("issue_title", "Untitled Issue")
status = metadata.get("status", "") status = metadata.get("status", "")
title = ( title = f"{issue_key} - {issue_title}" if issue_key else issue_title
f"{issue_key} - {issue_title}"
if issue_key
else issue_title
)
if status: if status:
title += f" ({status})" title += f" ({status})"
return title return title
@ -2387,11 +2383,7 @@ class ConnectorService:
def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
event_name = metadata.get("event_name", "Untitled Event") event_name = metadata.get("event_name", "Untitled Event")
start_time = metadata.get("start_time", "") start_time = metadata.get("start_time", "")
return ( return f"{event_name} ({start_time})" if start_time else event_name
f"{event_name} ({start_time})"
if start_time
else event_name
)
def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
return metadata.get("event_url", "") or "" return metadata.get("event_url", "") or ""

View file

@ -548,11 +548,11 @@ def process_file_upload_with_document_task(
): ):
""" """
Celery task to process uploaded file with existing pending document. Celery task to process uploaded file with existing pending document.
This task is used by the 2-phase document upload flow: This task is used by the 2-phase document upload flow:
- Phase 1 (API): Creates pending document (visible in UI immediately) - Phase 1 (API): Creates pending document (visible in UI immediately)
- Phase 2 (this task): Updates document status: pending → processing → ready/failed - Phase 2 (this task): Updates document status: pending → processing → ready/failed
Args: Args:
document_id: ID of the pending document created in Phase 1 document_id: ID of the pending document created in Phase 1
temp_path: Path to the uploaded file temp_path: Path to the uploaded file
@ -634,7 +634,7 @@ async def _process_file_with_document(
): ):
""" """
Process file and update existing pending document status. Process file and update existing pending document status.
This function implements Phase 2 of the 2-phase document upload: This function implements Phase 2 of the 2-phase document upload:
- Sets document status to 'processing' (shows spinner in UI) - Sets document status to 'processing' (shows spinner in UI)
- Processes the file (parsing, embedding, chunking) - Processes the file (parsing, embedding, chunking)
@ -669,11 +669,15 @@ async def _process_file_with_document(
file_size = os.path.getsize(temp_path) file_size = os.path.getsize(temp_path)
logger.info(f"[_process_file_with_document] File size: {file_size} bytes") logger.info(f"[_process_file_with_document] File size: {file_size} bytes")
except Exception as e: except Exception as e:
logger.warning(f"[_process_file_with_document] Could not get file size: {e}") logger.warning(
f"[_process_file_with_document] Could not get file size: {e}"
)
file_size = None file_size = None
# Create notification for document processing # Create notification for document processing
logger.info(f"[_process_file_with_document] Creating notification for: {filename}") logger.info(
f"[_process_file_with_document] Creating notification for: {filename}"
)
notification = ( notification = (
await NotificationService.document_processing.notify_processing_started( await NotificationService.document_processing.notify_processing_started(
session=session, session=session,
@ -822,7 +826,9 @@ async def _process_file_with_document(
if os.path.exists(temp_path): if os.path.exists(temp_path):
try: try:
os.unlink(temp_path) os.unlink(temp_path)
logger.info(f"[_process_file_with_document] Cleaned up temp file: {temp_path}") logger.info(
f"[_process_file_with_document] Cleaned up temp file: {temp_path}"
)
except Exception as cleanup_error: except Exception as cleanup_error:
logger.warning( logger.warning(
f"[_process_file_with_document] Failed to clean up temp file: {cleanup_error}" f"[_process_file_with_document] Failed to clean up temp file: {cleanup_error}"

View file

@ -154,9 +154,7 @@ async def _cleanup_stale_notifications():
f"Found {len(stale_notification_ids)} stale connector indexing notifications " f"Found {len(stale_notification_ids)} stale connector indexing notifications "
f"(no Redis heartbeat key): {stale_notification_ids}" f"(no Redis heartbeat key): {stale_notification_ids}"
) )
logger.info( logger.info(f"Connector IDs for document cleanup: {stale_connector_ids}")
f"Connector IDs for document cleanup: {stale_connector_ids}"
)
# O(1) Batch UPDATE notifications using JSONB || operator # O(1) Batch UPDATE notifications using JSONB || operator
# This merges the update data into existing notification_metadata # This merges the update data into existing notification_metadata

View file

@ -140,7 +140,9 @@ async def index_airtable_records(
log_entry, success_msg, {"bases_count": 0} log_entry, success_msg, {"bases_count": 0}
) )
# CRITICAL: Update timestamp even when no bases found so Electric SQL syncs # CRITICAL: Update timestamp even when no bases found so Electric SQL syncs
await update_connector_last_indexed(session, connector, update_last_indexed) await update_connector_last_indexed(
session, connector, update_last_indexed
)
await session.commit() await session.commit()
return 0, None # Return None (not error) when no items found return 0, None # Return None (not error) when no items found
@ -277,22 +279,28 @@ async def index_airtable_records(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status = DocumentStatus.ready() existing_document.status, DocumentStatus.READY
):
existing_document.status = (
DocumentStatus.ready()
)
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
records_to_process.append({ records_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'markdown_content': markdown_content, "is_new": False,
'content_hash': content_hash, "markdown_content": markdown_content,
'record_id': record_id, "content_hash": content_hash,
'record': record, "record_id": record_id,
'base_name': base_name, "record": record,
'table_name': table_name, "base_name": base_name,
}) "table_name": table_name,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -339,25 +347,31 @@ async def index_airtable_records(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
records_to_process.append({ records_to_process.append(
'document': document, {
'is_new': True, "document": document,
'markdown_content': markdown_content, "is_new": True,
'content_hash': content_hash, "markdown_content": markdown_content,
'record_id': record_id, "content_hash": content_hash,
'record': record, "record_id": record_id,
'base_name': base_name, "record": record,
'table_name': table_name, "base_name": base_name,
}) "table_name": table_name,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for record: {e!s}", exc_info=True) logger.error(
f"Error in Phase 1 for record: {e!s}", exc_info=True
)
documents_failed += 1 documents_failed += 1
continue continue
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([r for r in records_to_process if r['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([r for r in records_to_process if r['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -374,7 +388,7 @@ async def index_airtable_records(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -387,13 +401,18 @@ async def index_airtable_records(
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"record_id": item['record_id'], "record_id": item["record_id"],
"created_time": item['record'].get("CREATED_TIME()", ""), "created_time": item["record"].get("CREATED_TIME()", ""),
"document_type": "Airtable Record", "document_type": "Airtable Record",
"connector_type": "Airtable", "connector_type": "Airtable",
} }
summary_content, summary_embedding = await generate_document_summary( (
item['markdown_content'], user_llm, document_metadata_for_summary summary_content,
summary_embedding,
) = await generate_document_summary(
item["markdown_content"],
user_llm,
document_metadata_for_summary,
) )
else: else:
# Fallback to simple summary if no LLM configured # Fallback to simple summary if no LLM configured
@ -402,18 +421,18 @@ async def index_airtable_records(
summary_content summary_content
) )
chunks = await create_document_chunks(item['markdown_content']) chunks = await create_document_chunks(item["markdown_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['record_id'] document.title = item["record_id"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"record_id": item['record_id'], "record_id": item["record_id"],
"created_time": item['record'].get("CREATED_TIME()", ""), "created_time": item["record"].get("CREATED_TIME()", ""),
"base_name": item['base_name'], "base_name": item["base_name"],
"table_name": item['table_name'], "table_name": item["table_name"],
"connector_id": connector_id, "connector_id": connector_id,
} }
safe_set_chunks(document, chunks) safe_set_chunks(document, chunks)
@ -430,13 +449,17 @@ async def index_airtable_records(
await session.commit() await session.commit()
except Exception as e: except Exception as e:
logger.error(f"Error processing Airtable record: {e!s}", exc_info=True) logger.error(
f"Error processing Airtable record: {e!s}", exc_info=True
)
# Mark document as failed with reason (visible in UI) # Mark document as failed with reason (visible in UI)
try: try:
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue
@ -446,7 +469,9 @@ async def index_airtable_records(
total_processed = documents_indexed total_processed = documents_indexed
# Final commit to ensure all documents are persisted (safety net) # Final commit to ensure all documents are persisted (safety net)
logger.info(f"Final commit: Total {documents_indexed} Airtable records processed") logger.info(
f"Final commit: Total {documents_indexed} Airtable records processed"
)
try: try:
await session.commit() await session.commit()
logger.info( logger.info(

View file

@ -31,29 +31,30 @@ def get_current_timestamp() -> datetime:
def safe_set_chunks(document: Document, chunks: list) -> None: def safe_set_chunks(document: Document, chunks: list) -> None:
""" """
Safely assign chunks to a document without triggering lazy loading. Safely assign chunks to a document without triggering lazy loading.
ALWAYS use this instead of `document.chunks = chunks` to avoid ALWAYS use this instead of `document.chunks = chunks` to avoid
SQLAlchemy async errors (MissingGreenlet / greenlet_spawn). SQLAlchemy async errors (MissingGreenlet / greenlet_spawn).
Why this is needed: Why this is needed:
- Direct assignment `document.chunks = chunks` triggers SQLAlchemy to - Direct assignment `document.chunks = chunks` triggers SQLAlchemy to
load the OLD chunks first (for comparison/orphan detection) load the OLD chunks first (for comparison/orphan detection)
- This lazy loading fails in async context with asyncpg driver - This lazy loading fails in async context with asyncpg driver
- set_committed_value bypasses this by setting the value directly - set_committed_value bypasses this by setting the value directly
This function is safe regardless of how the document was loaded This function is safe regardless of how the document was loaded
(with or without selectinload). (with or without selectinload).
Args: Args:
document: The Document object to update document: The Document object to update
chunks: List of Chunk objects to assign chunks: List of Chunk objects to assign
Example: Example:
# Instead of: document.chunks = chunks (DANGEROUS!) # Instead of: document.chunks = chunks (DANGEROUS!)
safe_set_chunks(document, chunks) # Always safe safe_set_chunks(document, chunks) # Always safe
""" """
from sqlalchemy.orm.attributes import set_committed_value from sqlalchemy.orm.attributes import set_committed_value
set_committed_value(document, 'chunks', chunks)
set_committed_value(document, "chunks", chunks)
async def check_duplicate_document_by_hash( async def check_duplicate_document_by_hash(

View file

@ -261,7 +261,9 @@ async def index_bookstack_pages(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
logger.info( logger.info(
f"Document for BookStack page {page_name} unchanged. Skipping." f"Document for BookStack page {page_name} unchanged. Skipping."
@ -270,20 +272,22 @@ async def index_bookstack_pages(
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
pages_to_process.append({ pages_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'page_id': page_id, "is_new": False,
'page_name': page_name, "page_id": page_id,
'page_slug': page_slug, "page_name": page_name,
'book_id': book_id, "page_slug": page_slug,
'book_slug': book_slug, "book_id": book_id,
'chapter_id': chapter_id, "book_slug": book_slug,
'page_url': page_url, "chapter_id": chapter_id,
'page_content': page_content, "page_url": page_url,
'full_content': full_content, "page_content": page_content,
'content_hash': content_hash, "full_content": full_content,
}) "content_hash": content_hash,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -331,20 +335,22 @@ async def index_bookstack_pages(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
pages_to_process.append({ pages_to_process.append(
'document': document, {
'is_new': True, "document": document,
'page_id': page_id, "is_new": True,
'page_name': page_name, "page_id": page_id,
'page_slug': page_slug, "page_name": page_name,
'book_id': book_id, "page_slug": page_slug,
'book_slug': book_slug, "book_id": book_id,
'chapter_id': chapter_id, "book_slug": book_slug,
'page_url': page_url, "chapter_id": chapter_id,
'page_content': page_content, "page_url": page_url,
'full_content': full_content, "page_content": page_content,
'content_hash': content_hash, "full_content": full_content,
}) "content_hash": content_hash,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
@ -353,7 +359,9 @@ async def index_bookstack_pages(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -370,7 +378,7 @@ async def index_bookstack_pages(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -383,23 +391,23 @@ async def index_bookstack_pages(
# Build document metadata # Build document metadata
doc_metadata = { doc_metadata = {
"page_id": item['page_id'], "page_id": item["page_id"],
"page_name": item['page_name'], "page_name": item["page_name"],
"page_slug": item['page_slug'], "page_slug": item["page_slug"],
"book_id": item['book_id'], "book_id": item["book_id"],
"book_slug": item['book_slug'], "book_slug": item["book_slug"],
"chapter_id": item['chapter_id'], "chapter_id": item["chapter_id"],
"base_url": bookstack_base_url, "base_url": bookstack_base_url,
"page_url": item['page_url'], "page_url": item["page_url"],
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
if user_llm: if user_llm:
summary_metadata = { summary_metadata = {
"page_name": item['page_name'], "page_name": item["page_name"],
"page_id": item['page_id'], "page_id": item["page_id"],
"book_id": item['book_id'], "book_id": item["book_id"],
"document_type": "BookStack Page", "document_type": "BookStack Page",
"connector_type": "BookStack", "connector_type": "BookStack",
} }
@ -407,17 +415,15 @@ async def index_bookstack_pages(
summary_content, summary_content,
summary_embedding, summary_embedding,
) = await generate_document_summary( ) = await generate_document_summary(
item['full_content'], user_llm, summary_metadata item["full_content"], user_llm, summary_metadata
) )
else: else:
# Fallback to simple summary if no LLM configured # Fallback to simple summary if no LLM configured
summary_content = ( summary_content = f"BookStack Page: {item['page_name']}\n\nBook ID: {item['book_id']}\n\n"
f"BookStack Page: {item['page_name']}\n\nBook ID: {item['book_id']}\n\n" if item["page_content"]:
)
if item['page_content']:
# Take first 1000 characters of content for summary # Take first 1000 characters of content for summary
content_preview = item['page_content'][:1000] content_preview = item["page_content"][:1000]
if len(item['page_content']) > 1000: if len(item["page_content"]) > 1000:
content_preview += "..." content_preview += "..."
summary_content += f"Content Preview: {content_preview}\n\n" summary_content += f"Content Preview: {content_preview}\n\n"
summary_embedding = config.embedding_model_instance.embed( summary_embedding = config.embedding_model_instance.embed(
@ -425,12 +431,12 @@ async def index_bookstack_pages(
) )
# Process chunks - using the full page content # Process chunks - using the full page content
chunks = await create_document_chunks(item['full_content']) chunks = await create_document_chunks(item["full_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['page_name'] document.title = item["page_name"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = doc_metadata document.document_metadata = doc_metadata
safe_set_chunks(document, chunks) safe_set_chunks(document, chunks)
@ -456,7 +462,9 @@ async def index_bookstack_pages(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
skipped_pages.append( skipped_pages.append(
f"{item.get('page_name', 'Unknown')} (processing error)" f"{item.get('page_name', 'Unknown')} (processing error)"
) )
@ -473,7 +481,9 @@ async def index_bookstack_pages(
) )
try: try:
await session.commit() await session.commit()
logger.info("Successfully committed all BookStack document changes to database") logger.info(
"Successfully committed all BookStack document changes to database"
)
except Exception as e: except Exception as e:
# Handle any remaining integrity errors gracefully (race conditions, etc.) # Handle any remaining integrity errors gracefully (race conditions, etc.)
if ( if (

View file

@ -260,7 +260,9 @@ async def index_clickup_tasks(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
logger.info( logger.info(
f"Document for ClickUp task {task_name} unchanged. Skipping." f"Document for ClickUp task {task_name} unchanged. Skipping."
@ -272,22 +274,24 @@ async def index_clickup_tasks(
logger.info( logger.info(
f"Content changed for ClickUp task {task_name}. Queuing for update." f"Content changed for ClickUp task {task_name}. Queuing for update."
) )
tasks_to_process.append({ tasks_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'task_content': task_content, "is_new": False,
'content_hash': content_hash, "task_content": task_content,
'task_id': task_id, "content_hash": content_hash,
'task_name': task_name, "task_id": task_id,
'task_status': task_status, "task_name": task_name,
'task_priority': task_priority, "task_status": task_status,
'task_list_name': task_list_name, "task_priority": task_priority,
'task_space_name': task_space_name, "task_list_name": task_list_name,
'task_assignees': task_assignees, "task_space_name": task_space_name,
'task_due_date': task_due_date, "task_assignees": task_assignees,
'task_created': task_created, "task_due_date": task_due_date,
'task_updated': task_updated, "task_created": task_created,
}) "task_updated": task_updated,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -335,22 +339,24 @@ async def index_clickup_tasks(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
tasks_to_process.append({ tasks_to_process.append(
'document': document, {
'is_new': True, "document": document,
'task_content': task_content, "is_new": True,
'content_hash': content_hash, "task_content": task_content,
'task_id': task_id, "content_hash": content_hash,
'task_name': task_name, "task_id": task_id,
'task_status': task_status, "task_name": task_name,
'task_priority': task_priority, "task_status": task_status,
'task_list_name': task_list_name, "task_priority": task_priority,
'task_space_name': task_space_name, "task_list_name": task_list_name,
'task_assignees': task_assignees, "task_space_name": task_space_name,
'task_due_date': task_due_date, "task_assignees": task_assignees,
'task_created': task_created, "task_due_date": task_due_date,
'task_updated': task_updated, "task_created": task_created,
}) "task_updated": task_updated,
}
)
except Exception as e: except Exception as e:
logger.error( logger.error(
@ -362,7 +368,9 @@ async def index_clickup_tasks(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([t for t in tasks_to_process if t['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([t for t in tasks_to_process if t['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -379,7 +387,7 @@ async def index_clickup_tasks(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -392,13 +400,13 @@ async def index_clickup_tasks(
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"task_id": item['task_id'], "task_id": item["task_id"],
"task_name": item['task_name'], "task_name": item["task_name"],
"task_status": item['task_status'], "task_status": item["task_status"],
"task_priority": item['task_priority'], "task_priority": item["task_priority"],
"task_list": item['task_list_name'], "task_list": item["task_list_name"],
"task_space": item['task_space_name'], "task_space": item["task_space_name"],
"assignees": len(item['task_assignees']), "assignees": len(item["task_assignees"]),
"document_type": "ClickUp Task", "document_type": "ClickUp Task",
"connector_type": "ClickUp", "connector_type": "ClickUp",
} }
@ -406,30 +414,30 @@ async def index_clickup_tasks(
summary_content, summary_content,
summary_embedding, summary_embedding,
) = await generate_document_summary( ) = await generate_document_summary(
item['task_content'], user_llm, document_metadata_for_summary item["task_content"], user_llm, document_metadata_for_summary
) )
else: else:
summary_content = item['task_content'] summary_content = item["task_content"]
summary_embedding = config.embedding_model_instance.embed( summary_embedding = config.embedding_model_instance.embed(
item['task_content'] item["task_content"]
) )
chunks = await create_document_chunks(item['task_content']) chunks = await create_document_chunks(item["task_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['task_name'] document.title = item["task_name"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"task_id": item['task_id'], "task_id": item["task_id"],
"task_name": item['task_name'], "task_name": item["task_name"],
"task_status": item['task_status'], "task_status": item["task_status"],
"task_priority": item['task_priority'], "task_priority": item["task_priority"],
"task_assignees": item['task_assignees'], "task_assignees": item["task_assignees"],
"task_due_date": item['task_due_date'], "task_due_date": item["task_due_date"],
"task_created": item['task_created'], "task_created": item["task_created"],
"task_updated": item['task_updated'], "task_updated": item["task_updated"],
"connector_id": connector_id, "connector_id": connector_id,
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
} }
@ -456,7 +464,9 @@ async def index_clickup_tasks(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue

View file

@ -262,23 +262,27 @@ async def index_confluence_pages(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
pages_to_process.append({ pages_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'full_content': full_content, "is_new": False,
'page_content': page_content, "full_content": full_content,
'content_hash': content_hash, "page_content": page_content,
'page_id': page_id, "content_hash": content_hash,
'page_title': page_title, "page_id": page_id,
'space_id': space_id, "page_title": page_title,
'comment_count': comment_count, "space_id": space_id,
}) "comment_count": comment_count,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -323,17 +327,19 @@ async def index_confluence_pages(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
pages_to_process.append({ pages_to_process.append(
'document': document, {
'is_new': True, "document": document,
'full_content': full_content, "is_new": True,
'page_content': page_content, "full_content": full_content,
'content_hash': content_hash, "page_content": page_content,
'page_id': page_id, "content_hash": content_hash,
'page_title': page_title, "page_id": page_id,
'space_id': space_id, "page_title": page_title,
'comment_count': comment_count, "space_id": space_id,
}) "comment_count": comment_count,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
@ -342,7 +348,9 @@ async def index_confluence_pages(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -359,7 +367,7 @@ async def index_confluence_pages(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -372,10 +380,10 @@ async def index_confluence_pages(
if user_llm: if user_llm:
document_metadata = { document_metadata = {
"page_title": item['page_title'], "page_title": item["page_title"],
"page_id": item['page_id'], "page_id": item["page_id"],
"space_id": item['space_id'], "space_id": item["space_id"],
"comment_count": item['comment_count'], "comment_count": item["comment_count"],
"document_type": "Confluence Page", "document_type": "Confluence Page",
"connector_type": "Confluence", "connector_type": "Confluence",
} }
@ -383,17 +391,15 @@ async def index_confluence_pages(
summary_content, summary_content,
summary_embedding, summary_embedding,
) = await generate_document_summary( ) = await generate_document_summary(
item['full_content'], user_llm, document_metadata item["full_content"], user_llm, document_metadata
) )
else: else:
# Fallback to simple summary if no LLM configured # Fallback to simple summary if no LLM configured
summary_content = ( summary_content = f"Confluence Page: {item['page_title']}\n\nSpace ID: {item['space_id']}\n\n"
f"Confluence Page: {item['page_title']}\n\nSpace ID: {item['space_id']}\n\n" if item["page_content"]:
)
if item['page_content']:
# Take first 1000 characters of content for summary # Take first 1000 characters of content for summary
content_preview = item['page_content'][:1000] content_preview = item["page_content"][:1000]
if len(item['page_content']) > 1000: if len(item["page_content"]) > 1000:
content_preview += "..." content_preview += "..."
summary_content += f"Content Preview: {content_preview}\n\n" summary_content += f"Content Preview: {content_preview}\n\n"
summary_content += f"Comments: {item['comment_count']}" summary_content += f"Comments: {item['comment_count']}"
@ -402,18 +408,18 @@ async def index_confluence_pages(
) )
# Process chunks - using the full page content with comments # Process chunks - using the full page content with comments
chunks = await create_document_chunks(item['full_content']) chunks = await create_document_chunks(item["full_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['page_title'] document.title = item["page_title"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"page_id": item['page_id'], "page_id": item["page_id"],
"page_title": item['page_title'], "page_title": item["page_title"],
"space_id": item['space_id'], "space_id": item["space_id"],
"comment_count": item['comment_count'], "comment_count": item["comment_count"],
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
@ -440,7 +446,9 @@ async def index_confluence_pages(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue # Skip this page and continue with others continue # Skip this page and continue with others

View file

@ -352,9 +352,7 @@ async def index_discord_messages(
try: try:
channels = await discord_client.get_text_channels(guild_id) channels = await discord_client.get_text_channels(guild_id)
if not channels: if not channels:
logger.info( logger.info(f"No channels found in guild {guild_name}. Skipping.")
f"No channels found in guild {guild_name}. Skipping."
)
skipped_channels.append(f"{guild_name} (no channels)") skipped_channels.append(f"{guild_name} (no channels)")
else: else:
for channel in channels: for channel in channels:
@ -456,25 +454,31 @@ async def index_discord_messages(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status = DocumentStatus.ready() existing_document.status, DocumentStatus.READY
):
existing_document.status = (
DocumentStatus.ready()
)
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
messages_to_process.append({ messages_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'combined_document_string': combined_document_string, "is_new": False,
'content_hash': content_hash, "combined_document_string": combined_document_string,
'guild_name': guild_name, "content_hash": content_hash,
'guild_id': guild_id, "guild_name": guild_name,
'channel_name': channel_name, "guild_id": guild_id,
'channel_id': channel_id, "channel_name": channel_name,
'message_id': msg_id, "channel_id": channel_id,
'message_timestamp': msg_timestamp, "message_id": msg_id,
'message_user_name': msg_user_name, "message_timestamp": msg_timestamp,
}) "message_user_name": msg_user_name,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -522,19 +526,21 @@ async def index_discord_messages(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
messages_to_process.append({ messages_to_process.append(
'document': document, {
'is_new': True, "document": document,
'combined_document_string': combined_document_string, "is_new": True,
'content_hash': content_hash, "combined_document_string": combined_document_string,
'guild_name': guild_name, "content_hash": content_hash,
'guild_id': guild_id, "guild_name": guild_name,
'channel_name': channel_name, "guild_id": guild_id,
'channel_id': channel_id, "channel_name": channel_name,
'message_id': msg_id, "channel_id": channel_id,
'message_timestamp': msg_timestamp, "message_id": msg_id,
'message_user_name': msg_user_name, "message_timestamp": msg_timestamp,
}) "message_user_name": msg_user_name,
}
)
except Exception as e: except Exception as e:
logger.error( logger.error(
@ -547,7 +553,9 @@ async def index_discord_messages(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -564,31 +572,31 @@ async def index_discord_messages(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
await session.commit() await session.commit()
# Heavy processing (embeddings, chunks) # Heavy processing (embeddings, chunks)
chunks = await create_document_chunks(item['combined_document_string']) chunks = await create_document_chunks(item["combined_document_string"])
doc_embedding = config.embedding_model_instance.embed( doc_embedding = config.embedding_model_instance.embed(
item['combined_document_string'] item["combined_document_string"]
) )
# Update document to READY with actual content # Update document to READY with actual content
document.title = f"{item['guild_name']}#{item['channel_name']}" document.title = f"{item['guild_name']}#{item['channel_name']}"
document.content = item['combined_document_string'] document.content = item["combined_document_string"]
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = doc_embedding document.embedding = doc_embedding
document.document_metadata = { document.document_metadata = {
"guild_name": item['guild_name'], "guild_name": item["guild_name"],
"guild_id": item['guild_id'], "guild_id": item["guild_id"],
"channel_name": item['channel_name'], "channel_name": item["channel_name"],
"channel_id": item['channel_id'], "channel_id": item["channel_id"],
"message_id": item['message_id'], "message_id": item["message_id"],
"message_timestamp": item['message_timestamp'], "message_timestamp": item["message_timestamp"],
"message_user_name": item['message_user_name'], "message_user_name": item["message_user_name"],
"indexed_at": datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
@ -612,7 +620,9 @@ async def index_discord_messages(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue

View file

@ -253,7 +253,9 @@ async def index_elasticsearch_documents(
# If content is unchanged, skip. Otherwise queue for update. # If content is unchanged, skip. Otherwise queue for update.
if existing_doc.content_hash == content_hash: if existing_doc.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_doc.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_doc.status, DocumentStatus.READY
):
existing_doc.status = DocumentStatus.ready() existing_doc.status = DocumentStatus.ready()
logger.info( logger.info(
f"Skipping ES doc {doc_id} — already indexed (doc id {existing_doc.id})" f"Skipping ES doc {doc_id} — already indexed (doc id {existing_doc.id})"
@ -262,17 +264,19 @@ async def index_elasticsearch_documents(
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
docs_to_process.append({ docs_to_process.append(
'document': existing_doc, {
'is_new': False, "document": existing_doc,
'doc_id': doc_id, "is_new": False,
'title': title, "doc_id": doc_id,
'content': content, "title": title,
'content_hash': content_hash, "content": content,
'unique_identifier_hash': unique_identifier_hash, "content_hash": content_hash,
'hit': hit, "unique_identifier_hash": unique_identifier_hash,
'source': source, "hit": hit,
}) "source": source,
}
)
hits_collected += 1 hits_collected += 1
continue continue
@ -310,17 +314,19 @@ async def index_elasticsearch_documents(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
docs_to_process.append({ docs_to_process.append(
'document': document, {
'is_new': True, "document": document,
'doc_id': doc_id, "is_new": True,
'title': title, "doc_id": doc_id,
'content': content, "title": title,
'content_hash': content_hash, "content": content,
'unique_identifier_hash': unique_identifier_hash, "content_hash": content_hash,
'hit': hit, "unique_identifier_hash": unique_identifier_hash,
'source': source, "hit": hit,
}) "source": source,
}
)
hits_collected += 1 hits_collected += 1
except Exception as e: except Exception as e:
@ -330,7 +336,9 @@ async def index_elasticsearch_documents(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([d for d in docs_to_process if d['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([d for d in docs_to_process if d['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -347,7 +355,7 @@ async def index_elasticsearch_documents(
await on_heartbeat_callback(documents_processed) await on_heartbeat_callback(documents_processed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -355,9 +363,9 @@ async def index_elasticsearch_documents(
# Build metadata # Build metadata
metadata = { metadata = {
"elasticsearch_id": item['doc_id'], "elasticsearch_id": item["doc_id"],
"elasticsearch_index": item['hit'].get("_index", index_name), "elasticsearch_index": item["hit"].get("_index", index_name),
"elasticsearch_score": item['hit'].get("_score"), "elasticsearch_score": item["hit"].get("_score"),
"indexed_at": datetime.now().isoformat(), "indexed_at": datetime.now().isoformat(),
"source": "ELASTICSEARCH_CONNECTOR", "source": "ELASTICSEARCH_CONNECTOR",
"connector_id": connector_id, "connector_id": connector_id,
@ -366,17 +374,17 @@ async def index_elasticsearch_documents(
# Add any additional metadata fields specified in config # Add any additional metadata fields specified in config
if "ELASTICSEARCH_METADATA_FIELDS" in config: if "ELASTICSEARCH_METADATA_FIELDS" in config:
for field in config["ELASTICSEARCH_METADATA_FIELDS"]: for field in config["ELASTICSEARCH_METADATA_FIELDS"]:
if field in item['source']: if field in item["source"]:
metadata[f"es_{field}"] = item['source'][field] metadata[f"es_{field}"] = item["source"][field]
# Create chunks # Create chunks
chunks = await create_document_chunks(item['content']) chunks = await create_document_chunks(item["content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['title'] document.title = item["title"]
document.content = item['content'] document.content = item["content"]
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.unique_identifier_hash = item['unique_identifier_hash'] document.unique_identifier_hash = item["unique_identifier_hash"]
document.document_metadata = metadata document.document_metadata = metadata
safe_set_chunks(document, chunks) safe_set_chunks(document, chunks)
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
@ -399,7 +407,9 @@ async def index_elasticsearch_documents(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue
@ -411,10 +421,14 @@ async def index_elasticsearch_documents(
) )
# Final commit for any remaining documents not yet committed in batches # Final commit for any remaining documents not yet committed in batches
logger.info(f"Final commit: Total {documents_processed} Elasticsearch documents processed") logger.info(
f"Final commit: Total {documents_processed} Elasticsearch documents processed"
)
try: try:
await session.commit() await session.commit()
logger.info("Successfully committed all Elasticsearch document changes to database") logger.info(
"Successfully committed all Elasticsearch document changes to database"
)
except Exception as e: except Exception as e:
# Handle any remaining integrity errors gracefully (race conditions, etc.) # Handle any remaining integrity errors gracefully (race conditions, etc.)
if ( if (

View file

@ -17,7 +17,7 @@ from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config from app.config import config
from app.connectors.github_connector import GitHubConnector, RepositoryDigest from app.connectors.github_connector import GitHubConnector
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
from app.services.llm_service import get_user_long_context_llm from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
@ -237,7 +237,9 @@ async def index_github_repos(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
logger.info(f"Repository {repo_full_name} unchanged. Skipping.") logger.info(f"Repository {repo_full_name} unchanged. Skipping.")
documents_skipped += 1 documents_skipped += 1
@ -247,14 +249,16 @@ async def index_github_repos(
logger.info( logger.info(
f"Content changed for repository {repo_full_name}. Queuing for update." f"Content changed for repository {repo_full_name}. Queuing for update."
) )
repos_to_process.append({ repos_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'digest': digest, "is_new": False,
'content_hash': content_hash, "digest": digest,
'repo_full_name': repo_full_name, "content_hash": content_hash,
'unique_identifier_hash': unique_identifier_hash, "repo_full_name": repo_full_name,
}) "unique_identifier_hash": unique_identifier_hash,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -298,14 +302,16 @@ async def index_github_repos(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
repos_to_process.append({ repos_to_process.append(
'document': document, {
'is_new': True, "document": document,
'digest': digest, "is_new": True,
'content_hash': content_hash, "digest": digest,
'repo_full_name': repo_full_name, "content_hash": content_hash,
'unique_identifier_hash': unique_identifier_hash, "repo_full_name": repo_full_name,
}) "unique_identifier_hash": unique_identifier_hash,
}
)
except Exception as repo_err: except Exception as repo_err:
logger.error( logger.error(
@ -317,7 +323,9 @@ async def index_github_repos(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([r for r in repos_to_process if r['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([r for r in repos_to_process if r['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -334,9 +342,9 @@ async def index_github_repos(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
digest = item['digest'] digest = item["digest"]
repo_full_name = item['repo_full_name'] repo_full_name = item["repo_full_name"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
@ -353,7 +361,9 @@ async def index_github_repos(
"document_type": "GitHub Repository", "document_type": "GitHub Repository",
"connector_type": "GitHub", "connector_type": "GitHub",
"ingestion_method": "gitingest", "ingestion_method": "gitingest",
"file_tree": digest.tree[:2000] if len(digest.tree) > 2000 else digest.tree, "file_tree": digest.tree[:2000]
if len(digest.tree) > 2000
else digest.tree,
"estimated_tokens": digest.estimated_tokens, "estimated_tokens": digest.estimated_tokens,
} }
@ -377,13 +387,17 @@ async def index_github_repos(
f"## Summary\n{digest.summary}\n\n" f"## Summary\n{digest.summary}\n\n"
f"## File Structure\n{digest.tree[:3000]}" f"## File Structure\n{digest.tree[:3000]}"
) )
summary_embedding = config.embedding_model_instance.embed(summary_text) summary_embedding = config.embedding_model_instance.embed(
summary_text
)
# Chunk the full digest content for granular search # Chunk the full digest content for granular search
try: try:
chunks_data = await create_document_chunks(digest.content) chunks_data = await create_document_chunks(digest.content)
except Exception as chunk_err: except Exception as chunk_err:
logger.error(f"Failed to chunk repository {repo_full_name}: {chunk_err}") logger.error(
f"Failed to chunk repository {repo_full_name}: {chunk_err}"
)
chunks_data = await _simple_chunk_content(digest.content) chunks_data = await _simple_chunk_content(digest.content)
# Update document to READY with actual content # Update document to READY with actual content
@ -401,7 +415,7 @@ async def index_github_repos(
document.title = repo_full_name document.title = repo_full_name
document.content = summary_text document.content = summary_text
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = doc_metadata document.document_metadata = doc_metadata
safe_set_chunks(document, chunks_data) safe_set_chunks(document, chunks_data)
@ -433,7 +447,9 @@ async def index_github_repos(
document.status = DocumentStatus.failed(str(repo_err)) document.status = DocumentStatus.failed(str(repo_err))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
errors.append(f"Failed processing {repo_full_name}: {repo_err}") errors.append(f"Failed processing {repo_full_name}: {repo_err}")
documents_failed += 1 documents_failed += 1
continue continue
@ -442,7 +458,9 @@ async def index_github_repos(
await update_connector_last_indexed(session, connector, update_last_indexed) await update_connector_last_indexed(session, connector, update_last_indexed)
# Final commit # Final commit
logger.info(f"Final commit: Total {documents_processed} GitHub repositories processed") logger.info(
f"Final commit: Total {documents_processed} GitHub repositories processed"
)
try: try:
await session.commit() await session.commit()
logger.info( logger.info(

View file

@ -345,25 +345,29 @@ async def index_google_calendar_events(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
events_to_process.append({ events_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'event_markdown': event_markdown, "is_new": False,
'content_hash': content_hash, "event_markdown": event_markdown,
'event_id': event_id, "content_hash": content_hash,
'event_summary': event_summary, "event_id": event_id,
'calendar_id': calendar_id, "event_summary": event_summary,
'start_time': start_time, "calendar_id": calendar_id,
'end_time': end_time, "start_time": start_time,
'location': location, "end_time": end_time,
'description': description, "location": location,
}) "description": description,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -411,19 +415,21 @@ async def index_google_calendar_events(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
events_to_process.append({ events_to_process.append(
'document': document, {
'is_new': True, "document": document,
'event_markdown': event_markdown, "is_new": True,
'content_hash': content_hash, "event_markdown": event_markdown,
'event_id': event_id, "content_hash": content_hash,
'event_summary': event_summary, "event_id": event_id,
'calendar_id': calendar_id, "event_summary": event_summary,
'start_time': start_time, "calendar_id": calendar_id,
'end_time': end_time, "start_time": start_time,
'location': location, "end_time": end_time,
'description': description, "location": location,
}) "description": description,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
@ -432,7 +438,9 @@ async def index_google_calendar_events(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -449,7 +457,7 @@ async def index_google_calendar_events(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -462,48 +470,53 @@ async def index_google_calendar_events(
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"event_id": item['event_id'], "event_id": item["event_id"],
"event_summary": item['event_summary'], "event_summary": item["event_summary"],
"calendar_id": item['calendar_id'], "calendar_id": item["calendar_id"],
"start_time": item['start_time'], "start_time": item["start_time"],
"end_time": item['end_time'], "end_time": item["end_time"],
"location": item['location'] or "No location", "location": item["location"] or "No location",
"document_type": "Google Calendar Event", "document_type": "Google Calendar Event",
"connector_type": "Google Calendar", "connector_type": "Google Calendar",
} }
summary_content, summary_embedding = await generate_document_summary( (
item['event_markdown'], user_llm, document_metadata_for_summary summary_content,
summary_embedding,
) = await generate_document_summary(
item["event_markdown"], user_llm, document_metadata_for_summary
) )
else: else:
summary_content = f"Google Calendar Event: {item['event_summary']}\n\n" summary_content = (
f"Google Calendar Event: {item['event_summary']}\n\n"
)
summary_content += f"Calendar: {item['calendar_id']}\n" summary_content += f"Calendar: {item['calendar_id']}\n"
summary_content += f"Start: {item['start_time']}\n" summary_content += f"Start: {item['start_time']}\n"
summary_content += f"End: {item['end_time']}\n" summary_content += f"End: {item['end_time']}\n"
if item['location']: if item["location"]:
summary_content += f"Location: {item['location']}\n" summary_content += f"Location: {item['location']}\n"
if item['description']: if item["description"]:
desc_preview = item['description'][:1000] desc_preview = item["description"][:1000]
if len(item['description']) > 1000: if len(item["description"]) > 1000:
desc_preview += "..." desc_preview += "..."
summary_content += f"Description: {desc_preview}\n" summary_content += f"Description: {desc_preview}\n"
summary_embedding = config.embedding_model_instance.embed( summary_embedding = config.embedding_model_instance.embed(
summary_content summary_content
) )
chunks = await create_document_chunks(item['event_markdown']) chunks = await create_document_chunks(item["event_markdown"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['event_summary'] document.title = item["event_summary"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"event_id": item['event_id'], "event_id": item["event_id"],
"event_summary": item['event_summary'], "event_summary": item["event_summary"],
"calendar_id": item['calendar_id'], "calendar_id": item["calendar_id"],
"start_time": item['start_time'], "start_time": item["start_time"],
"end_time": item['end_time'], "end_time": item["end_time"],
"location": item['location'], "location": item["location"],
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
@ -527,7 +540,9 @@ async def index_google_calendar_events(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue

View file

@ -435,7 +435,7 @@ async def _index_full_scan(
on_heartbeat_callback: HeartbeatCallbackType | None = None, on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, int]: ) -> tuple[int, int]:
"""Perform full scan indexing of a folder. """Perform full scan indexing of a folder.
Implements 2-phase document status updates for real-time UI feedback: Implements 2-phase document status updates for real-time UI feedback:
- Phase 1: Collect all files and create pending documents (visible in UI immediately) - Phase 1: Collect all files and create pending documents (visible in UI immediately)
- Phase 2: Process each file: pending → processing → ready/failed - Phase 2: Process each file: pending → processing → ready/failed
@ -533,7 +533,9 @@ async def _index_full_scan(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f[1] and f[1].id is None])} pending documents") logger.info(
f"Phase 1: Committing {len([f for f in files_to_process if f[1] and f[1].id is None])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -568,9 +570,7 @@ async def _index_full_scan(
if documents_indexed % 10 == 0 and documents_indexed > 0: if documents_indexed % 10 == 0 and documents_indexed > 0:
await session.commit() await session.commit()
logger.info( logger.info(f"Committed batch: {documents_indexed} files indexed so far")
f"Committed batch: {documents_indexed} files indexed so far"
)
logger.info( logger.info(
f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped, {documents_failed} failed" f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped, {documents_failed} failed"
@ -597,7 +597,7 @@ async def _index_with_delta_sync(
Note: include_subfolders is accepted for API consistency but delta sync Note: include_subfolders is accepted for API consistency but delta sync
automatically tracks changes across all folders including subfolders. automatically tracks changes across all folders including subfolders.
Implements 2-phase document status updates for real-time UI feedback: Implements 2-phase document status updates for real-time UI feedback:
- Phase 1: Collect all changes and create pending documents (visible in UI immediately) - Phase 1: Collect all changes and create pending documents (visible in UI immediately)
- Phase 2: Process each file: pending → processing → ready/failed - Phase 2: Process each file: pending → processing → ready/failed
@ -676,7 +676,7 @@ async def _index_with_delta_sync(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing pending documents") logger.info("Phase 1: Committing pending documents")
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -685,7 +685,7 @@ async def _index_with_delta_sync(
# ======================================================================= # =======================================================================
logger.info(f"Phase 2: Processing {len(changes_to_process)} changes") logger.info(f"Phase 2: Processing {len(changes_to_process)} changes")
for change, file, pending_doc in changes_to_process: for _, file, pending_doc in changes_to_process:
# Check if it's time for a heartbeat update # Check if it's time for a heartbeat update
if on_heartbeat_callback: if on_heartbeat_callback:
current_time = time.time() current_time = time.time()
@ -728,17 +728,17 @@ async def _create_pending_document_for_file(
) -> tuple[Document | None, bool]: ) -> tuple[Document | None, bool]:
""" """
Create a pending document for a Google Drive file if it doesn't exist. Create a pending document for a Google Drive file if it doesn't exist.
This is Phase 1 of the 2-phase document status update pattern. This is Phase 1 of the 2-phase document status update pattern.
Creates documents with 'pending' status so they appear in UI immediately. Creates documents with 'pending' status so they appear in UI immediately.
Args: Args:
session: Database session session: Database session
file: File metadata from Google Drive API file: File metadata from Google Drive API
connector_id: ID of the Drive connector connector_id: ID of the Drive connector
search_space_id: ID of the search space search_space_id: ID of the search space
user_id: ID of the user user_id: ID of the user
Returns: Returns:
Tuple of (document, should_skip): Tuple of (document, should_skip):
- (existing_doc, False): Existing document that needs update - (existing_doc, False): Existing document that needs update
@ -746,28 +746,28 @@ async def _create_pending_document_for_file(
- (None, True): File should be skipped (unchanged, rename-only, or folder) - (None, True): File should be skipped (unchanged, rename-only, or folder)
""" """
from app.connectors.google_drive.file_types import should_skip_file from app.connectors.google_drive.file_types import should_skip_file
file_id = file.get("id") file_id = file.get("id")
file_name = file.get("name", "Unknown") file_name = file.get("name", "Unknown")
mime_type = file.get("mimeType", "") mime_type = file.get("mimeType", "")
# Skip folders and shortcuts # Skip folders and shortcuts
if should_skip_file(mime_type): if should_skip_file(mime_type):
return None, True return None, True
if not file_id: if not file_id:
return None, True return None, True
# Generate unique identifier hash for this file # Generate unique identifier hash for this file
unique_identifier_hash = generate_unique_identifier_hash( unique_identifier_hash = generate_unique_identifier_hash(
DocumentType.GOOGLE_DRIVE_FILE, file_id, search_space_id DocumentType.GOOGLE_DRIVE_FILE, file_id, search_space_id
) )
# Check if document exists # Check if document exists
existing_document = await check_document_by_unique_identifier( existing_document = await check_document_by_unique_identifier(
session, unique_identifier_hash session, unique_identifier_hash
) )
if existing_document: if existing_document:
# Check if this is a rename-only update (content unchanged) # Check if this is a rename-only update (content unchanged)
incoming_md5 = file.get("md5Checksum") incoming_md5 = file.get("md5Checksum")
@ -775,7 +775,7 @@ async def _create_pending_document_for_file(
doc_metadata = existing_document.document_metadata or {} doc_metadata = existing_document.document_metadata or {}
stored_md5 = doc_metadata.get("md5_checksum") stored_md5 = doc_metadata.get("md5_checksum")
stored_modified_time = doc_metadata.get("modified_time") stored_modified_time = doc_metadata.get("modified_time")
# Determine if content changed # Determine if content changed
content_unchanged = False content_unchanged = False
if incoming_md5 and stored_md5: if incoming_md5 and stored_md5:
@ -783,16 +783,18 @@ async def _create_pending_document_for_file(
elif not incoming_md5 and incoming_modified_time and stored_modified_time: elif not incoming_md5 and incoming_modified_time and stored_modified_time:
# Google Workspace file - use modifiedTime as fallback # Google Workspace file - use modifiedTime as fallback
content_unchanged = incoming_modified_time == stored_modified_time content_unchanged = incoming_modified_time == stored_modified_time
if content_unchanged: if content_unchanged:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
return None, True return None, True
# Content changed - return existing document for update # Content changed - return existing document for update
return existing_document, False return existing_document, False
# Create new pending document # Create new pending document
document = Document( document = Document(
search_space_id=search_space_id, search_space_id=search_space_id,
@ -815,7 +817,7 @@ async def _create_pending_document_for_file(
connector_id=connector_id, connector_id=connector_id,
) )
session.add(document) session.add(document)
return document, False return document, False
@ -958,7 +960,7 @@ async def _process_single_file(
) -> tuple[int, int, int]: ) -> tuple[int, int, int]:
""" """
Process a single file by downloading and using Surfsense's file processor. Process a single file by downloading and using Surfsense's file processor.
Implements Phase 2 of the 2-phase document status update pattern. Implements Phase 2 of the 2-phase document status update pattern.
Updates document status: pending → processing → ready/failed Updates document status: pending → processing → ready/failed
@ -1042,12 +1044,13 @@ async def _process_single_file(
processed_doc = await check_document_by_unique_identifier( processed_doc = await check_document_by_unique_identifier(
session, unique_identifier_hash session, unique_identifier_hash
) )
if processed_doc: # Ensure status is READY
# Ensure status is READY if processed_doc and not DocumentStatus.is_state(
if not DocumentStatus.is_state(processed_doc.status, DocumentStatus.READY): processed_doc.status, DocumentStatus.READY
processed_doc.status = DocumentStatus.ready() ):
processed_doc.updated_at = get_current_timestamp() processed_doc.status = DocumentStatus.ready()
await session.commit() processed_doc.updated_at = get_current_timestamp()
await session.commit()
logger.info(f"Successfully indexed Google Drive file: {file_name}") logger.info(f"Successfully indexed Google Drive file: {file_name}")
return 1, 0, 0 return 1, 0, 0
@ -1061,7 +1064,9 @@ async def _process_single_file(
pending_document.updated_at = get_current_timestamp() pending_document.updated_at = get_current_timestamp()
await session.commit() await session.commit()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
return 0, 0, 1 return 0, 0, 1

View file

@ -228,7 +228,9 @@ async def index_google_gmail_messages(
documents_indexed = 0 documents_indexed = 0
documents_skipped = 0 documents_skipped = 0
documents_failed = 0 # Track messages that failed processing documents_failed = 0 # Track messages that failed processing
duplicate_content_count = 0 # Track messages skipped due to duplicate content_hash duplicate_content_count = (
0 # Track messages skipped due to duplicate content_hash
)
# Heartbeat tracking - update notification periodically to prevent appearing stuck # Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time() last_heartbeat_time = time.time()
@ -294,23 +296,27 @@ async def index_google_gmail_messages(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
messages_to_process.append({ messages_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'markdown_content': markdown_content, "is_new": False,
'content_hash': content_hash, "markdown_content": markdown_content,
'message_id': message_id, "content_hash": content_hash,
'thread_id': thread_id, "message_id": message_id,
'subject': subject, "thread_id": thread_id,
'sender': sender, "subject": subject,
'date_str': date_str, "sender": sender,
}) "date_str": date_str,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -356,17 +362,19 @@ async def index_google_gmail_messages(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
messages_to_process.append({ messages_to_process.append(
'document': document, {
'is_new': True, "document": document,
'markdown_content': markdown_content, "is_new": True,
'content_hash': content_hash, "markdown_content": markdown_content,
'message_id': message_id, "content_hash": content_hash,
'thread_id': thread_id, "message_id": message_id,
'subject': subject, "thread_id": thread_id,
'sender': sender, "subject": subject,
'date_str': date_str, "sender": sender,
}) "date_str": date_str,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True)
@ -375,7 +383,9 @@ async def index_google_gmail_messages(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -392,7 +402,7 @@ async def index_google_gmail_messages(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -405,16 +415,21 @@ async def index_google_gmail_messages(
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"message_id": item['message_id'], "message_id": item["message_id"],
"thread_id": item['thread_id'], "thread_id": item["thread_id"],
"subject": item['subject'], "subject": item["subject"],
"sender": item['sender'], "sender": item["sender"],
"date": item['date_str'], "date": item["date_str"],
"document_type": "Gmail Message", "document_type": "Gmail Message",
"connector_type": "Google Gmail", "connector_type": "Google Gmail",
} }
summary_content, summary_embedding = await generate_document_summary( (
item['markdown_content'], user_llm, document_metadata_for_summary summary_content,
summary_embedding,
) = await generate_document_summary(
item["markdown_content"],
user_llm,
document_metadata_for_summary,
) )
else: else:
summary_content = f"Google Gmail Message: {item['subject']}\n\n" summary_content = f"Google Gmail Message: {item['subject']}\n\n"
@ -424,19 +439,19 @@ async def index_google_gmail_messages(
summary_content summary_content
) )
chunks = await create_document_chunks(item['markdown_content']) chunks = await create_document_chunks(item["markdown_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['subject'] document.title = item["subject"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"message_id": item['message_id'], "message_id": item["message_id"],
"thread_id": item['thread_id'], "thread_id": item["thread_id"],
"subject": item['subject'], "subject": item["subject"],
"sender": item['sender'], "sender": item["sender"],
"date": item['date_str'], "date": item["date_str"],
"connector_id": connector_id, "connector_id": connector_id,
} }
safe_set_chunks(document, chunks) safe_set_chunks(document, chunks)
@ -459,7 +474,9 @@ async def index_google_gmail_messages(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue

View file

@ -239,23 +239,27 @@ async def index_jira_issues(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
issues_to_process.append({ issues_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'issue_content': issue_content, "is_new": False,
'content_hash': content_hash, "issue_content": issue_content,
'issue_id': issue_id, "content_hash": content_hash,
'issue_identifier': issue_identifier, "issue_id": issue_id,
'issue_title': issue_title, "issue_identifier": issue_identifier,
'formatted_issue': formatted_issue, "issue_title": issue_title,
'comment_count': comment_count, "formatted_issue": formatted_issue,
}) "comment_count": comment_count,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -301,17 +305,19 @@ async def index_jira_issues(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
issues_to_process.append({ issues_to_process.append(
'document': document, {
'is_new': True, "document": document,
'issue_content': issue_content, "is_new": True,
'content_hash': content_hash, "issue_content": issue_content,
'issue_id': issue_id, "content_hash": content_hash,
'issue_identifier': issue_identifier, "issue_id": issue_id,
'issue_title': issue_title, "issue_identifier": issue_identifier,
'formatted_issue': formatted_issue, "issue_title": issue_title,
'comment_count': comment_count, "formatted_issue": formatted_issue,
}) "comment_count": comment_count,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for issue: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for issue: {e!s}", exc_info=True)
@ -320,7 +326,9 @@ async def index_jira_issues(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -337,7 +345,7 @@ async def index_jira_issues(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -350,11 +358,11 @@ async def index_jira_issues(
if user_llm: if user_llm:
document_metadata = { document_metadata = {
"issue_key": item['issue_identifier'], "issue_key": item["issue_identifier"],
"issue_title": item['issue_title'], "issue_title": item["issue_title"],
"status": item['formatted_issue'].get("status", "Unknown"), "status": item["formatted_issue"].get("status", "Unknown"),
"priority": item['formatted_issue'].get("priority", "Unknown"), "priority": item["formatted_issue"].get("priority", "Unknown"),
"comment_count": item['comment_count'], "comment_count": item["comment_count"],
"document_type": "Jira Issue", "document_type": "Jira Issue",
"connector_type": "Jira", "connector_type": "Jira",
} }
@ -362,34 +370,32 @@ async def index_jira_issues(
summary_content, summary_content,
summary_embedding, summary_embedding,
) = await generate_document_summary( ) = await generate_document_summary(
item['issue_content'], user_llm, document_metadata item["issue_content"], user_llm, document_metadata
) )
else: else:
# Fallback to simple summary if no LLM configured # Fallback to simple summary if no LLM configured
summary_content = f"Jira Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['formatted_issue'].get('status', 'Unknown')}\n\n" summary_content = f"Jira Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['formatted_issue'].get('status', 'Unknown')}\n\n"
if item['formatted_issue'].get("description"): if item["formatted_issue"].get("description"):
summary_content += ( summary_content += f"Description: {item['formatted_issue'].get('description')}\n\n"
f"Description: {item['formatted_issue'].get('description')}\n\n"
)
summary_content += f"Comments: {item['comment_count']}" summary_content += f"Comments: {item['comment_count']}"
summary_embedding = config.embedding_model_instance.embed( summary_embedding = config.embedding_model_instance.embed(
summary_content summary_content
) )
# Process chunks - using the full issue content with comments # Process chunks - using the full issue content with comments
chunks = await create_document_chunks(item['issue_content']) chunks = await create_document_chunks(item["issue_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = f"{item['issue_identifier']}: {item['issue_title']}" document.title = f"{item['issue_identifier']}: {item['issue_title']}"
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"issue_id": item['issue_id'], "issue_id": item["issue_id"],
"issue_identifier": item['issue_identifier'], "issue_identifier": item["issue_identifier"],
"issue_title": item['issue_title'], "issue_title": item["issue_title"],
"state": item['formatted_issue'].get("status", "Unknown"), "state": item["formatted_issue"].get("status", "Unknown"),
"comment_count": item['comment_count'], "comment_count": item["comment_count"],
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
@ -416,7 +422,9 @@ async def index_jira_issues(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue # Skip this issue and continue with others continue # Skip this issue and continue with others

View file

@ -272,7 +272,9 @@ async def index_linear_issues(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
logger.info( logger.info(
f"Document for Linear issue {issue_identifier} unchanged. Skipping." f"Document for Linear issue {issue_identifier} unchanged. Skipping."
@ -281,19 +283,21 @@ async def index_linear_issues(
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
issues_to_process.append({ issues_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'issue_content': issue_content, "is_new": False,
'content_hash': content_hash, "issue_content": issue_content,
'issue_id': issue_id, "content_hash": content_hash,
'issue_identifier': issue_identifier, "issue_id": issue_id,
'issue_title': issue_title, "issue_identifier": issue_identifier,
'state': state, "issue_title": issue_title,
'description': description, "state": state,
'comment_count': comment_count, "description": description,
'priority': priority, "comment_count": comment_count,
}) "priority": priority,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -338,19 +342,21 @@ async def index_linear_issues(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
issues_to_process.append({ issues_to_process.append(
'document': document, {
'is_new': True, "document": document,
'issue_content': issue_content, "is_new": True,
'content_hash': content_hash, "issue_content": issue_content,
'issue_id': issue_id, "content_hash": content_hash,
'issue_identifier': issue_identifier, "issue_id": issue_id,
'issue_title': issue_title, "issue_identifier": issue_identifier,
'state': state, "issue_title": issue_title,
'description': description, "state": state,
'comment_count': comment_count, "description": description,
'priority': priority, "comment_count": comment_count,
}) "priority": priority,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for issue: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for issue: {e!s}", exc_info=True)
@ -359,7 +365,9 @@ async def index_linear_issues(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -376,7 +384,7 @@ async def index_linear_issues(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -389,20 +397,23 @@ async def index_linear_issues(
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"issue_id": item['issue_identifier'], "issue_id": item["issue_identifier"],
"issue_title": item['issue_title'], "issue_title": item["issue_title"],
"state": item['state'], "state": item["state"],
"priority": item['priority'], "priority": item["priority"],
"comment_count": item['comment_count'], "comment_count": item["comment_count"],
"document_type": "Linear Issue", "document_type": "Linear Issue",
"connector_type": "Linear", "connector_type": "Linear",
} }
summary_content, summary_embedding = await generate_document_summary( (
item['issue_content'], user_llm, document_metadata_for_summary summary_content,
summary_embedding,
) = await generate_document_summary(
item["issue_content"], user_llm, document_metadata_for_summary
) )
else: else:
# Fallback to simple summary if no LLM configured # Fallback to simple summary if no LLM configured
description = item['description'] description = item["description"]
if description and len(description) > 1000: if description and len(description) > 1000:
description = description[:997] + "..." description = description[:997] + "..."
summary_content = f"Linear Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['state']}\n\n" summary_content = f"Linear Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['state']}\n\n"
@ -413,19 +424,19 @@ async def index_linear_issues(
summary_content summary_content
) )
chunks = await create_document_chunks(item['issue_content']) chunks = await create_document_chunks(item["issue_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = f"{item['issue_identifier']}: {item['issue_title']}" document.title = f"{item['issue_identifier']}: {item['issue_title']}"
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"issue_id": item['issue_id'], "issue_id": item["issue_id"],
"issue_identifier": item['issue_identifier'], "issue_identifier": item["issue_identifier"],
"issue_title": item['issue_title'], "issue_title": item["issue_title"],
"state": item['state'], "state": item["state"],
"comment_count": item['comment_count'], "comment_count": item["comment_count"],
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
@ -452,7 +463,9 @@ async def index_linear_issues(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
skipped_issues.append( skipped_issues.append(
f"{item.get('issue_identifier', 'Unknown')} (processing error)" f"{item.get('issue_identifier', 'Unknown')} (processing error)"
) )
@ -466,7 +479,9 @@ async def index_linear_issues(
logger.info(f"Final commit: Total {documents_indexed} Linear issues processed") logger.info(f"Final commit: Total {documents_indexed} Linear issues processed")
try: try:
await session.commit() await session.commit()
logger.info("Successfully committed all Linear document changes to database") logger.info(
"Successfully committed all Linear document changes to database"
)
except Exception as e: except Exception as e:
# Handle any remaining integrity errors gracefully (race conditions, etc.) # Handle any remaining integrity errors gracefully (race conditions, etc.)
if ( if (

View file

@ -305,7 +305,9 @@ async def index_luma_events(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
logger.info( logger.info(
f"Document for Luma event {event_name} unchanged. Skipping." f"Document for Luma event {event_name} unchanged. Skipping."
@ -314,23 +316,25 @@ async def index_luma_events(
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
events_to_process.append({ events_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'event_id': event_id, "is_new": False,
'event_name': event_name, "event_id": event_id,
'event_url': event_url, "event_name": event_name,
'event_markdown': event_markdown, "event_url": event_url,
'content_hash': content_hash, "event_markdown": event_markdown,
'start_at': start_at, "content_hash": content_hash,
'end_at': end_at, "start_at": start_at,
'timezone': timezone, "end_at": end_at,
'location': location, "timezone": timezone,
'city': city, "location": location,
'host_names': host_names, "city": city,
'description': description, "host_names": host_names,
'cover_url': cover_url, "description": description,
}) "cover_url": cover_url,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -380,23 +384,25 @@ async def index_luma_events(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
events_to_process.append({ events_to_process.append(
'document': document, {
'is_new': True, "document": document,
'event_id': event_id, "is_new": True,
'event_name': event_name, "event_id": event_id,
'event_url': event_url, "event_name": event_name,
'event_markdown': event_markdown, "event_url": event_url,
'content_hash': content_hash, "event_markdown": event_markdown,
'start_at': start_at, "content_hash": content_hash,
'end_at': end_at, "start_at": start_at,
'timezone': timezone, "end_at": end_at,
'location': location, "timezone": timezone,
'city': city, "location": location,
'host_names': host_names, "city": city,
'description': description, "host_names": host_names,
'cover_url': cover_url, "description": description,
}) "cover_url": cover_url,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
@ -405,7 +411,9 @@ async def index_luma_events(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -422,7 +430,7 @@ async def index_luma_events(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -435,15 +443,15 @@ async def index_luma_events(
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"event_id": item['event_id'], "event_id": item["event_id"],
"event_name": item['event_name'], "event_name": item["event_name"],
"event_url": item['event_url'], "event_url": item["event_url"],
"start_at": item['start_at'], "start_at": item["start_at"],
"end_at": item['end_at'], "end_at": item["end_at"],
"timezone": item['timezone'], "timezone": item["timezone"],
"location": item['location'] or "No location", "location": item["location"] or "No location",
"city": item['city'], "city": item["city"],
"hosts": item['host_names'], "hosts": item["host_names"],
"document_type": "Luma Event", "document_type": "Luma Event",
"connector_type": "Luma", "connector_type": "Luma",
} }
@ -451,26 +459,26 @@ async def index_luma_events(
summary_content, summary_content,
summary_embedding, summary_embedding,
) = await generate_document_summary( ) = await generate_document_summary(
item['event_markdown'], user_llm, document_metadata_for_summary item["event_markdown"], user_llm, document_metadata_for_summary
) )
else: else:
# Fallback to simple summary if no LLM configured # Fallback to simple summary if no LLM configured
summary_content = f"Luma Event: {item['event_name']}\n\n" summary_content = f"Luma Event: {item['event_name']}\n\n"
if item['event_url']: if item["event_url"]:
summary_content += f"URL: {item['event_url']}\n" summary_content += f"URL: {item['event_url']}\n"
summary_content += f"Start: {item['start_at']}\n" summary_content += f"Start: {item['start_at']}\n"
summary_content += f"End: {item['end_at']}\n" summary_content += f"End: {item['end_at']}\n"
if item['timezone']: if item["timezone"]:
summary_content += f"Timezone: {item['timezone']}\n" summary_content += f"Timezone: {item['timezone']}\n"
if item['location']: if item["location"]:
summary_content += f"Location: {item['location']}\n" summary_content += f"Location: {item['location']}\n"
if item['city']: if item["city"]:
summary_content += f"City: {item['city']}\n" summary_content += f"City: {item['city']}\n"
if item['host_names']: if item["host_names"]:
summary_content += f"Hosts: {item['host_names']}\n" summary_content += f"Hosts: {item['host_names']}\n"
if item['description']: if item["description"]:
desc_preview = item['description'][:1000] desc_preview = item["description"][:1000]
if len(item['description']) > 1000: if len(item["description"]) > 1000:
desc_preview += "..." desc_preview += "..."
summary_content += f"Description: {desc_preview}\n" summary_content += f"Description: {desc_preview}\n"
@ -478,24 +486,24 @@ async def index_luma_events(
summary_content summary_content
) )
chunks = await create_document_chunks(item['event_markdown']) chunks = await create_document_chunks(item["event_markdown"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['event_name'] document.title = item["event_name"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"event_id": item['event_id'], "event_id": item["event_id"],
"event_name": item['event_name'], "event_name": item["event_name"],
"event_url": item['event_url'], "event_url": item["event_url"],
"start_at": item['start_at'], "start_at": item["start_at"],
"end_at": item['end_at'], "end_at": item["end_at"],
"timezone": item['timezone'], "timezone": item["timezone"],
"location": item['location'], "location": item["location"],
"city": item['city'], "city": item["city"],
"hosts": item['host_names'], "hosts": item["host_names"],
"cover_url": item['cover_url'], "cover_url": item["cover_url"],
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
@ -522,7 +530,9 @@ async def index_luma_events(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
skipped_events.append( skipped_events.append(
f"{item.get('event_name', 'Unknown')} (processing error)" f"{item.get('event_name', 'Unknown')} (processing error)"
) )

View file

@ -354,20 +354,24 @@ async def index_notion_pages(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
pages_to_process.append({ pages_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'markdown_content': markdown_content, "is_new": False,
'content_hash': content_hash, "markdown_content": markdown_content,
'page_id': page_id, "content_hash": content_hash,
'page_title': page_title, "page_id": page_id,
}) "page_title": page_title,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -410,14 +414,16 @@ async def index_notion_pages(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
pages_to_process.append({ pages_to_process.append(
'document': document, {
'is_new': True, "document": document,
'markdown_content': markdown_content, "is_new": True,
'content_hash': content_hash, "markdown_content": markdown_content,
'page_id': page_id, "content_hash": content_hash,
'page_title': page_title, "page_id": page_id,
}) "page_title": page_title,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
@ -426,7 +432,9 @@ async def index_notion_pages(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -443,7 +451,7 @@ async def index_notion_pages(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -456,13 +464,18 @@ async def index_notion_pages(
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"page_title": item['page_title'], "page_title": item["page_title"],
"page_id": item['page_id'], "page_id": item["page_id"],
"document_type": "Notion Page", "document_type": "Notion Page",
"connector_type": "Notion", "connector_type": "Notion",
} }
summary_content, summary_embedding = await generate_document_summary( (
item['markdown_content'], user_llm, document_metadata_for_summary summary_content,
summary_embedding,
) = await generate_document_summary(
item["markdown_content"],
user_llm,
document_metadata_for_summary,
) )
else: else:
# Fallback to simple summary if no LLM configured # Fallback to simple summary if no LLM configured
@ -471,16 +484,16 @@ async def index_notion_pages(
summary_content summary_content
) )
chunks = await create_document_chunks(item['markdown_content']) chunks = await create_document_chunks(item["markdown_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['page_title'] document.title = item["page_title"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"page_title": item['page_title'], "page_title": item["page_title"],
"page_id": item['page_id'], "page_id": item["page_id"],
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
@ -504,7 +517,9 @@ async def index_notion_pages(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
skipped_pages.append(f"{item['page_title']} (processing error)") skipped_pages.append(f"{item['page_title']} (processing error)")
documents_failed += 1 documents_failed += 1
continue continue

View file

@ -382,27 +382,31 @@ async def index_obsidian_vault(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
logger.debug(f"Note {title} unchanged, skipping") logger.debug(f"Note {title} unchanged, skipping")
skipped_count += 1 skipped_count += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
files_to_process.append({ files_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'file_info': file_info, "is_new": False,
'content': content, "file_info": file_info,
'body_content': body_content, "content": content,
'frontmatter': frontmatter, "body_content": body_content,
'wiki_links': wiki_links, "frontmatter": frontmatter,
'tags': tags, "wiki_links": wiki_links,
'title': title, "tags": tags,
'relative_path': relative_path, "title": title,
'content_hash': content_hash, "relative_path": relative_path,
'unique_identifier_hash': unique_identifier_hash, "content_hash": content_hash,
}) "unique_identifier_hash": unique_identifier_hash,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -445,20 +449,22 @@ async def index_obsidian_vault(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
files_to_process.append({ files_to_process.append(
'document': document, {
'is_new': True, "document": document,
'file_info': file_info, "is_new": True,
'content': content, "file_info": file_info,
'body_content': body_content, "content": content,
'frontmatter': frontmatter, "body_content": body_content,
'wiki_links': wiki_links, "frontmatter": frontmatter,
'tags': tags, "wiki_links": wiki_links,
'title': title, "tags": tags,
'relative_path': relative_path, "title": title,
'content_hash': content_hash, "relative_path": relative_path,
'unique_identifier_hash': unique_identifier_hash, "content_hash": content_hash,
}) "unique_identifier_hash": unique_identifier_hash,
}
)
except Exception as e: except Exception as e:
logger.exception( logger.exception(
@ -469,7 +475,9 @@ async def index_obsidian_vault(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -491,22 +499,22 @@ async def index_obsidian_vault(
await on_heartbeat_callback(indexed_count) await on_heartbeat_callback(indexed_count)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
await session.commit() await session.commit()
# Extract data from item # Extract data from item
title = item['title'] title = item["title"]
relative_path = item['relative_path'] relative_path = item["relative_path"]
content = item['content'] content = item["content"]
body_content = item['body_content'] body_content = item["body_content"]
frontmatter = item['frontmatter'] frontmatter = item["frontmatter"]
wiki_links = item['wiki_links'] wiki_links = item["wiki_links"]
tags = item['tags'] tags = item["tags"]
content_hash = item['content_hash'] content_hash = item["content_hash"]
file_info = item['file_info'] file_info = item["file_info"]
# Build metadata # Build metadata
document_metadata = { document_metadata = {
@ -584,7 +592,9 @@ async def index_obsidian_vault(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
failed_count += 1 failed_count += 1
continue continue
@ -592,9 +602,7 @@ async def index_obsidian_vault(
await update_connector_last_indexed(session, connector, update_last_indexed) await update_connector_last_indexed(session, connector, update_last_indexed)
# Final commit for any remaining documents not yet committed in batches # Final commit for any remaining documents not yet committed in batches
logger.info( logger.info(f"Final commit: Total {indexed_count} Obsidian notes processed")
f"Final commit: Total {indexed_count} Obsidian notes processed"
)
try: try:
await session.commit() await session.commit()
logger.info( logger.info(

View file

@ -314,7 +314,9 @@ async def index_slack_messages(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
logger.info( logger.info(
f"Document for Slack message {msg_ts} in channel {channel_name} unchanged. Skipping." f"Document for Slack message {msg_ts} in channel {channel_name} unchanged. Skipping."
@ -323,18 +325,20 @@ async def index_slack_messages(
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
messages_to_process.append({ messages_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'combined_document_string': combined_document_string, "is_new": False,
'content_hash': content_hash, "combined_document_string": combined_document_string,
'channel_name': channel_name, "content_hash": content_hash,
'channel_id': channel_id, "channel_name": channel_name,
'msg_ts': msg_ts, "channel_id": channel_id,
'start_date': start_date_str, "msg_ts": msg_ts,
'end_date': end_date_str, "start_date": start_date_str,
'message_count': len(formatted_messages), "end_date": end_date_str,
}) "message_count": len(formatted_messages),
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -377,18 +381,20 @@ async def index_slack_messages(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
messages_to_process.append({ messages_to_process.append(
'document': document, {
'is_new': True, "document": document,
'combined_document_string': combined_document_string, "is_new": True,
'content_hash': content_hash, "combined_document_string": combined_document_string,
'channel_name': channel_name, "content_hash": content_hash,
'channel_id': channel_id, "channel_name": channel_name,
'msg_ts': msg_ts, "channel_id": channel_id,
'start_date': start_date_str, "msg_ts": msg_ts,
'end_date': end_date_str, "start_date": start_date_str,
'message_count': len(formatted_messages), "end_date": end_date_str,
}) "message_count": len(formatted_messages),
}
)
logger.info( logger.info(
f"Phase 1: Collected {len(formatted_messages)} messages from channel {channel_name}" f"Phase 1: Collected {len(formatted_messages)} messages from channel {channel_name}"
@ -409,7 +415,9 @@ async def index_slack_messages(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -426,29 +434,29 @@ async def index_slack_messages(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
await session.commit() await session.commit()
# Heavy processing (embeddings, chunks) # Heavy processing (embeddings, chunks)
chunks = await create_document_chunks(item['combined_document_string']) chunks = await create_document_chunks(item["combined_document_string"])
doc_embedding = config.embedding_model_instance.embed( doc_embedding = config.embedding_model_instance.embed(
item['combined_document_string'] item["combined_document_string"]
) )
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['channel_name'] document.title = item["channel_name"]
document.content = item['combined_document_string'] document.content = item["combined_document_string"]
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = doc_embedding document.embedding = doc_embedding
document.document_metadata = { document.document_metadata = {
"channel_name": item['channel_name'], "channel_name": item["channel_name"],
"channel_id": item['channel_id'], "channel_id": item["channel_id"],
"start_date": item['start_date'], "start_date": item["start_date"],
"end_date": item['end_date'], "end_date": item["end_date"],
"message_count": item['message_count'], "message_count": item["message_count"],
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
@ -475,7 +483,9 @@ async def index_slack_messages(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue

View file

@ -332,25 +332,31 @@ async def index_teams_messages(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status = DocumentStatus.ready() existing_document.status, DocumentStatus.READY
):
existing_document.status = (
DocumentStatus.ready()
)
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
messages_to_process.append({ messages_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'combined_document_string': combined_document_string, "is_new": False,
'content_hash': content_hash, "combined_document_string": combined_document_string,
'team_name': team_name, "content_hash": content_hash,
'team_id': team_id, "team_name": team_name,
'channel_name': channel_name, "team_id": team_id,
'channel_id': channel_id, "channel_name": channel_name,
'message_id': message_id, "channel_id": channel_id,
'start_date': start_date_str, "message_id": message_id,
'end_date': end_date_str, "start_date": start_date_str,
}) "end_date": end_date_str,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -400,19 +406,21 @@ async def index_teams_messages(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
messages_to_process.append({ messages_to_process.append(
'document': document, {
'is_new': True, "document": document,
'combined_document_string': combined_document_string, "is_new": True,
'content_hash': content_hash, "combined_document_string": combined_document_string,
'team_name': team_name, "content_hash": content_hash,
'team_id': team_id, "team_name": team_name,
'channel_name': channel_name, "team_id": team_id,
'channel_id': channel_id, "channel_name": channel_name,
'message_id': message_id, "channel_id": channel_id,
'start_date': start_date_str, "message_id": message_id,
'end_date': end_date_str, "start_date": start_date_str,
}) "end_date": end_date_str,
}
)
except Exception as e: except Exception as e:
logger.error( logger.error(
@ -432,7 +440,9 @@ async def index_teams_messages(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -449,30 +459,30 @@ async def index_teams_messages(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
await session.commit() await session.commit()
# Heavy processing (embeddings, chunks) # Heavy processing (embeddings, chunks)
chunks = await create_document_chunks(item['combined_document_string']) chunks = await create_document_chunks(item["combined_document_string"])
doc_embedding = config.embedding_model_instance.embed( doc_embedding = config.embedding_model_instance.embed(
item['combined_document_string'] item["combined_document_string"]
) )
# Update document to READY with actual content # Update document to READY with actual content
document.title = f"{item['team_name']} - {item['channel_name']}" document.title = f"{item['team_name']} - {item['channel_name']}"
document.content = item['combined_document_string'] document.content = item["combined_document_string"]
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = doc_embedding document.embedding = doc_embedding
document.document_metadata = { document.document_metadata = {
"team_name": item['team_name'], "team_name": item["team_name"],
"team_id": item['team_id'], "team_id": item["team_id"],
"channel_name": item['channel_name'], "channel_name": item["channel_name"],
"channel_id": item['channel_id'], "channel_id": item["channel_id"],
"start_date": item['start_date'], "start_date": item["start_date"],
"end_date": item['end_date'], "end_date": item["end_date"],
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
@ -497,7 +507,9 @@ async def index_teams_messages(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue
@ -510,9 +522,7 @@ async def index_teams_messages(
) )
try: try:
await session.commit() await session.commit()
logger.info( logger.info("Successfully committed all Teams document changes to database")
"Successfully committed all Teams document changes to database"
)
except Exception as e: except Exception as e:
# Handle any remaining integrity errors gracefully (race conditions, etc.) # Handle any remaining integrity errors gracefully (race conditions, etc.)
if ( if (

View file

@ -184,22 +184,28 @@ async def index_crawled_urls(
if existing_document: if existing_document:
# Document exists - check if it's already being processed # Document exists - check if it's already being processed
if DocumentStatus.is_state(existing_document.status, DocumentStatus.PENDING): if DocumentStatus.is_state(
existing_document.status, DocumentStatus.PENDING
):
logger.info(f"URL {url} already pending. Skipping.") logger.info(f"URL {url} already pending. Skipping.")
documents_skipped += 1 documents_skipped += 1
continue continue
if DocumentStatus.is_state(existing_document.status, DocumentStatus.PROCESSING): if DocumentStatus.is_state(
existing_document.status, DocumentStatus.PROCESSING
):
logger.info(f"URL {url} already processing. Skipping.") logger.info(f"URL {url} already processing. Skipping.")
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for potential update check # Queue existing document for potential update check
urls_to_process.append({ urls_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'url': url, "is_new": False,
'unique_identifier_hash': unique_identifier_hash, "url": url,
}) "unique_identifier_hash": unique_identifier_hash,
}
)
continue continue
# Create new document with PENDING status (visible in UI immediately) # Create new document with PENDING status (visible in UI immediately)
@ -224,12 +230,14 @@ async def index_crawled_urls(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
urls_to_process.append({ urls_to_process.append(
'document': document, {
'is_new': True, "document": document,
'url': url, "is_new": True,
'unique_identifier_hash': unique_identifier_hash, "url": url,
}) "unique_identifier_hash": unique_identifier_hash,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for URL {url}: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for URL {url}: {e!s}", exc_info=True)
@ -238,7 +246,9 @@ async def index_crawled_urls(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([u for u in urls_to_process if u['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([u for u in urls_to_process if u['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -255,9 +265,9 @@ async def index_crawled_urls(
await on_heartbeat_callback(documents_indexed + documents_updated) await on_heartbeat_callback(documents_indexed + documents_updated)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
url = item['url'] url = item["url"]
is_new = item['is_new'] is_new = item["is_new"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
@ -298,7 +308,9 @@ async def index_crawled_urls(
continue continue
# Format content as structured document for summary generation # Format content as structured document for summary generation
structured_document = crawler.format_to_structured_document(crawl_result) structured_document = crawler.format_to_structured_document(
crawl_result
)
# Generate content hash using a version WITHOUT metadata # Generate content hash using a version WITHOUT metadata
structured_document_for_hash = crawler.format_to_structured_document( structured_document_for_hash = crawler.format_to_structured_document(
@ -339,7 +351,9 @@ async def index_crawled_urls(
f"(existing document ID: {duplicate_by_content.id}). " f"(existing document ID: {duplicate_by_content.id}). "
f"Marking as failed." f"Marking as failed."
) )
document.status = DocumentStatus.failed("Duplicate content exists") document.status = DocumentStatus.failed(
"Duplicate content exists"
)
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
await session.commit() await session.commit()
duplicate_content_count += 1 duplicate_content_count += 1
@ -360,7 +374,10 @@ async def index_crawled_urls(
"document_type": "Crawled URL", "document_type": "Crawled URL",
"crawler_type": crawler_type, "crawler_type": crawler_type,
} }
summary_content, summary_embedding = await generate_document_summary( (
summary_content,
summary_embedding,
) = await generate_document_summary(
structured_document, user_llm, document_metadata_for_summary structured_document, user_llm, document_metadata_for_summary
) )
else: else:
@ -423,7 +440,9 @@ async def index_crawled_urls(
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
await session.commit() await session.commit()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue
@ -438,7 +457,9 @@ async def index_crawled_urls(
) )
try: try:
await session.commit() await session.commit()
logger.info("Successfully committed all webcrawler document changes to database") logger.info(
"Successfully committed all webcrawler document changes to database"
)
except Exception as e: except Exception as e:
# Handle any remaining integrity errors gracefully # Handle any remaining integrity errors gracefully
if "duplicate key value violates unique constraint" in str(e).lower(): if "duplicate key value violates unique constraint" in str(e).lower():

View file

@ -17,29 +17,30 @@ md = MarkdownifyTransformer()
def safe_set_chunks(document: Document, chunks: list) -> None: def safe_set_chunks(document: Document, chunks: list) -> None:
""" """
Safely assign chunks to a document without triggering lazy loading. Safely assign chunks to a document without triggering lazy loading.
ALWAYS use this instead of `document.chunks = chunks` to avoid ALWAYS use this instead of `document.chunks = chunks` to avoid
SQLAlchemy async errors (MissingGreenlet / greenlet_spawn). SQLAlchemy async errors (MissingGreenlet / greenlet_spawn).
Why this is needed: Why this is needed:
- Direct assignment `document.chunks = chunks` triggers SQLAlchemy to - Direct assignment `document.chunks = chunks` triggers SQLAlchemy to
load the OLD chunks first (for comparison/orphan detection) load the OLD chunks first (for comparison/orphan detection)
- This lazy loading fails in async context with asyncpg driver - This lazy loading fails in async context with asyncpg driver
- set_committed_value bypasses this by setting the value directly - set_committed_value bypasses this by setting the value directly
This function is safe regardless of how the document was loaded This function is safe regardless of how the document was loaded
(with or without selectinload). (with or without selectinload).
Args: Args:
document: The Document object to update document: The Document object to update
chunks: List of Chunk objects to assign chunks: List of Chunk objects to assign
Example: Example:
# Instead of: document.chunks = chunks (DANGEROUS!) # Instead of: document.chunks = chunks (DANGEROUS!)
safe_set_chunks(document, chunks) # Always safe safe_set_chunks(document, chunks) # Always safe
""" """
from sqlalchemy.orm.attributes import set_committed_value from sqlalchemy.orm.attributes import set_committed_value
set_committed_value(document, 'chunks', chunks)
set_committed_value(document, "chunks", chunks)
def get_current_timestamp() -> datetime: def get_current_timestamp() -> datetime:

View file

@ -91,7 +91,9 @@ async def add_circleback_meeting_document(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
await session.commit() await session.commit()
logger.info(f"Circleback meeting {meeting_id} unchanged. Skipping.") logger.info(f"Circleback meeting {meeting_id} unchanged. Skipping.")
@ -110,7 +112,7 @@ async def add_circleback_meeting_document(
# PHASE 1: Create document with PENDING status # PHASE 1: Create document with PENDING status
# This makes the document visible in the UI immediately # This makes the document visible in the UI immediately
# ======================================================================= # =======================================================================
# Fetch the user who set up the Circleback connector (preferred) # Fetch the user who set up the Circleback connector (preferred)
# or fall back to search space owner if no connector found # or fall back to search space owner if no connector found
created_by_user_id = None created_by_user_id = None
@ -173,7 +175,7 @@ async def add_circleback_meeting_document(
# ======================================================================= # =======================================================================
# PHASE 3: Process the document content # PHASE 3: Process the document content
# ======================================================================= # =======================================================================
# Get LLM for generating summary # Get LLM for generating summary
llm = await get_document_summary_llm(session, search_space_id) llm = await get_document_summary_llm(session, search_space_id)
if not llm: if not llm:
@ -243,7 +245,7 @@ async def add_circleback_meeting_document(
await session.commit() await session.commit()
await session.refresh(document) await session.refresh(document)
if existing_document: if existing_document:
logger.info( logger.info(
f"Updated Circleback meeting document {meeting_id} in search space {search_space_id}" f"Updated Circleback meeting document {meeting_id} in search space {search_space_id}"
@ -267,7 +269,9 @@ async def add_circleback_meeting_document(
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
await session.commit() await session.commit()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
raise db_error raise db_error
except Exception as e: except Exception as e:
await session.rollback() await session.rollback()
@ -279,5 +283,7 @@ async def add_circleback_meeting_document(
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
await session.commit() await session.commit()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
raise RuntimeError(f"Failed to process Circleback meeting: {e!s}") from e raise RuntimeError(f"Failed to process Circleback meeting: {e!s}") from e

View file

@ -1629,16 +1629,16 @@ async def process_file_in_background_with_document(
) -> Document | None: ) -> Document | None:
""" """
Process file and update existing pending document (2-phase pattern). Process file and update existing pending document (2-phase pattern).
This function is Phase 2 of the real-time document status updates: This function is Phase 2 of the real-time document status updates:
- Phase 1 (API): Created document with pending status - Phase 1 (API): Created document with pending status
- Phase 2 (this): Process file and update document to ready/failed - Phase 2 (this): Process file and update document to ready/failed
The document already exists with pending status. This function: The document already exists with pending status. This function:
1. Parses the file content (markdown, audio, or ETL services) 1. Parses the file content (markdown, audio, or ETL services)
2. Updates the document with content, embeddings, and chunks 2. Updates the document with content, embeddings, and chunks
3. Sets status to 'ready' on success 3. Sets status to 'ready' on success
Args: Args:
document: Existing document with pending status document: Existing document with pending status
file_path: Path to the uploaded file file_path: Path to the uploaded file
@ -1650,7 +1650,7 @@ async def process_file_in_background_with_document(
log_entry: Log entry for this task log_entry: Log entry for this task
connector: Optional connector info for Google Drive files connector: Optional connector info for Google Drive files
notification: Optional notification for progress updates notification: Optional notification for progress updates
Returns: Returns:
Updated Document object if successful, None if duplicate content detected Updated Document object if successful, None if duplicate content detected
""" """
@ -1665,13 +1665,18 @@ async def process_file_in_background_with_document(
etl_service = None etl_service = None
# ===== STEP 1: Parse file content based on type ===== # ===== STEP 1: Parse file content based on type =====
# Check if the file is a markdown or text file # Check if the file is a markdown or text file
if filename.lower().endswith((".md", ".markdown", ".txt")): if filename.lower().endswith((".md", ".markdown", ".txt")):
# Update notification: parsing stage # Update notification: parsing stage
if notification: if notification:
await NotificationService.document_processing.notify_processing_progress( await (
session, notification, stage="parsing", stage_message="Reading file" NotificationService.document_processing.notify_processing_progress(
session,
notification,
stage="parsing",
stage_message="Reading file",
)
) )
await task_logger.log_task_progress( await task_logger.log_task_progress(
@ -1695,8 +1700,13 @@ async def process_file_in_background_with_document(
): ):
# Update notification: parsing stage (transcription) # Update notification: parsing stage (transcription)
if notification: if notification:
await NotificationService.document_processing.notify_processing_progress( await (
session, notification, stage="parsing", stage_message="Transcribing audio" NotificationService.document_processing.notify_processing_progress(
session,
notification,
stage="parsing",
stage_message="Transcribing audio",
)
) )
await task_logger.log_task_progress( await task_logger.log_task_progress(
@ -1708,7 +1718,8 @@ async def process_file_in_background_with_document(
# Transcribe audio # Transcribe audio
stt_service_type = ( stt_service_type = (
"local" "local"
if app_config.STT_SERVICE and app_config.STT_SERVICE.startswith("local/") if app_config.STT_SERVICE
and app_config.STT_SERVICE.startswith("local/")
else "external" else "external"
) )
@ -1719,7 +1730,9 @@ async def process_file_in_background_with_document(
transcribed_text = result.get("text", "") transcribed_text = result.get("text", "")
if not transcribed_text: if not transcribed_text:
raise ValueError("Transcription returned empty text") raise ValueError("Transcription returned empty text")
markdown_content = f"# Transcription of {filename}\n\n{transcribed_text}" markdown_content = (
f"# Transcription of {filename}\n\n{transcribed_text}"
)
else: else:
with open(file_path, "rb") as audio_file: with open(file_path, "rb") as audio_file:
transcription_kwargs = { transcription_kwargs = {
@ -1728,12 +1741,18 @@ async def process_file_in_background_with_document(
"api_key": app_config.STT_SERVICE_API_KEY, "api_key": app_config.STT_SERVICE_API_KEY,
} }
if app_config.STT_SERVICE_API_BASE: if app_config.STT_SERVICE_API_BASE:
transcription_kwargs["api_base"] = app_config.STT_SERVICE_API_BASE transcription_kwargs["api_base"] = (
transcription_response = await atranscription(**transcription_kwargs) app_config.STT_SERVICE_API_BASE
)
transcription_response = await atranscription(
**transcription_kwargs
)
transcribed_text = transcription_response.get("text", "") transcribed_text = transcription_response.get("text", "")
if not transcribed_text: if not transcribed_text:
raise ValueError("Transcription returned empty text") raise ValueError("Transcription returned empty text")
markdown_content = f"# Transcription of {filename}\n\n{transcribed_text}" markdown_content = (
f"# Transcription of {filename}\n\n{transcribed_text}"
)
etl_service = "AUDIO_TRANSCRIPTION" etl_service = "AUDIO_TRANSCRIPTION"
# Clean up temp file # Clean up temp file
@ -1742,13 +1761,18 @@ async def process_file_in_background_with_document(
else: else:
# Document files - use ETL service # Document files - use ETL service
from app.services.page_limit_service import PageLimitExceededError, PageLimitService from app.services.page_limit_service import (
PageLimitExceededError,
PageLimitService,
)
page_limit_service = PageLimitService(session) page_limit_service = PageLimitService(session)
# Estimate page count # Estimate page count
try: try:
estimated_pages = page_limit_service.estimate_pages_before_processing(file_path) estimated_pages = page_limit_service.estimate_pages_before_processing(
file_path
)
except Exception: except Exception:
file_size = os.path.getsize(file_path) file_size = os.path.getsize(file_path)
estimated_pages = max(1, file_size // (80 * 1024)) estimated_pages = max(1, file_size // (80 * 1024))
@ -1759,14 +1783,22 @@ async def process_file_in_background_with_document(
if app_config.ETL_SERVICE == "UNSTRUCTURED": if app_config.ETL_SERVICE == "UNSTRUCTURED":
if notification: if notification:
await NotificationService.document_processing.notify_processing_progress( await NotificationService.document_processing.notify_processing_progress(
session, notification, stage="parsing", stage_message="Extracting content" session,
notification,
stage="parsing",
stage_message="Extracting content",
) )
from langchain_unstructured import UnstructuredLoader from langchain_unstructured import UnstructuredLoader
loader = UnstructuredLoader( loader = UnstructuredLoader(
file_path, mode="elements", post_processors=[], languages=["eng"], file_path,
include_orig_elements=False, include_metadata=False, strategy="auto" mode="elements",
post_processors=[],
languages=["eng"],
include_orig_elements=False,
include_metadata=False,
strategy="auto",
) )
docs = await loader.aload() docs = await loader.aload()
markdown_content = await convert_document_to_markdown(docs) markdown_content = await convert_document_to_markdown(docs)
@ -1775,37 +1807,55 @@ async def process_file_in_background_with_document(
etl_service = "UNSTRUCTURED" etl_service = "UNSTRUCTURED"
# Update page usage # Update page usage
await page_limit_service.update_page_usage(user_id, final_page_count, allow_exceed=True) await page_limit_service.update_page_usage(
user_id, final_page_count, allow_exceed=True
)
elif app_config.ETL_SERVICE == "LLAMACLOUD": elif app_config.ETL_SERVICE == "LLAMACLOUD":
if notification: if notification:
await NotificationService.document_processing.notify_processing_progress( await NotificationService.document_processing.notify_processing_progress(
session, notification, stage="parsing", stage_message="Extracting content" session,
notification,
stage="parsing",
stage_message="Extracting content",
) )
result = await parse_with_llamacloud_retry( result = await parse_with_llamacloud_retry(
file_path=file_path, estimated_pages=estimated_pages, file_path=file_path,
task_logger=task_logger, log_entry=log_entry estimated_pages=estimated_pages,
task_logger=task_logger,
log_entry=log_entry,
)
markdown_documents = await result.aget_markdown_documents(
split_by_page=False
) )
markdown_documents = await result.aget_markdown_documents(split_by_page=False)
if not markdown_documents: if not markdown_documents:
raise RuntimeError(f"LlamaCloud parsing returned no documents: {filename}") raise RuntimeError(
f"LlamaCloud parsing returned no documents: {filename}"
)
markdown_content = markdown_documents[0].text markdown_content = markdown_documents[0].text
etl_service = "LLAMACLOUD" etl_service = "LLAMACLOUD"
# Update page usage # Update page usage
await page_limit_service.update_page_usage(user_id, estimated_pages, allow_exceed=True) await page_limit_service.update_page_usage(
user_id, estimated_pages, allow_exceed=True
)
elif app_config.ETL_SERVICE == "DOCLING": elif app_config.ETL_SERVICE == "DOCLING":
if notification: if notification:
await NotificationService.document_processing.notify_processing_progress( await NotificationService.document_processing.notify_processing_progress(
session, notification, stage="parsing", stage_message="Extracting content" session,
notification,
stage="parsing",
stage_message="Extracting content",
) )
# Suppress logging during Docling import # Suppress logging during Docling import
getLogger("docling.pipeline.base_pipeline").setLevel(ERROR) getLogger("docling.pipeline.base_pipeline").setLevel(ERROR)
getLogger("docling.document_converter").setLevel(ERROR) getLogger("docling.document_converter").setLevel(ERROR)
getLogger("docling_core.transforms.chunker.hierarchical_chunker").setLevel(ERROR) getLogger(
"docling_core.transforms.chunker.hierarchical_chunker"
).setLevel(ERROR)
from docling.document_converter import DocumentConverter from docling.document_converter import DocumentConverter
@ -1815,7 +1865,9 @@ async def process_file_in_background_with_document(
etl_service = "DOCLING" etl_service = "DOCLING"
# Update page usage # Update page usage
await page_limit_service.update_page_usage(user_id, estimated_pages, allow_exceed=True) await page_limit_service.update_page_usage(
user_id, estimated_pages, allow_exceed=True
)
else: else:
raise RuntimeError(f"Unknown ETL_SERVICE: {app_config.ETL_SERVICE}") raise RuntimeError(f"Unknown ETL_SERVICE: {app_config.ETL_SERVICE}")
@ -1829,7 +1881,7 @@ async def process_file_in_background_with_document(
# ===== STEP 2: Check for duplicate content ===== # ===== STEP 2: Check for duplicate content =====
content_hash = generate_content_hash(markdown_content, search_space_id) content_hash = generate_content_hash(markdown_content, search_space_id)
existing_by_content = await check_duplicate_document(session, content_hash) existing_by_content = await check_duplicate_document(session, content_hash)
if existing_by_content and existing_by_content.id != document.id: if existing_by_content and existing_by_content.id != document.id:
# Duplicate content found - mark this document as failed # Duplicate content found - mark this document as failed
@ -1846,7 +1898,7 @@ async def process_file_in_background_with_document(
) )
user_llm = await get_user_long_context_llm(session, user_id, search_space_id) user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
if user_llm: if user_llm:
document_metadata = { document_metadata = {
"file_name": filename, "file_name": filename,
@ -1881,10 +1933,10 @@ async def process_file_in_background_with_document(
**(document.document_metadata or {}), **(document.document_metadata or {}),
} }
flag_modified(document, "document_metadata") flag_modified(document, "document_metadata")
# Use safe_set_chunks to avoid async issues # Use safe_set_chunks to avoid async issues
safe_set_chunks(document, chunks) safe_set_chunks(document, chunks)
document.blocknote_document = blocknote_json document.blocknote_document = blocknote_json
document.content_needs_reindexing = False document.content_needs_reindexing = False
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
@ -1922,7 +1974,11 @@ async def process_file_in_background_with_document(
log_entry, log_entry,
error_message, error_message,
str(e), str(e),
{"error_type": type(e).__name__, "filename": filename, "document_id": document.id}, {
"error_type": type(e).__name__,
"filename": filename,
"document_id": document.id,
},
) )
logging.error(f"Error processing file with document: {error_message}") logging.error(f"Error processing file with document: {error_message}")
raise raise

View file

@ -136,11 +136,19 @@ async def add_youtube_video_document(
document = existing_document document = existing_document
is_new_document = False is_new_document = False
# Check if already being processed # Check if already being processed
if DocumentStatus.is_state(existing_document.status, DocumentStatus.PENDING): if DocumentStatus.is_state(
logging.info(f"YouTube video {video_id} already pending. Returning existing.") existing_document.status, DocumentStatus.PENDING
):
logging.info(
f"YouTube video {video_id} already pending. Returning existing."
)
return existing_document return existing_document
if DocumentStatus.is_state(existing_document.status, DocumentStatus.PROCESSING): if DocumentStatus.is_state(
logging.info(f"YouTube video {video_id} already processing. Returning existing.") existing_document.status, DocumentStatus.PROCESSING
):
logging.info(
f"YouTube video {video_id} already processing. Returning existing."
)
return existing_document return existing_document
else: else:
# Create new document with PENDING status (visible in UI immediately) # Create new document with PENDING status (visible in UI immediately)
@ -300,7 +308,9 @@ async def add_youtube_video_document(
"video_id": video_id, "video_id": video_id,
}, },
) )
logging.info(f"Document for YouTube video {video_id} unchanged. Marking as ready.") logging.info(
f"Document for YouTube video {video_id} unchanged. Marking as ready."
)
document.status = DocumentStatus.ready() document.status = DocumentStatus.ready()
await session.commit() await session.commit()
return document return document
@ -408,7 +418,9 @@ async def add_youtube_video_document(
# Mark document as failed if it exists # Mark document as failed if it exists
if document: if document:
try: try:
document.status = DocumentStatus.failed(f"Database error: {str(db_error)[:150]}") document.status = DocumentStatus.failed(
f"Database error: {str(db_error)[:150]}"
)
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
await session.commit() await session.commit()
except Exception: except Exception:

View file

@ -38,7 +38,9 @@ export function DocumentTypeChip({ type, className }: { type: string; className?
className={`inline-flex items-center gap-1.5 rounded bg-muted/40 px-2 py-1 text-xs text-muted-foreground max-w-full overflow-hidden ${className ?? ""}`} className={`inline-flex items-center gap-1.5 rounded bg-muted/40 px-2 py-1 text-xs text-muted-foreground max-w-full overflow-hidden ${className ?? ""}`}
> >
<span className="opacity-80 flex-shrink-0">{icon}</span> <span className="opacity-80 flex-shrink-0">{icon}</span>
<span ref={textRef} className="truncate min-w-0">{fullLabel}</span> <span ref={textRef} className="truncate min-w-0">
{fullLabel}
</span>
</span> </span>
); );

View file

@ -68,9 +68,7 @@ export function DocumentsFilters({
const filteredTypes = useMemo(() => { const filteredTypes = useMemo(() => {
if (!typeSearchQuery.trim()) return uniqueTypes; if (!typeSearchQuery.trim()) return uniqueTypes;
const query = typeSearchQuery.toLowerCase(); const query = typeSearchQuery.toLowerCase();
return uniqueTypes.filter((type) => return uniqueTypes.filter((type) => getDocumentTypeLabel(type).toLowerCase().includes(query));
getDocumentTypeLabel(type).toLowerCase().includes(query)
);
}, [uniqueTypes, typeSearchQuery]); }, [uniqueTypes, typeSearchQuery]);
const typeCounts = useMemo(() => { const typeCounts = useMemo(() => {
@ -156,94 +154,95 @@ export function DocumentsFilters({
{/* Filter Buttons Group */} {/* Filter Buttons Group */}
<div className="flex items-center gap-2 flex-wrap"> <div className="flex items-center gap-2 flex-wrap">
{/* Type Filter */} {/* Type Filter */}
<Popover> <Popover>
<PopoverTrigger asChild> <PopoverTrigger asChild>
<Button <Button
variant="outline" variant="outline"
size="sm" size="sm"
className="h-9 gap-2 border-dashed border-border/60 text-muted-foreground hover:text-foreground hover:border-border" className="h-9 gap-2 border-dashed border-border/60 text-muted-foreground hover:text-foreground hover:border-border"
> >
<FileType size={14} className="text-muted-foreground" /> <FileType size={14} className="text-muted-foreground" />
<span className="hidden sm:inline">Type</span> <span className="hidden sm:inline">Type</span>
{activeTypes.length > 0 && ( {activeTypes.length > 0 && (
<span className="flex h-5 w-5 items-center justify-center rounded-full bg-primary text-[10px] font-medium text-primary-foreground"> <span className="flex h-5 w-5 items-center justify-center rounded-full bg-primary text-[10px] font-medium text-primary-foreground">
{activeTypes.length} {activeTypes.length}
</span> </span>
)} )}
</Button> </Button>
</PopoverTrigger> </PopoverTrigger>
<PopoverContent className="w-64 !p-0 overflow-hidden" align="end"> <PopoverContent className="w-64 !p-0 overflow-hidden" align="end">
<div> <div>
{/* Search input */} {/* Search input */}
<div className="p-2 border-b border-border/50"> <div className="p-2 border-b border-border/50">
<div className="relative"> <div className="relative">
<Search className="absolute left-0.5 top-1/2 -translate-y-1/2 h-4 w-4 text-muted-foreground" /> <Search className="absolute left-0.5 top-1/2 -translate-y-1/2 h-4 w-4 text-muted-foreground" />
<Input <Input
placeholder="Search types..." placeholder="Search types..."
value={typeSearchQuery} value={typeSearchQuery}
onChange={(e) => setTypeSearchQuery(e.target.value)} onChange={(e) => setTypeSearchQuery(e.target.value)}
className="h-6 pl-6 text-sm bg-transparent border-0 focus-visible:ring-0" className="h-6 pl-6 text-sm bg-transparent border-0 focus-visible:ring-0"
/> />
</div>
</div>
<div className="max-h-[300px] overflow-y-auto overflow-x-hidden py-1.5 px-1.5">
{filteredTypes.length === 0 ? (
<div className="py-6 text-center text-sm text-muted-foreground">
No types found
</div> </div>
) : ( </div>
filteredTypes.map((value: DocumentTypeEnum, i) => (
<button <div className="max-h-[300px] overflow-y-auto overflow-x-hidden py-1.5 px-1.5">
key={value} {filteredTypes.length === 0 ? (
type="button" <div className="py-6 text-center text-sm text-muted-foreground">
className="flex w-full items-center gap-2.5 py-2 px-3 rounded-md hover:bg-muted/50 transition-colors cursor-pointer text-left" No types found
onClick={() => onToggleType(value, !activeTypes.includes(value))} </div>
) : (
filteredTypes.map((value: DocumentTypeEnum, i) => (
<button
key={value}
type="button"
className="flex w-full items-center gap-2.5 py-2 px-3 rounded-md hover:bg-muted/50 transition-colors cursor-pointer text-left"
onClick={() => onToggleType(value, !activeTypes.includes(value))}
>
{/* Icon */}
<div className="flex h-7 w-7 shrink-0 items-center justify-center rounded-md bg-muted/50 text-foreground/80">
{getDocumentTypeIcon(value, "h-4 w-4")}
</div>
{/* Text content */}
<div className="flex flex-col min-w-0 flex-1 gap-0.5">
<span className="text-[13px] font-medium text-foreground truncate leading-tight">
{getDocumentTypeLabel(value)}
</span>
<span className="text-[11px] text-muted-foreground leading-tight">
{typeCounts.get(value)} document
{(typeCounts.get(value) ?? 0) !== 1 ? "s" : ""}
</span>
</div>
{/* Checkbox */}
<Checkbox
id={`${id}-${i}`}
checked={activeTypes.includes(value)}
onCheckedChange={(checked: boolean) => onToggleType(value, !!checked)}
className="h-4 w-4 shrink-0 rounded border-muted-foreground/30 data-[state=checked]:bg-primary data-[state=checked]:border-primary"
/>
</button>
))
)}
</div>
{activeTypes.length > 0 && (
<div className="px-3 pt-1.5 pb-1.5 border-t border-border/50">
<Button
variant="ghost"
size="sm"
className="w-full h-7 text-[11px] text-muted-foreground hover:text-foreground"
onClick={() => {
activeTypes.forEach((t) => {
onToggleType(t, false);
});
}}
> >
{/* Icon */} Clear filters
<div className="flex h-7 w-7 shrink-0 items-center justify-center rounded-md bg-muted/50 text-foreground/80"> </Button>
{getDocumentTypeIcon(value, "h-4 w-4")} </div>
</div>
{/* Text content */}
<div className="flex flex-col min-w-0 flex-1 gap-0.5">
<span className="text-[13px] font-medium text-foreground truncate leading-tight">
{getDocumentTypeLabel(value)}
</span>
<span className="text-[11px] text-muted-foreground leading-tight">
{typeCounts.get(value)} document{(typeCounts.get(value) ?? 0) !== 1 ? "s" : ""}
</span>
</div>
{/* Checkbox */}
<Checkbox
id={`${id}-${i}`}
checked={activeTypes.includes(value)}
onCheckedChange={(checked: boolean) => onToggleType(value, !!checked)}
className="h-4 w-4 shrink-0 rounded border-muted-foreground/30 data-[state=checked]:bg-primary data-[state=checked]:border-primary"
/>
</button>
))
)} )}
</div> </div>
{activeTypes.length > 0 && ( </PopoverContent>
<div className="px-3 pt-1.5 pb-1.5 border-t border-border/50"> </Popover>
<Button
variant="ghost"
size="sm"
className="w-full h-7 text-[11px] text-muted-foreground hover:text-foreground"
onClick={() => {
activeTypes.forEach((t) => {
onToggleType(t, false);
});
}}
>
Clear filters
</Button>
</div>
)}
</div>
</PopoverContent>
</Popover>
{/* Bulk Delete Button */} {/* Bulk Delete Button */}
{selectedIds.size > 0 && ( {selectedIds.size > 0 && (
@ -255,22 +254,14 @@ export function DocumentsFilters({
exit={{ opacity: 0, scale: 0.9 }} exit={{ opacity: 0, scale: 0.9 }}
> >
{/* Mobile: icon with count */} {/* Mobile: icon with count */}
<Button <Button variant="destructive" size="sm" className="h-9 gap-1.5 px-2.5 md:hidden">
variant="destructive"
size="sm"
className="h-9 gap-1.5 px-2.5 md:hidden"
>
<Trash size={14} /> <Trash size={14} />
<span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium"> <span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium">
{selectedIds.size} {selectedIds.size}
</span> </span>
</Button> </Button>
{/* Desktop: full button */} {/* Desktop: full button */}
<Button <Button variant="destructive" size="sm" className="h-9 gap-2 hidden md:flex">
variant="destructive"
size="sm"
className="h-9 gap-2 hidden md:flex"
>
<Trash size={14} /> <Trash size={14} />
Delete Delete
<span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium"> <span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium">
@ -288,9 +279,12 @@ export function DocumentsFilters({
<CircleAlert size={18} strokeWidth={2} /> <CircleAlert size={18} strokeWidth={2} />
</div> </div>
<AlertDialogHeader className="flex-1"> <AlertDialogHeader className="flex-1">
<AlertDialogTitle>Delete {selectedIds.size} document{selectedIds.size !== 1 ? "s" : ""}?</AlertDialogTitle> <AlertDialogTitle>
Delete {selectedIds.size} document{selectedIds.size !== 1 ? "s" : ""}?
</AlertDialogTitle>
<AlertDialogDescription> <AlertDialogDescription>
This action cannot be undone. This will permanently delete the selected {selectedIds.size === 1 ? "document" : "documents"} from your search space. This action cannot be undone. This will permanently delete the selected{" "}
{selectedIds.size === 1 ? "document" : "documents"} from your search space.
</AlertDialogDescription> </AlertDialogDescription>
</AlertDialogHeader> </AlertDialogHeader>
</div> </div>

View file

@ -1,7 +1,20 @@
"use client"; "use client";
import { formatDistanceToNow } from "date-fns"; import { formatDistanceToNow } from "date-fns";
import { AlertCircle, Calendar, CheckCircle2, ChevronDown, ChevronUp, Clock, FileText, FileX, Loader2, Network, Plus, User } from "lucide-react"; import {
AlertCircle,
Calendar,
CheckCircle2,
ChevronDown,
ChevronUp,
Clock,
FileText,
FileX,
Loader2,
Network,
Plus,
User,
} from "lucide-react";
import { motion } from "motion/react"; import { motion } from "motion/react";
import { useTranslations } from "next-intl"; import { useTranslations } from "next-intl";
import React, { useRef, useState, useEffect, useCallback } from "react"; import React, { useRef, useState, useEffect, useCallback } from "react";
@ -10,12 +23,7 @@ import { JsonMetadataViewer } from "@/components/json-metadata-viewer";
import { MarkdownViewer } from "@/components/markdown-viewer"; import { MarkdownViewer } from "@/components/markdown-viewer";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { Checkbox } from "@/components/ui/checkbox"; import { Checkbox } from "@/components/ui/checkbox";
import { import { Dialog, DialogContent, DialogHeader, DialogTitle } from "@/components/ui/dialog";
Dialog,
DialogContent,
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import { Skeleton } from "@/components/ui/skeleton"; import { Skeleton } from "@/components/ui/skeleton";
import { Spinner } from "@/components/ui/spinner"; import { Spinner } from "@/components/ui/spinner";
import { import {
@ -35,7 +43,7 @@ import type { ColumnVisibility, Document, DocumentStatus } from "./types";
// Status indicator component for document processing status // Status indicator component for document processing status
function StatusIndicator({ status }: { status?: DocumentStatus }) { function StatusIndicator({ status }: { status?: DocumentStatus }) {
const state = status?.state ?? "ready"; const state = status?.state ?? "ready";
switch (state) { switch (state) {
case "pending": case "pending":
return ( return (
@ -176,12 +184,10 @@ function SortableHeader({
> >
{icon && <span className="opacity-60">{icon}</span>} {icon && <span className="opacity-60">{icon}</span>}
{children} {children}
<span className={`transition-opacity ${isActive ? "opacity-100" : "opacity-0 group-hover:opacity-50"}`}> <span
{isActive && sortDesc ? ( className={`transition-opacity ${isActive ? "opacity-100" : "opacity-0 group-hover:opacity-50"}`}
<ChevronDown size={14} /> >
) : ( {isActive && sortDesc ? <ChevronDown size={14} /> : <ChevronUp size={14} />}
<ChevronUp size={14} />
)}
</span> </span>
</button> </button>
); );
@ -300,8 +306,10 @@ export function DocumentsTableShell({
// Only consider selectable documents for "select all" logic // Only consider selectable documents for "select all" logic
const selectableDocs = sorted.filter(isSelectable); const selectableDocs = sorted.filter(isSelectable);
const allSelectedOnPage = selectableDocs.length > 0 && selectableDocs.every((d) => selectedIds.has(d.id)); const allSelectedOnPage =
const someSelectedOnPage = selectableDocs.some((d) => selectedIds.has(d.id)) && !allSelectedOnPage; selectableDocs.length > 0 && selectableDocs.every((d) => selectedIds.has(d.id));
const someSelectedOnPage =
selectableDocs.some((d) => selectedIds.has(d.id)) && !allSelectedOnPage;
const toggleAll = (checked: boolean) => { const toggleAll = (checked: boolean) => {
const next = new Set(selectedIds); const next = new Set(selectedIds);
@ -388,10 +396,7 @@ export function DocumentsTableShell({
</div> </div>
</TableCell> </TableCell>
<TableCell className="w-[35%] py-2.5 max-w-0 border-r border-border/40"> <TableCell className="w-[35%] py-2.5 max-w-0 border-r border-border/40">
<Skeleton <Skeleton className="h-4" style={{ width: `${widthPercent}%` }} />
className="h-4"
style={{ width: `${widthPercent}%` }}
/>
</TableCell> </TableCell>
{columnVisibility.document_type && ( {columnVisibility.document_type && (
<TableCell className="w-[20%] min-w-[120px] max-w-[200px] py-2.5 border-r border-border/40 overflow-hidden"> <TableCell className="w-[20%] min-w-[120px] max-w-[200px] py-2.5 border-r border-border/40 overflow-hidden">
@ -429,24 +434,15 @@ export function DocumentsTableShell({
<div className="flex items-start gap-3"> <div className="flex items-start gap-3">
<Skeleton className="h-4 w-4 mt-0.5 rounded" /> <Skeleton className="h-4 w-4 mt-0.5 rounded" />
<div className="flex-1 min-w-0 space-y-2"> <div className="flex-1 min-w-0 space-y-2">
<Skeleton <Skeleton className="h-4" style={{ width: `${widthPercent}%` }} />
className="h-4"
style={{ width: `${widthPercent}%` }}
/>
<div className="flex flex-wrap items-center gap-2"> <div className="flex flex-wrap items-center gap-2">
<Skeleton className="h-5 w-20 rounded" /> <Skeleton className="h-5 w-20 rounded" />
{columnVisibility.created_by && ( {columnVisibility.created_by && <Skeleton className="h-3 w-14" />}
<Skeleton className="h-3 w-14" /> {columnVisibility.created_at && <Skeleton className="h-3 w-20" />}
)}
{columnVisibility.created_at && (
<Skeleton className="h-3 w-20" />
)}
</div> </div>
</div> </div>
<div className="flex items-center gap-2"> <div className="flex items-center gap-2">
{columnVisibility.status && ( {columnVisibility.status && <Skeleton className="h-5 w-5 rounded-full" />}
<Skeleton className="h-5 w-5 rounded-full" />
)}
<Skeleton className="h-7 w-7 rounded" /> <Skeleton className="h-7 w-7 rounded" />
</div> </div>
</div> </div>
@ -549,9 +545,7 @@ export function DocumentsTableShell({
)} )}
{columnVisibility.status && ( {columnVisibility.status && (
<TableHead className="w-20 text-center"> <TableHead className="w-20 text-center">
<span className="text-sm font-medium text-muted-foreground/70"> <span className="text-sm font-medium text-muted-foreground/70">Status</span>
Status
</span>
</TableHead> </TableHead>
)} )}
<TableHead className="w-10"> <TableHead className="w-10">
@ -580,9 +574,7 @@ export function DocumentsTableShell({
}, },
}} }}
className={`border-b border-border/40 transition-colors ${ className={`border-b border-border/40 transition-colors ${
isSelected isSelected ? "bg-primary/5 hover:bg-primary/8" : "hover:bg-muted/30"
? "bg-primary/5 hover:bg-primary/8"
: "hover:bg-muted/30"
}`} }`}
> >
<TableCell className="w-8 px-0 py-2.5 text-center"> <TableCell className="w-8 px-0 py-2.5 text-center">
@ -591,7 +583,9 @@ export function DocumentsTableShell({
checked={isSelected} checked={isSelected}
onCheckedChange={(v) => canSelect && toggleOne(doc.id, !!v)} onCheckedChange={(v) => canSelect && toggleOne(doc.id, !!v)}
disabled={!canSelect} disabled={!canSelect}
aria-label={canSelect ? "Select row" : "Cannot select while processing"} aria-label={
canSelect ? "Select row" : "Cannot select while processing"
}
className={`border-foreground data-[state=checked]:bg-primary data-[state=checked]:border-primary ${!canSelect ? "opacity-40 cursor-not-allowed" : ""}`} className={`border-foreground data-[state=checked]:bg-primary data-[state=checked]:border-primary ${!canSelect ? "opacity-40 cursor-not-allowed" : ""}`}
/> />
</div> </div>
@ -639,7 +633,9 @@ export function DocumentsTableShell({
<TableCell className="w-32 py-2.5 text-sm text-foreground border-r border-border/40"> <TableCell className="w-32 py-2.5 text-sm text-foreground border-r border-border/40">
<Tooltip> <Tooltip>
<TooltipTrigger asChild> <TooltipTrigger asChild>
<span className="cursor-default">{formatRelativeDate(doc.created_at)}</span> <span className="cursor-default">
{formatRelativeDate(doc.created_at)}
</span>
</TooltipTrigger> </TooltipTrigger>
<TooltipContent side="top"> <TooltipContent side="top">
{formatAbsoluteDate(doc.created_at)} {formatAbsoluteDate(doc.created_at)}
@ -720,9 +716,7 @@ export function DocumentsTableShell({
<div className="flex flex-wrap items-center gap-2"> <div className="flex flex-wrap items-center gap-2">
<DocumentTypeChip type={doc.document_type} /> <DocumentTypeChip type={doc.document_type} />
{columnVisibility.created_by && doc.created_by_name && ( {columnVisibility.created_by && doc.created_by_name && (
<span className="text-xs text-foreground"> <span className="text-xs text-foreground">{doc.created_by_name}</span>
{doc.created_by_name}
</span>
)} )}
{columnVisibility.created_at && ( {columnVisibility.created_at && (
<Tooltip> <Tooltip>

View file

@ -46,7 +46,8 @@ export function RowActions({
); );
// Documents in "pending" or "processing" state should show disabled delete // Documents in "pending" or "processing" state should show disabled delete
const isBeingProcessed = document.status?.state === "pending" || document.status?.state === "processing"; const isBeingProcessed =
document.status?.state === "pending" || document.status?.state === "processing";
// SURFSENSE_DOCS are system-managed and should not show delete at all // SURFSENSE_DOCS are system-managed and should not show delete at all
const shouldShowDelete = !NON_DELETABLE_DOCUMENT_TYPES.includes( const shouldShowDelete = !NON_DELETABLE_DOCUMENT_TYPES.includes(
@ -67,8 +68,9 @@ export function RowActions({
} catch (error: unknown) { } catch (error: unknown) {
console.error("Error deleting document:", error); console.error("Error deleting document:", error);
// Check for 409 Conflict (document started processing after UI loaded) // Check for 409 Conflict (document started processing after UI loaded)
const status = (error as { response?: { status?: number } })?.response?.status const status =
?? (error as { status?: number })?.status; (error as { response?: { status?: number } })?.response?.status ??
(error as { status?: number })?.status;
if (status === 409) { if (status === 409) {
toast.error("Document is now being processed. Please try again later."); toast.error("Document is now being processed. Please try again later.");
} else { } else {
@ -92,7 +94,11 @@ export function RowActions({
// Editable documents: show 3-dot dropdown with edit + delete // Editable documents: show 3-dot dropdown with edit + delete
<DropdownMenu> <DropdownMenu>
<DropdownMenuTrigger asChild> <DropdownMenuTrigger asChild>
<Button variant="ghost" size="icon" className="h-8 w-8 text-muted-foreground hover:text-foreground hover:bg-muted/80"> <Button
variant="ghost"
size="icon"
className="h-8 w-8 text-muted-foreground hover:text-foreground hover:bg-muted/80"
>
<MoreHorizontal className="h-4 w-4" /> <MoreHorizontal className="h-4 w-4" />
<span className="sr-only">Open menu</span> <span className="sr-only">Open menu</span>
</Button> </Button>
@ -101,7 +107,9 @@ export function RowActions({
<DropdownMenuItem <DropdownMenuItem
onClick={() => !isEditDisabled && handleEdit()} onClick={() => !isEditDisabled && handleEdit()}
disabled={isEditDisabled} disabled={isEditDisabled}
className={isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""} className={
isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""
}
> >
<Pencil className="mr-2 h-4 w-4" /> <Pencil className="mr-2 h-4 w-4" />
<span>Edit</span> <span>Edit</span>
@ -110,7 +118,11 @@ export function RowActions({
<DropdownMenuItem <DropdownMenuItem
onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)} onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
disabled={isDeleteDisabled} disabled={isDeleteDisabled}
className={isDeleteDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : "text-destructive focus:text-destructive"} className={
isDeleteDisabled
? "text-muted-foreground cursor-not-allowed opacity-50"
: "text-destructive focus:text-destructive"
}
> >
<Trash2 className="mr-2 h-4 w-4" /> <Trash2 className="mr-2 h-4 w-4" />
<span>Delete</span> <span>Delete</span>
@ -150,7 +162,9 @@ export function RowActions({
<DropdownMenuItem <DropdownMenuItem
onClick={() => !isEditDisabled && handleEdit()} onClick={() => !isEditDisabled && handleEdit()}
disabled={isEditDisabled} disabled={isEditDisabled}
className={isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""} className={
isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""
}
> >
<Pencil className="mr-2 h-4 w-4" /> <Pencil className="mr-2 h-4 w-4" />
<span>Edit</span> <span>Edit</span>
@ -159,7 +173,11 @@ export function RowActions({
<DropdownMenuItem <DropdownMenuItem
onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)} onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
disabled={isDeleteDisabled} disabled={isDeleteDisabled}
className={isDeleteDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : "text-destructive focus:text-destructive"} className={
isDeleteDisabled
? "text-muted-foreground cursor-not-allowed opacity-50"
: "text-destructive focus:text-destructive"
}
> >
<Trash2 className="mr-2 h-4 w-4" /> <Trash2 className="mr-2 h-4 w-4" />
<span>Delete</span> <span>Delete</span>

View file

@ -116,13 +116,15 @@ export default function DocumentsTable() {
created_by_id: item.created_by_id ?? null, created_by_id: item.created_by_id ?? null,
created_by_name: item.created_by_name ?? null, created_by_name: item.created_by_name ?? null,
created_at: item.created_at, created_at: item.created_at,
status: (item as { status?: { state: "ready" | "pending" | "processing" | "failed"; reason?: string } }).status ?? { state: "ready" as const }, status: (
item as {
status?: { state: "ready" | "pending" | "processing" | "failed"; reason?: string };
}
).status ?? { state: "ready" as const },
})) }))
: paginatedRealtimeDocuments; : paginatedRealtimeDocuments;
const displayTotal = isSearchMode const displayTotal = isSearchMode ? searchResponse?.total || 0 : sortedRealtimeDocuments.length;
? searchResponse?.total || 0
: sortedRealtimeDocuments.length;
const loading = isSearchMode ? isSearchLoading : realtimeLoading; const loading = isSearchMode ? isSearchLoading : realtimeLoading;
const error = isSearchMode ? searchError : realtimeError; const error = isSearchMode ? searchError : realtimeError;
@ -149,13 +151,13 @@ export default function DocumentsTable() {
// Filter out pending/processing documents - they cannot be deleted // Filter out pending/processing documents - they cannot be deleted
// For real-time mode, use sortedRealtimeDocuments (which has status) // For real-time mode, use sortedRealtimeDocuments (which has status)
// For search mode, use searchResponse items (need to safely access status) // For search mode, use searchResponse items (need to safely access status)
const allDocs = isSearchMode const allDocs = isSearchMode
? (searchResponse?.items || []).map(item => ({ ? (searchResponse?.items || []).map((item) => ({
id: item.id, id: item.id,
status: (item as { status?: { state: string } }).status, status: (item as { status?: { state: string } }).status,
})) }))
: sortedRealtimeDocuments.map(doc => ({ id: doc.id, status: doc.status })); : sortedRealtimeDocuments.map((doc) => ({ id: doc.id, status: doc.status }));
const selectedDocs = allDocs.filter((doc) => selectedIds.has(doc.id)); const selectedDocs = allDocs.filter((doc) => selectedIds.has(doc.id));
const deletableIds = selectedDocs const deletableIds = selectedDocs
.filter((doc) => doc.status?.state !== "pending" && doc.status?.state !== "processing") .filter((doc) => doc.status?.state !== "pending" && doc.status?.state !== "processing")
@ -163,7 +165,9 @@ export default function DocumentsTable() {
const inProgressCount = selectedIds.size - deletableIds.length; const inProgressCount = selectedIds.size - deletableIds.length;
if (inProgressCount > 0) { if (inProgressCount > 0) {
toast.warning(`${inProgressCount} document(s) are pending or processing and cannot be deleted.`); toast.warning(
`${inProgressCount} document(s) are pending or processing and cannot be deleted.`
);
} }
if (deletableIds.length === 0) { if (deletableIds.length === 0) {
@ -180,8 +184,9 @@ export default function DocumentsTable() {
await deleteDocumentMutation({ id }); await deleteDocumentMutation({ id });
return true; return true;
} catch (error: unknown) { } catch (error: unknown) {
const status = (error as { response?: { status?: number } })?.response?.status const status =
?? (error as { status?: number })?.status; (error as { response?: { status?: number } })?.response?.status ??
(error as { status?: number })?.status;
if (status === 409) conflictCount++; if (status === 409) conflictCount++;
return false; return false;
} }
@ -195,13 +200,13 @@ export default function DocumentsTable() {
} else { } else {
toast.error(t("delete_partial_failed")); toast.error(t("delete_partial_failed"));
} }
// If in search mode, refetch search results to reflect deletion // If in search mode, refetch search results to reflect deletion
if (isSearchMode) { if (isSearchMode) {
await refetchSearch(); await refetchSearch();
} }
// Real-time mode: Electric will sync the deletion automatically // Real-time mode: Electric will sync the deletion automatically
setSelectedIds(new Set()); setSelectedIds(new Set());
} catch (e) { } catch (e) {
console.error(e); console.error(e);
@ -210,21 +215,24 @@ export default function DocumentsTable() {
}; };
// Single document delete handler for RowActions // Single document delete handler for RowActions
const handleDeleteDocument = useCallback(async (id: number): Promise<boolean> => { const handleDeleteDocument = useCallback(
try { async (id: number): Promise<boolean> => {
await deleteDocumentMutation({ id }); try {
toast.success(t("delete_success") || "Document deleted"); await deleteDocumentMutation({ id });
// If in search mode, refetch search results to reflect deletion toast.success(t("delete_success") || "Document deleted");
if (isSearchMode) { // If in search mode, refetch search results to reflect deletion
await refetchSearch(); if (isSearchMode) {
await refetchSearch();
}
// Real-time mode: Electric will sync the deletion automatically
return true;
} catch (e) {
console.error("Error deleting document:", e);
return false;
} }
// Real-time mode: Electric will sync the deletion automatically },
return true; [deleteDocumentMutation, isSearchMode, refetchSearch, t]
} catch (e) { );
console.error("Error deleting document:", e);
return false;
}
}, [deleteDocumentMutation, isSearchMode, refetchSearch, t]);
const handleSortChange = useCallback((key: SortKey) => { const handleSortChange = useCallback((key: SortKey) => {
setSortKey((currentKey) => { setSortKey((currentKey) => {

View file

@ -2,4 +2,3 @@ import { atom } from "jotai";
/**
 * Shared open/closed flag for the connector dialog.
 * Any component in the app can read or write it to show or hide the dialog.
 */
export const connectorDialogOpenAtom = atom<boolean>(false);

View file

@ -191,7 +191,9 @@ export const ConnectorIndicator: FC<{ hideTrigger?: boolean }> = ({ hideTrigger
{!hideTrigger && ( {!hideTrigger && (
<TooltipIconButton <TooltipIconButton
data-joyride="connector-icon" data-joyride="connector-icon"
tooltip={hasConnectors ? `Manage ${activeConnectorsCount} connectors` : "Connect your data"} tooltip={
hasConnectors ? `Manage ${activeConnectorsCount} connectors` : "Connect your data"
}
side="bottom" side="bottom"
className={cn( className={cn(
"size-[34px] rounded-full p-1 flex items-center justify-center transition-colors relative", "size-[34px] rounded-full p-1 flex items-center justify-center transition-colors relative",

View file

@ -346,13 +346,13 @@ export const useConnectorDialog = () => {
const connectorId = parseInt(params.connectorId, 10); const connectorId = parseInt(params.connectorId, 10);
newConnector = result.data.find((c: SearchSourceConnector) => c.id === connectorId); newConnector = result.data.find((c: SearchSourceConnector) => c.id === connectorId);
// If we found the connector, find the matching OAuth/Composio connector by type // If we found the connector, find the matching OAuth/Composio connector by type
if (newConnector) { if (newConnector) {
const connectorType = newConnector.connector_type; const connectorType = newConnector.connector_type;
oauthConnector = oauthConnector =
OAUTH_CONNECTORS.find((c) => c.connectorType === connectorType) || OAUTH_CONNECTORS.find((c) => c.connectorType === connectorType) ||
COMPOSIO_CONNECTORS.find((c) => c.connectorType === connectorType); COMPOSIO_CONNECTORS.find((c) => c.connectorType === connectorType);
} }
} }
// If we don't have a connector yet, try to find by connector param // If we don't have a connector yet, try to find by connector param
@ -361,12 +361,12 @@ export const useConnectorDialog = () => {
OAUTH_CONNECTORS.find((c) => c.id === params.connector) || OAUTH_CONNECTORS.find((c) => c.id === params.connector) ||
COMPOSIO_CONNECTORS.find((c) => c.id === params.connector); COMPOSIO_CONNECTORS.find((c) => c.id === params.connector);
if (oauthConnector) { if (oauthConnector) {
const oauthConnectorType = oauthConnector.connectorType; const oauthConnectorType = oauthConnector.connectorType;
newConnector = result.data.find( newConnector = result.data.find(
(c: SearchSourceConnector) => c.connector_type === oauthConnectorType (c: SearchSourceConnector) => c.connector_type === oauthConnectorType
); );
} }
} }
if (newConnector && oauthConnector) { if (newConnector && oauthConnector) {
@ -679,11 +679,11 @@ export const useConnectorDialog = () => {
}, },
}); });
const successMessage = const successMessage =
currentConnectorType === "MCP_CONNECTOR" currentConnectorType === "MCP_CONNECTOR"
? `${connector.name} added successfully` ? `${connector.name} added successfully`
: `${connectorTitle} connected and syncing started!`; : `${connectorTitle} connected and syncing started!`;
toast.success(successMessage); toast.success(successMessage);
const url = new URL(window.location.href); const url = new URL(window.location.href);
url.searchParams.delete("modal"); url.searchParams.delete("modal");

View file

@ -8,172 +8,167 @@ import { cn } from "@/lib/utils";
// /////////////////////////////////////////////////////////////////////////// // ///////////////////////////////////////////////////////////////////////////
// Types // Types
/** Shape of the view-transition effect that `createAnimation` can generate. */
export type AnimationVariant =
	| "circle"
	| "circle-blur"
	| "rectangle"
	| "polygon"
	| "gif";
/**
 * Where (or in which direction) a theme transition starts.
 *
 * Corner, edge-centre and "center" values are anchor points; the four
 * directional values ("bottom-up", "top-down", "left-right", "right-left")
 * are sweep directions consumed by the "rectangle" variant's clip paths.
 */
export type AnimationStart =
	// Anchor points
	| "center"
	| "top-left"
	| "top-center"
	| "top-right"
	| "bottom-left"
	| "bottom-center"
	| "bottom-right"
	// Sweep directions
	| "bottom-up"
	| "top-down"
	| "left-right"
	| "right-left";
/** Result of `createAnimation`: an identifying name plus the CSS that drives the transition. */
interface Animation {
	/** Identifier built from the variant and start position (with an optional "-blur" suffix). */
	name: string;
	/** Stylesheet text; `useThemeToggle` writes it into a `<style>` element. */
	css: string;
}
// /////////////////////////////////////////////////////////////////////////// // ///////////////////////////////////////////////////////////////////////////
// Helper functions // Helper functions
/**
 * Circle-centre coordinates, in the 40x40 SVG viewBox used by `generateSVG`,
 * for every start position. Directional starts and "center" share the middle
 * of the box.
 */
const POSITION_COORDS: Record<AnimationStart, { cx: string; cy: string }> = {
	"top-left": { cx: "0", cy: "0" },
	"top-right": { cx: "40", cy: "0" },
	"bottom-left": { cx: "0", cy: "40" },
	"bottom-right": { cx: "40", cy: "40" },
	"top-center": { cx: "20", cy: "0" },
	"bottom-center": { cx: "20", cy: "40" },
	center: { cx: "20", cy: "20" },
	"bottom-up": { cx: "20", cy: "20" },
	"top-down": { cx: "20", cy: "20" },
	"left-right": { cx: "20", cy: "20" },
	"right-left": { cx: "20", cy: "20" },
};

/**
 * Resolves a start position to the `cx`/`cy` attributes of the mask circle.
 *
 * Every member of `AnimationStart` is covered, including "center", which
 * previously fell through the switch and produced `undefined`. A value
 * outside the union (possible only from untyped callers) still yields
 * `undefined` at runtime, so existing `if (!positionCoords)` guards in
 * callers keep working.
 *
 * @param position - Start position of the animation.
 * @returns The circle centre as string coordinates.
 */
const getPositionCoords = (position: AnimationStart): { cx: string; cy: string } => {
	return POSITION_COORDS[position];
};
/**
 * Builds the inline SVG data URI for an animation variant.
 *
 * - "circle-blur": always returns a blurred circle (radius 18); "center" is
 *   placed at (20, 20), any other start is looked up via `getPositionCoords`.
 * - any other variant with start "center": returns `undefined`.
 * - "rectangle": returns an empty string.
 * - "circle": returns a solid circle (radius 20) at the looked-up position.
 * - everything else: returns an empty string.
 *
 * @throws Error when `getPositionCoords` yields no coordinates for `start`.
 */
const generateSVG = (variant: AnimationVariant, start: AnimationStart) => {
	// Shared lookup: resolve the circle centre or fail loudly.
	const resolveCentre = () => {
		const coords = getPositionCoords(start);
		if (!coords) {
			throw new Error(`Invalid start position: ${start}`);
		}
		return coords;
	};

	const isCentered = start === "center";

	if (variant === "circle-blur") {
		const { cx, cy } = isCentered ? { cx: "20", cy: "20" } : resolveCentre();
		return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><defs><filter id="blur"><feGaussianBlur stdDeviation="2"/></filter></defs><circle cx="${cx}" cy="${cy}" r="18" fill="white" filter="url(%23blur)"/></svg>`;
	}

	if (isCentered) return;
	if (variant === "rectangle") return "";

	// Looked up before the variant check so an invalid start still throws for every variant.
	const { cx, cy } = resolveCentre();

	return variant === "circle"
		? `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><circle cx="${cx}" cy="${cy}" r="20" fill="white"/></svg>`
		: "";
};
/**
 * Maps a start position to a CSS `transform-origin` keyword pair.
 *
 * "center" is handled explicitly: `createAnimation` defaults `start` to
 * "center" and calls this function unconditionally, so the missing case made
 * the default path return `undefined`. Directional starts also map to
 * "center".
 *
 * @param start - Start position of the animation.
 * @returns A `transform-origin` value such as "top left" or "center".
 */
const getTransformOrigin = (start: AnimationStart): string => {
	switch (start) {
		case "top-left":
			return "top left";
		case "top-right":
			return "top right";
		case "bottom-left":
			return "bottom left";
		case "bottom-right":
			return "bottom right";
		case "top-center":
			return "top center";
		case "bottom-center":
			return "bottom center";
		case "center":
		case "bottom-up":
		case "top-down":
		case "left-right":
		case "right-left":
			return "center";
		default:
			// Unreachable for typed callers; keeps a usable value for untyped ones.
			return "center";
	}
};
export const createAnimation = ( export const createAnimation = (
variant: AnimationVariant, variant: AnimationVariant,
start: AnimationStart = "center", start: AnimationStart = "center",
blur = false, blur = false,
url?: string, url?: string
): Animation => { ): Animation => {
const svg = generateSVG(variant, start); const svg = generateSVG(variant, start);
const transformOrigin = getTransformOrigin(start); const transformOrigin = getTransformOrigin(start);
if (variant === "rectangle") { if (variant === "rectangle") {
const getClipPath = (direction: AnimationStart) => { const getClipPath = (direction: AnimationStart) => {
switch (direction) { switch (direction) {
case "bottom-up": case "bottom-up":
return { return {
from: "polygon(0% 100%, 100% 100%, 100% 100%, 0% 100%)", from: "polygon(0% 100%, 100% 100%, 100% 100%, 0% 100%)",
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)", to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
}; };
case "top-down": case "top-down":
return { return {
from: "polygon(0% 0%, 100% 0%, 100% 0%, 0% 0%)", from: "polygon(0% 0%, 100% 0%, 100% 0%, 0% 0%)",
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)", to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
}; };
case "left-right": case "left-right":
return { return {
from: "polygon(0% 0%, 0% 0%, 0% 100%, 0% 100%)", from: "polygon(0% 0%, 0% 0%, 0% 100%, 0% 100%)",
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)", to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
}; };
case "right-left": case "right-left":
return { return {
from: "polygon(100% 0%, 100% 0%, 100% 100%, 100% 100%)", from: "polygon(100% 0%, 100% 0%, 100% 100%, 100% 100%)",
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)", to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
}; };
case "top-left": case "top-left":
return { return {
from: "polygon(0% 0%, 0% 0%, 0% 0%, 0% 0%)", from: "polygon(0% 0%, 0% 0%, 0% 0%, 0% 0%)",
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)", to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
}; };
case "top-right": case "top-right":
return { return {
from: "polygon(100% 0%, 100% 0%, 100% 0%, 100% 0%)", from: "polygon(100% 0%, 100% 0%, 100% 0%, 100% 0%)",
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)", to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
}; };
case "bottom-left": case "bottom-left":
return { return {
from: "polygon(0% 100%, 0% 100%, 0% 100%, 0% 100%)", from: "polygon(0% 100%, 0% 100%, 0% 100%, 0% 100%)",
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)", to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
}; };
case "bottom-right": case "bottom-right":
return { return {
from: "polygon(100% 100%, 100% 100%, 100% 100%, 100% 100%)", from: "polygon(100% 100%, 100% 100%, 100% 100%, 100% 100%)",
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)", to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
}; };
default: default:
return { return {
from: "polygon(0% 100%, 100% 100%, 100% 100%, 0% 100%)", from: "polygon(0% 100%, 100% 100%, 100% 100%, 0% 100%)",
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)", to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
}; };
} }
}; };
const clipPath = getClipPath(start); const clipPath = getClipPath(start);
return { return {
name: `${variant}-${start}${blur ? "-blur" : ""}`, name: `${variant}-${start}${blur ? "-blur" : ""}`,
css: ` css: `
::view-transition-group(root) { ::view-transition-group(root) {
animation-duration: 0.7s; animation-duration: 0.7s;
animation-timing-function: var(--expo-out); animation-timing-function: var(--expo-out);
@ -218,12 +213,12 @@ export const createAnimation = (
} }
} }
`, `,
}; };
} }
if (variant === "circle" && start == "center") { if (variant === "circle" && start == "center") {
return { return {
name: `${variant}-${start}${blur ? "-blur" : ""}`, name: `${variant}-${start}${blur ? "-blur" : ""}`,
css: ` css: `
::view-transition-group(root) { ::view-transition-group(root) {
animation-duration: 0.7s; animation-duration: 0.7s;
animation-timing-function: var(--expo-out); animation-timing-function: var(--expo-out);
@ -268,12 +263,12 @@ export const createAnimation = (
} }
} }
`, `,
}; };
} }
if (variant === "gif") { if (variant === "gif") {
return { return {
name: `${variant}-${start}`, name: `${variant}-${start}`,
css: ` css: `
::view-transition-group(root) { ::view-transition-group(root) {
animation-timing-function: var(--expo-in); animation-timing-function: var(--expo-in);
} }
@ -302,14 +297,14 @@ export const createAnimation = (
mask-size: 2000vmax; mask-size: 2000vmax;
} }
}`, }`,
}; };
} }
if (variant === "circle-blur") { if (variant === "circle-blur") {
if (start === "center") { if (start === "center") {
return { return {
name: `${variant}-${start}`, name: `${variant}-${start}`,
css: ` css: `
::view-transition-group(root) { ::view-transition-group(root) {
animation-timing-function: var(--expo-out); animation-timing-function: var(--expo-out);
} }
@ -334,12 +329,12 @@ export const createAnimation = (
} }
} }
`, `,
}; };
} }
return { return {
name: `${variant}-${start}`, name: `${variant}-${start}`,
css: ` css: `
::view-transition-group(root) { ::view-transition-group(root) {
animation-timing-function: var(--expo-out); animation-timing-function: var(--expo-out);
} }
@ -364,41 +359,41 @@ export const createAnimation = (
} }
} }
`, `,
}; };
} }
if (variant === "polygon") { if (variant === "polygon") {
const getPolygonClipPaths = (position: AnimationStart) => { const getPolygonClipPaths = (position: AnimationStart) => {
switch (position) { switch (position) {
case "top-left": case "top-left":
return { return {
darkFrom: "polygon(50% -71%, -50% 71%, -50% 71%, 50% -71%)", darkFrom: "polygon(50% -71%, -50% 71%, -50% 71%, 50% -71%)",
darkTo: "polygon(50% -71%, -50% 71%, 50% 171%, 171% 50%)", darkTo: "polygon(50% -71%, -50% 71%, 50% 171%, 171% 50%)",
lightFrom: "polygon(171% 50%, 50% 171%, 50% 171%, 171% 50%)", lightFrom: "polygon(171% 50%, 50% 171%, 50% 171%, 171% 50%)",
lightTo: "polygon(171% 50%, 50% 171%, -50% 71%, 50% -71%)", lightTo: "polygon(171% 50%, 50% 171%, -50% 71%, 50% -71%)",
}; };
case "top-right": case "top-right":
return { return {
darkFrom: "polygon(150% -71%, 250% 71%, 250% 71%, 150% -71%)", darkFrom: "polygon(150% -71%, 250% 71%, 250% 71%, 150% -71%)",
darkTo: "polygon(150% -71%, 250% 71%, 50% 171%, -71% 50%)", darkTo: "polygon(150% -71%, 250% 71%, 50% 171%, -71% 50%)",
lightFrom: "polygon(-71% 50%, 50% 171%, 50% 171%, -71% 50%)", lightFrom: "polygon(-71% 50%, 50% 171%, 50% 171%, -71% 50%)",
lightTo: "polygon(-71% 50%, 50% 171%, 250% 71%, 150% -71%)", lightTo: "polygon(-71% 50%, 50% 171%, 250% 71%, 150% -71%)",
}; };
default: default:
return { return {
darkFrom: "polygon(50% -71%, -50% 71%, -50% 71%, 50% -71%)", darkFrom: "polygon(50% -71%, -50% 71%, -50% 71%, 50% -71%)",
darkTo: "polygon(50% -71%, -50% 71%, 50% 171%, 171% 50%)", darkTo: "polygon(50% -71%, -50% 71%, 50% 171%, 171% 50%)",
lightFrom: "polygon(171% 50%, 50% 171%, 50% 171%, 171% 50%)", lightFrom: "polygon(171% 50%, 50% 171%, 50% 171%, 171% 50%)",
lightTo: "polygon(171% 50%, 50% 171%, -50% 71%, 50% -71%)", lightTo: "polygon(171% 50%, 50% 171%, -50% 71%, 50% -71%)",
}; };
} }
}; };
const clipPaths = getPolygonClipPaths(start); const clipPaths = getPolygonClipPaths(start);
return { return {
name: `${variant}-${start}${blur ? "-blur" : ""}`, name: `${variant}-${start}${blur ? "-blur" : ""}`,
css: ` css: `
::view-transition-group(root) { ::view-transition-group(root) {
animation-duration: 0.7s; animation-duration: 0.7s;
animation-timing-function: var(--expo-out); animation-timing-function: var(--expo-out);
@ -443,35 +438,35 @@ export const createAnimation = (
} }
} }
`, `,
}; };
} }
// Handle circle variants with start positions using clip-path // Handle circle variants with start positions using clip-path
if (variant === "circle" && start !== "center") { if (variant === "circle" && start !== "center") {
const getClipPathPosition = (position: AnimationStart) => { const getClipPathPosition = (position: AnimationStart) => {
switch (position) { switch (position) {
case "top-left": case "top-left":
return "0% 0%"; return "0% 0%";
case "top-right": case "top-right":
return "100% 0%"; return "100% 0%";
case "bottom-left": case "bottom-left":
return "0% 100%"; return "0% 100%";
case "bottom-right": case "bottom-right":
return "100% 100%"; return "100% 100%";
case "top-center": case "top-center":
return "50% 0%"; return "50% 0%";
case "bottom-center": case "bottom-center":
return "50% 100%"; return "50% 100%";
default: default:
return "50% 50%"; return "50% 50%";
} }
}; };
const clipPosition = getClipPathPosition(start); const clipPosition = getClipPathPosition(start);
return { return {
name: `${variant}-${start}${blur ? "-blur" : ""}`, name: `${variant}-${start}${blur ? "-blur" : ""}`,
css: ` css: `
::view-transition-group(root) { ::view-transition-group(root) {
animation-duration: 1s; animation-duration: 1s;
animation-timing-function: var(--expo-out); animation-timing-function: var(--expo-out);
@ -516,12 +511,12 @@ export const createAnimation = (
} }
} }
`, `,
}; };
} }
return { return {
name: `${variant}-${start}${blur ? "-blur" : ""}`, name: `${variant}-${start}${blur ? "-blur" : ""}`,
css: ` css: `
::view-transition-group(root) { ::view-transition-group(root) {
animation-timing-function: var(--expo-in); animation-timing-function: var(--expo-in);
} }
@ -549,237 +544,229 @@ export const createAnimation = (
} }
} }
`, `,
}; };
}; };
// /////////////////////////////////////////////////////////////////////////// // ///////////////////////////////////////////////////////////////////////////
// Custom hook for theme toggle functionality // Custom hook for theme toggle functionality
/**
 * Hook that switches the theme with an animated view transition.
 *
 * @param options.variant - Animation shape passed to `createAnimation` (default "circle").
 * @param options.start - Animation start position (default "center").
 * @param options.blur - Forwarded to `createAnimation` (default `false`).
 * @param options.gifUrl - Forwarded to `createAnimation` (default "").
 * @returns `isDark` / `setIsDark` for rendering the toggle, plus `toggleTheme`,
 *   `setCrazyLightTheme`, `setCrazyDarkTheme` and `setCrazySystemTheme`.
 *
 * `isDark` starts as `false` and is synchronised with `resolvedTheme` in an
 * effect, so the first render does not depend on the client-side theme.
 */
export const useThemeToggle = ({
	variant = "circle",
	start = "center",
	blur = false,
	gifUrl = "",
}: {
	variant?: AnimationVariant;
	start?: AnimationStart;
	blur?: boolean;
	gifUrl?: string;
} = {}) => {
	const { setTheme, resolvedTheme } = useTheme();

	const [isDark, setIsDark] = useState(false);

	// Sync isDark state with resolved theme after hydration
	useEffect(() => {
		setIsDark(resolvedTheme === "dark");
	}, [resolvedTheme]);

	const styleId = "theme-transition-styles";

	// Writes the transition CSS into a single, lazily created <style> element.
	const updateStyles = useCallback((css: string) => {
		if (typeof window === "undefined") return;

		// getElementById returns null until the element has been created once.
		let styleElement = document.getElementById(styleId) as HTMLStyleElement | null;

		if (!styleElement) {
			styleElement = document.createElement("style");
			styleElement.id = styleId;
			document.head.appendChild(styleElement);
		}

		styleElement.textContent = css;
	}, []);

	// Installs the animation CSS, then applies the theme inside a view transition
	// when the browser supports one (plain switch otherwise).
	const applyTheme = useCallback(
		(nextTheme: "light" | "dark" | "system") => {
			const animation = createAnimation(variant, start, blur, gifUrl);
			updateStyles(animation.css);

			if (typeof window === "undefined") return;

			const switchTheme = () => {
				setTheme(nextTheme);
			};

			if (!document.startViewTransition) {
				switchTheme();
				return;
			}

			document.startViewTransition(switchTheme);
		},
		[setTheme, variant, start, blur, gifUrl, updateStyles]
	);

	const toggleTheme = useCallback(() => {
		// Decide from the *resolved* theme: the stored theme may be "system",
		// in which case comparing it with "light" always picked "light" and the
		// icon state drifted away from the actual theme.
		const nextIsDark = resolvedTheme !== "dark";
		setIsDark(nextIsDark);
		applyTheme(nextIsDark ? "dark" : "light");
	}, [resolvedTheme, applyTheme]);

	const setCrazyLightTheme = useCallback(() => {
		setIsDark(false);
		applyTheme("light");
	}, [applyTheme]);

	const setCrazyDarkTheme = useCallback(() => {
		setIsDark(true);
		applyTheme("dark");
	}, [applyTheme]);

	const setCrazySystemTheme = useCallback(() => {
		if (typeof window === "undefined") return;

		// Mirror the OS preference immediately so the icon matches the theme that is about to apply.
		const prefersDark = window.matchMedia("(prefers-color-scheme: dark)").matches;
		setIsDark(prefersDark);
		applyTheme("system");
	}, [applyTheme]);

	return {
		isDark,
		setIsDark,
		toggleTheme,
		setCrazyLightTheme,
		setCrazyDarkTheme,
		setCrazySystemTheme,
	};
};
// /////////////////////////////////////////////////////////////////////////// // ///////////////////////////////////////////////////////////////////////////
// Theme Toggle Button Component (Sun/Moon Style) // Theme Toggle Button Component (Sun/Moon Style)
/** Timing shared by every animated part of the sun/moon icon. */
const ICON_TRANSITION = { ease: "easeInOut", duration: 0.35 } as const;

/** Path data for the eight sun rays, in render order. */
const SUN_RAY_PATHS = [
	"M16 5.5v-4",
	"M16 30.5v-4",
	"M1.5 16h4",
	"M26.5 16h4",
	"m23.4 8.6 2.8-2.8",
	"m5.7 26.3 2.9-2.9",
	"m5.8 5.8 2.8 2.8",
	"m23.4 23.4 2.9 2.9",
] as const;

interface ThemeToggleButtonProps {
	className?: string;
	variant?: AnimationVariant;
	start?: AnimationStart;
	blur?: boolean;
	gifUrl?: string;
}

/**
 * Round sun/moon button that toggles the theme through `useThemeToggle`.
 *
 * In dark mode the clip path slides over the disc and the rays fade, shrink
 * and rotate away; in light mode the full sun is shown.
 */
export const ThemeToggleButton = ({
	className = "",
	variant = "circle",
	start = "center",
	blur = false,
	gifUrl = "",
}: ThemeToggleButtonProps) => {
	const { isDark, toggleTheme } = useThemeToggle({ variant, start, blur, gifUrl });

	// useId keeps the clip-path id unique when several toggles are mounted at once.
	const clipPathId = `theme-toggle-clip-${useId()}`;

	const buttonClasses = cn(
		"size-10 cursor-pointer rounded-full p-2 transition-all duration-300 active:scale-95 bg-transparent",
		isDark ? "text-white" : "text-black",
		className
	);

	const clipOffset = isDark ? { y: 10, x: -12 } : { y: 0, x: 0 };
	const discRadius = isDark ? 10 : 8;
	const raysState = isDark
		? { rotate: -100, scale: 0.5, opacity: 0 }
		: { rotate: 0, scale: 1, opacity: 1 };

	return (
		<button type="button" className={buttonClasses} onClick={toggleTheme} aria-label="Toggle theme">
			<span className="sr-only">Toggle theme</span>
			<svg
				xmlns="http://www.w3.org/2000/svg"
				aria-hidden="true"
				fill="currentColor"
				strokeLinecap="round"
				viewBox="0 0 32 32"
			>
				<clipPath id={clipPathId}>
					<motion.path
						animate={clipOffset}
						transition={ICON_TRANSITION}
						d="M0-5h30a1 1 0 0 0 9 13v24H0Z"
					/>
				</clipPath>
				<g clipPath={`url(#${clipPathId})`}>
					<motion.circle
						animate={{ r: discRadius }}
						transition={ICON_TRANSITION}
						cx="16"
						cy="16"
					/>
					<motion.g
						animate={raysState}
						transition={ICON_TRANSITION}
						stroke="currentColor"
						strokeWidth="1.5"
					>
						{SUN_RAY_PATHS.map((d) => (
							<path key={d} d={d} />
						))}
					</motion.g>
				</g>
			</svg>
		</button>
	);
};
// /////////////////////////////////////////////////////////////////////////// // ///////////////////////////////////////////////////////////////////////////
// Backwards compatible export (alias for ThemeToggleButton with default settings) // Backwards compatible export (alias for ThemeToggleButton with default settings)
/**
 * Legacy entry point: renders `ThemeToggleButton` with the fixed settings
 * (circle animation from the top-right corner, `size-8`).
 */
export function ThemeTogglerComponent() {
	const legacySettings = {
		variant: "circle",
		start: "top-right",
		className: "size-8",
	} as const;

	return <ThemeToggleButton {...legacySettings} />;
}
/** /**

View file

@ -144,7 +144,7 @@ export function useDocuments(
(doc: DocumentElectric): DocumentDisplay => ({ (doc: DocumentElectric): DocumentDisplay => ({
...doc, ...doc,
created_by_name: doc.created_by_id created_by_name: doc.created_by_id
? userCacheRef.current.get(doc.created_by_id) ?? null ? (userCacheRef.current.get(doc.created_by_id) ?? null)
: null, : null,
status: doc.status ?? { state: "ready" }, status: doc.status ?? { state: "ready" },
}), }),
@ -232,7 +232,15 @@ export function useDocuments(
const handle = await client.syncShape({ const handle = await client.syncShape({
table: "documents", table: "documents",
where: `search_space_id = ${spaceId}`, where: `search_space_id = ${spaceId}`,
columns: ["id", "document_type", "search_space_id", "title", "created_by_id", "created_at", "status"], columns: [
"id",
"document_type",
"search_space_id",
"title",
"created_by_id",
"created_at",
"status",
],
primaryKey: ["id"], primaryKey: ["id"],
}); });
@ -258,7 +266,10 @@ export function useDocuments(
// Set up live query // Set up live query
const db = client.db as { const db = client.db as {
live?: { live?: {
query: <T>(sql: string, params?: (number | string)[]) => Promise<{ query: <T>(
sql: string,
params?: (number | string)[]
) => Promise<{
subscribe: (cb: (result: { rows: T[] }) => void) => void; subscribe: (cb: (result: { rows: T[] }) => void) => void;
unsubscribe?: () => void; unsubscribe?: () => void;
}>; }>;
@ -297,8 +308,7 @@ export function useDocuments(
if (!mounted || !result.rows) return; if (!mounted || !result.rows) return;
// DEBUG: Log first few raw documents to see what's coming from Electric // DEBUG: Log first few raw documents to see what's coming from Electric
console.log("[useDocuments] Raw data sample:", result.rows.slice(0, 3)); console.log("[useDocuments] Raw data sample:", result.rows.slice(0, 3));
const validItems = result.rows.filter(isValidDocument); const validItems = result.rows.filter(isValidDocument);
const isFullySynced = syncHandleRef.current?.isUpToDate ?? false; const isFullySynced = syncHandleRef.current?.isUpToDate ?? false;
@ -309,8 +319,9 @@ export function useDocuments(
// Fetch user names for new users (non-blocking) // Fetch user names for new users (non-blocking)
const unknownUserIds = validItems const unknownUserIds = validItems
.filter((doc): doc is DocumentElectric & { created_by_id: string } => .filter(
doc.created_by_id !== null && !userCacheRef.current.has(doc.created_by_id) (doc): doc is DocumentElectric & { created_by_id: string } =>
doc.created_by_id !== null && !userCacheRef.current.has(doc.created_by_id)
) )
.map((doc) => doc.created_by_id); .map((doc) => doc.created_by_id);
@ -326,7 +337,7 @@ export function useDocuments(
prev.map((doc) => ({ prev.map((doc) => ({
...doc, ...doc,
created_by_name: doc.created_by_id created_by_name: doc.created_by_id
? userCacheRef.current.get(doc.created_by_id) ?? null ? (userCacheRef.current.get(doc.created_by_id) ?? null)
: null, : null,
})) }))
); );
@ -358,7 +369,9 @@ export function useDocuments(
// Case 2: Electric is fully synced - TRUST IT COMPLETELY (handles bulk deletes) // Case 2: Electric is fully synced - TRUST IT COMPLETELY (handles bulk deletes)
if (isFullySynced) { if (isFullySynced) {
const liveDocs = deduplicateAndSort(validItems.map(electricToDisplayDoc)); const liveDocs = deduplicateAndSort(validItems.map(electricToDisplayDoc));
console.log(`[useDocuments] Synced update: ${liveDocs.length} docs (was ${prev.length})`); console.log(
`[useDocuments] Synced update: ${liveDocs.length} docs (was ${prev.length})`
);
return liveDocs; return liveDocs;
} }

View file

@ -444,9 +444,9 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
// in use-inbox.ts generating different sync keys on each render. // in use-inbox.ts generating different sync keys on each render.
// That's now fixed (rounded to midnight UTC in getSyncCutoffDate). // That's now fixed (rounded to midnight UTC in getSyncCutoffDate).
// We can safely use shapeKey for fast incremental sync. // We can safely use shapeKey for fast incremental sync.
const shapeKey = `${userId}_v${SYNC_VERSION}_${table}_${where?.replace(/[^a-zA-Z0-9]/g, "_") || "all"}`; const shapeKey = `${userId}_v${SYNC_VERSION}_${table}_${where?.replace(/[^a-zA-Z0-9]/g, "_") || "all"}`;
// Type assertion to PGlite with electric extension // Type assertion to PGlite with electric extension
const pgWithElectric = db as unknown as { const pgWithElectric = db as unknown as {
electric: { electric: {
@ -495,9 +495,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
// Parse the WHERE clause to build a DELETE statement // Parse the WHERE clause to build a DELETE statement
// The WHERE clause is already validated and formatted // The WHERE clause is already validated and formatted
await tx.exec(`DELETE FROM ${table} WHERE ${validatedWhere}`); await tx.exec(`DELETE FROM ${table} WHERE ${validatedWhere}`);
debugLog( debugLog(`[Electric] 🗑️ Cleared ${table} rows matching: ${validatedWhere}`);
`[Electric] 🗑️ Cleared ${table} rows matching: ${validatedWhere}`
);
} else { } else {
// No WHERE clause means we're syncing the entire table // No WHERE clause means we're syncing the entire table
await tx.exec(`DELETE FROM ${table}`); await tx.exec(`DELETE FROM ${table}`);
@ -514,10 +512,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
}, },
}; };
debugLog( debugLog("[Electric] syncShapeToTable config:", JSON.stringify(shapeConfig, null, 2));
"[Electric] syncShapeToTable config:",
JSON.stringify(shapeConfig, null, 2)
);
let shape: { unsubscribe: () => void; isUpToDate: boolean; stream: unknown }; let shape: { unsubscribe: () => void; isUpToDate: boolean; stream: unknown };
try { try {
@ -550,9 +545,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
retryError instanceof Error ? retryError.message : String(retryError); retryError instanceof Error ? retryError.message : String(retryError);
if (retryMessage.includes("Already syncing")) { if (retryMessage.includes("Already syncing")) {
// Still syncing - create a placeholder handle that indicates the table is being synced // Still syncing - create a placeholder handle that indicates the table is being synced
debugWarn( debugWarn(`[Electric] ${table} still syncing, creating placeholder handle`);
`[Electric] ${table} still syncing, creating placeholder handle`
);
const placeholderHandle: SyncHandle = { const placeholderHandle: SyncHandle = {
unsubscribe: () => { unsubscribe: () => {
debugLog(`[Electric] Placeholder unsubscribe for: ${cacheKey}`); debugLog(`[Electric] Placeholder unsubscribe for: ${cacheKey}`);
@ -656,9 +649,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
// Also check stream's isUpToDate property immediately // Also check stream's isUpToDate property immediately
if (stream?.isUpToDate) { if (stream?.isUpToDate) {
debugLog( debugLog(`[Electric] ✅ Stream isUpToDate is true immediately for ${table}`);
`[Electric] ✅ Stream isUpToDate is true immediately for ${table}`
);
resolveInitialSync(); resolveInitialSync();
} }
} }
@ -671,9 +662,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
} }
if (shape.isUpToDate || stream?.isUpToDate) { if (shape.isUpToDate || stream?.isUpToDate) {
debugLog( debugLog(`[Electric] ✅ Sync completed (detected via polling) for ${table}`);
`[Electric] ✅ Sync completed (detected via polling) for ${table}`
);
clearInterval(pollInterval); clearInterval(pollInterval);
resolveInitialSync(); resolveInitialSync();
} }