chore: ran linting

Anish Sarkar 2026-02-06 05:35:15 +05:30
parent 00a617ef17
commit aa66928154
44 changed files with 2025 additions and 1658 deletions

View file

@@ -13,8 +13,6 @@ Changes:
 from collections.abc import Sequence
 
-import sqlalchemy as sa
-
 from alembic import op
 
 # revision identifiers, used by Alembic.
@@ -77,4 +75,3 @@ def downgrade() -> None:
        END$$;
        """
    )
-

View file

@@ -285,24 +285,28 @@ async def _analyze_gmail_messages_phase1(
             if existing_document:
                 if existing_document.content_hash == content_hash:
                     # Ensure status is ready (might have been stuck in processing/pending)
-                    if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                    if not DocumentStatus.is_state(
+                        existing_document.status, DocumentStatus.READY
+                    ):
                         existing_document.status = DocumentStatus.ready()
                     documents_skipped += 1
                     continue

                 # Queue existing document for update (will be set to processing in Phase 2)
-                messages_to_process.append({
-                    'document': existing_document,
-                    'is_new': False,
-                    'markdown_content': markdown_content,
-                    'content_hash': content_hash,
-                    'message_id': message_id,
-                    'thread_id': thread_id,
-                    'subject': subject,
-                    'sender': sender,
-                    'date_str': date_str,
-                    'label_ids': label_ids,
-                })
+                messages_to_process.append(
+                    {
+                        "document": existing_document,
+                        "is_new": False,
+                        "markdown_content": markdown_content,
+                        "content_hash": content_hash,
+                        "message_id": message_id,
+                        "thread_id": thread_id,
+                        "subject": subject,
+                        "sender": sender,
+                        "date_str": date_str,
+                        "label_ids": label_ids,
+                    }
+                )
                 continue

             # Document doesn't exist by unique_identifier_hash
@@ -350,18 +354,20 @@ async def _analyze_gmail_messages_phase1(
                 )
                 session.add(document)

-                messages_to_process.append({
-                    'document': document,
-                    'is_new': True,
-                    'markdown_content': markdown_content,
-                    'content_hash': content_hash,
-                    'message_id': message_id,
-                    'thread_id': thread_id,
-                    'subject': subject,
-                    'sender': sender,
-                    'date_str': date_str,
-                    'label_ids': label_ids,
-                })
+                messages_to_process.append(
+                    {
+                        "document": document,
+                        "is_new": True,
+                        "markdown_content": markdown_content,
+                        "content_hash": content_hash,
+                        "message_id": message_id,
+                        "thread_id": thread_id,
+                        "subject": subject,
+                        "sender": sender,
+                        "date_str": date_str,
+                        "label_ids": label_ids,
+                    }
+                )

         except Exception as e:
             logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True)
@@ -398,7 +404,7 @@ async def _process_gmail_messages_phase2(
                 await on_heartbeat_callback(documents_indexed)
                 last_heartbeat_time = current_time

-            document = item['document']
+            document = item["document"]
             try:
                 # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                 document.status = DocumentStatus.processing()
@@ -411,37 +417,35 @@ async def _process_gmail_messages_phase2(
                 if user_llm:
                     document_metadata_for_summary = {
-                        "message_id": item['message_id'],
-                        "thread_id": item['thread_id'],
-                        "subject": item['subject'],
-                        "sender": item['sender'],
+                        "message_id": item["message_id"],
+                        "thread_id": item["thread_id"],
+                        "subject": item["subject"],
+                        "sender": item["sender"],
                         "document_type": "Gmail Message (Composio)",
                     }
                     summary_content, summary_embedding = await generate_document_summary(
-                        item['markdown_content'], user_llm, document_metadata_for_summary
+                        item["markdown_content"], user_llm, document_metadata_for_summary
                     )
                 else:
-                    summary_content = (
-                        f"Gmail: {item['subject']}\n\nFrom: {item['sender']}\nDate: {item['date_str']}"
-                    )
+                    summary_content = f"Gmail: {item['subject']}\n\nFrom: {item['sender']}\nDate: {item['date_str']}"
                     summary_embedding = config.embedding_model_instance.embed(
                         summary_content
                     )

-                chunks = await create_document_chunks(item['markdown_content'])
+                chunks = await create_document_chunks(item["markdown_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['subject'] document.title = item["subject"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"message_id": item['message_id'], "message_id": item["message_id"],
"thread_id": item['thread_id'], "thread_id": item["thread_id"],
"subject": item['subject'], "subject": item["subject"],
"sender": item['sender'], "sender": item["sender"],
"date": item['date_str'], "date": item["date_str"],
"labels": item['label_ids'], "labels": item["label_ids"],
"connector_id": connector_id, "connector_id": connector_id,
"source": "composio", "source": "composio",
} }
@@ -465,7 +469,9 @@ async def _process_gmail_messages_phase2(
                     document.status = DocumentStatus.failed(str(e))
                     document.updated_at = get_current_timestamp()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 documents_failed += 1
                 continue
@@ -571,7 +577,9 @@ async def index_composio_gmail(
             )
             all_messages.extend(messages)
-            logger.info(f"Fetched {len(messages)} messages (total: {len(all_messages)})")
+            logger.info(
+                f"Fetched {len(messages)} messages (total: {len(all_messages)})"
+            )

             if not next_token or len(messages) < current_batch_size:
                 break
@@ -616,7 +624,7 @@ async def index_composio_gmail(
         )

         # Commit all pending documents - they all appear in UI now
-        new_documents_count = len([m for m in messages_to_process if m['is_new']])
+        new_documents_count = len([m for m in messages_to_process if m["is_new"]])
         if new_documents_count > 0:
             logger.info(f"Phase 1: Committing {new_documents_count} pending documents")
             await session.commit()
@@ -645,9 +653,7 @@ async def index_composio_gmail(
         await update_connector_last_indexed(session, connector, update_last_indexed)

         # Final commit to ensure all documents are persisted
-        logger.info(
-            f"Final commit: Total {documents_indexed} Gmail messages processed"
-        )
+        logger.info(f"Final commit: Total {documents_indexed} Gmail messages processed")
         try:
             await session.commit()
             logger.info(
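The comments in this file's hunks describe a two-phase indexing flow: Phase 1 inserts documents in a pending state and commits once so they all show up in the UI, then Phase 2 marks each document processing, does the heavy work, and commits it as ready or failed. A minimal sketch of that control flow, where `fetch_items`, `build_pending_document`, and `heavy_process` are hypothetical placeholders rather than functions from this codebase:

```python
# Sketch only: fetch_items, build_pending_document, and heavy_process are assumed helpers.
async def two_phase_index(session, fetch_items, build_pending_document, heavy_process):
    to_process = []

    # Phase 1: create PENDING documents and commit once so they appear in the UI.
    for item in await fetch_items():
        document = build_pending_document(item)  # status = pending
        session.add(document)
        to_process.append({"document": document, "item": item, "is_new": True})
    await session.commit()

    # Phase 2: one document at a time, committing per document so only the
    # current document shows "processing" in the UI.
    for entry in to_process:
        document = entry["document"]
        document.status = DocumentStatus.processing()
        await session.commit()
        try:
            await heavy_process(document, entry["item"])  # summary, embedding, chunks
            document.status = DocumentStatus.ready()
        except Exception as exc:
            document.status = DocumentStatus.failed(str(exc))
        await session.commit()
```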

View file

@@ -268,7 +268,9 @@ async def index_composio_google_calendar(
     documents_indexed = 0
     documents_skipped = 0
     documents_failed = 0  # Track events that failed processing
-    duplicate_content_count = 0  # Track events skipped due to duplicate content_hash
+    duplicate_content_count = (
+        0  # Track events skipped due to duplicate content_hash
+    )
     last_heartbeat_time = time.time()

     # =======================================================================
@@ -317,23 +319,27 @@ async def index_composio_google_calendar(
             if existing_document:
                 if existing_document.content_hash == content_hash:
                     # Ensure status is ready (might have been stuck in processing/pending)
-                    if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                    if not DocumentStatus.is_state(
+                        existing_document.status, DocumentStatus.READY
+                    ):
                         existing_document.status = DocumentStatus.ready()
                     documents_skipped += 1
                     continue
events_to_process.append({ events_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'markdown_content': markdown_content, "is_new": False,
'content_hash': content_hash, "markdown_content": markdown_content,
'event_id': event_id, "content_hash": content_hash,
'summary': summary, "event_id": event_id,
'start_time': start_time, "summary": summary,
'end_time': end_time, "start_time": start_time,
'location': location, "end_time": end_time,
}) "location": location,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -383,17 +389,19 @@ async def index_composio_google_calendar(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
events_to_process.append({ events_to_process.append(
'document': document, {
'is_new': True, "document": document,
'markdown_content': markdown_content, "is_new": True,
'content_hash': content_hash, "markdown_content": markdown_content,
'event_id': event_id, "content_hash": content_hash,
'summary': summary, "event_id": event_id,
'start_time': start_time, "summary": summary,
'end_time': end_time, "start_time": start_time,
'location': location, "end_time": end_time,
}) "location": location,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
@@ -402,7 +410,9 @@ async def index_composio_google_calendar(
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents"
+            )
             await session.commit()
# ======================================================================= # =======================================================================
@@ -419,7 +429,7 @@ async def index_composio_google_calendar(
                 await on_heartbeat_callback(documents_indexed)
                 last_heartbeat_time = current_time

-            document = item['document']
+            document = item["document"]
             try:
                 # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                 document.status = DocumentStatus.processing()
@ -432,35 +442,40 @@ async def index_composio_google_calendar(
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"event_id": item['event_id'], "event_id": item["event_id"],
"summary": item['summary'], "summary": item["summary"],
"start_time": item['start_time'], "start_time": item["start_time"],
"document_type": "Google Calendar Event (Composio)", "document_type": "Google Calendar Event (Composio)",
                     }
-                    summary_content, summary_embedding = await generate_document_summary(
-                        item['markdown_content'], user_llm, document_metadata_for_summary
+                    (
+                        summary_content,
+                        summary_embedding,
+                    ) = await generate_document_summary(
+                        item["markdown_content"],
+                        user_llm,
+                        document_metadata_for_summary,
                     )
else: else:
summary_content = f"Calendar: {item['summary']}\n\nStart: {item['start_time']}\nEnd: {item['end_time']}" summary_content = f"Calendar: {item['summary']}\n\nStart: {item['start_time']}\nEnd: {item['end_time']}"
if item['location']: if item["location"]:
summary_content += f"\nLocation: {item['location']}" summary_content += f"\nLocation: {item['location']}"
summary_embedding = config.embedding_model_instance.embed( summary_embedding = config.embedding_model_instance.embed(
summary_content summary_content
) )
chunks = await create_document_chunks(item['markdown_content']) chunks = await create_document_chunks(item["markdown_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['summary'] document.title = item["summary"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"event_id": item['event_id'], "event_id": item["event_id"],
"summary": item['summary'], "summary": item["summary"],
"start_time": item['start_time'], "start_time": item["start_time"],
"end_time": item['end_time'], "end_time": item["end_time"],
"location": item['location'], "location": item["location"],
"connector_id": connector_id, "connector_id": connector_id,
"source": "composio", "source": "composio",
} }
@ -484,7 +499,9 @@ async def index_composio_google_calendar(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue

View file

@ -938,13 +938,15 @@ async def _index_composio_drive_delta_sync(
if existing_document: if existing_document:
# Queue existing document for update # Queue existing document for update
files_to_process.append({ files_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'file_id': file_id, "is_new": False,
'file_name': file_name, "file_id": file_id,
'mime_type': mime_type, "file_name": file_name,
}) "mime_type": mime_type,
}
)
continue continue
# Create new document with PENDING status # Create new document with PENDING status
@ -974,13 +976,15 @@ async def _index_composio_drive_delta_sync(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
files_to_process.append({ files_to_process.append(
'document': document, {
'is_new': True, "document": document,
'file_id': file_id, "is_new": True,
'file_name': file_name, "file_id": file_id,
'mime_type': mime_type, "file_name": file_name,
}) "mime_type": mime_type,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for change: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for change: {e!s}", exc_info=True)
@@ -989,7 +993,9 @@ async def _index_composio_drive_delta_sync(
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
+            )
             await session.commit()
# ======================================================================= # =======================================================================
@ -1005,7 +1011,7 @@ async def _index_composio_drive_delta_sync(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit # Set to PROCESSING and commit
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -1013,11 +1019,13 @@ async def _index_composio_drive_delta_sync(
# Get file content # Get file content
content, content_error = await composio_connector.get_drive_file_content( content, content_error = await composio_connector.get_drive_file_content(
item['file_id'], original_mime_type=item['mime_type'] item["file_id"], original_mime_type=item["mime_type"]
) )
if content_error or not content: if content_error or not content:
logger.warning(f"Could not get content for file {item['file_name']}: {content_error}") logger.warning(
f"Could not get content for file {item['file_name']}: {content_error}"
)
markdown_content = f"# {item['file_name']}\n\n" markdown_content = f"# {item['file_name']}\n\n"
markdown_content += f"**File ID:** {item['file_id']}\n" markdown_content += f"**File ID:** {item['file_id']}\n"
markdown_content += f"**Type:** {item['mime_type']}\n" markdown_content += f"**Type:** {item['mime_type']}\n"
@ -1031,9 +1039,9 @@ async def _index_composio_drive_delta_sync(
else: else:
markdown_content = await _process_file_content( markdown_content = await _process_file_content(
content=content, content=content,
file_name=item['file_name'], file_name=item["file_name"],
file_id=item['file_id'], file_id=item["file_id"],
mime_type=item['mime_type'], mime_type=item["mime_type"],
search_space_id=search_space_id, search_space_id=search_space_id,
user_id=user_id, user_id=user_id,
session=session, session=session,
@ -1045,14 +1053,14 @@ async def _index_composio_drive_delta_sync(
content_hash = generate_content_hash(markdown_content, search_space_id) content_hash = generate_content_hash(markdown_content, search_space_id)
# For existing documents, check if content changed # For existing documents, check if content changed
if not item['is_new'] and document.content_hash == content_hash: if not item["is_new"] and document.content_hash == content_hash:
if not DocumentStatus.is_state(document.status, DocumentStatus.READY): if not DocumentStatus.is_state(document.status, DocumentStatus.READY):
document.status = DocumentStatus.ready() document.status = DocumentStatus.ready()
documents_skipped += 1 documents_skipped += 1
continue continue
# Check for duplicate content hash (for new documents) # Check for duplicate content hash (for new documents)
if item['is_new']: if item["is_new"]:
with session.no_autoflush: with session.no_autoflush:
duplicate_by_content = await check_duplicate_document_by_hash( duplicate_by_content = await check_duplicate_document_by_hash(
session, content_hash session, content_hash
@@ -1067,13 +1075,15 @@ async def _index_composio_drive_delta_sync(
                     continue

                 # Heavy processing (LLM, embeddings, chunks)
-                user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
+                user_llm = await get_user_long_context_llm(
+                    session, user_id, search_space_id
+                )
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"file_id": item['file_id'], "file_id": item["file_id"],
"file_name": item['file_name'], "file_name": item["file_name"],
"mime_type": item['mime_type'], "mime_type": item["mime_type"],
"document_type": "Google Drive File (Composio)", "document_type": "Google Drive File (Composio)",
} }
summary_content, summary_embedding = await generate_document_summary( summary_content, summary_embedding = await generate_document_summary(
@ -1081,20 +1091,22 @@ async def _index_composio_drive_delta_sync(
) )
else: else:
summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}" summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}"
summary_embedding = config.embedding_model_instance.embed(summary_content) summary_embedding = config.embedding_model_instance.embed(
summary_content
)
chunks = await create_document_chunks(markdown_content) chunks = await create_document_chunks(markdown_content)
# Update document to READY # Update document to READY
document.title = item['file_name'] document.title = item["file_name"]
document.content = summary_content document.content = summary_content
document.content_hash = content_hash document.content_hash = content_hash
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"file_id": item['file_id'], "file_id": item["file_id"],
"file_name": item['file_name'], "file_name": item["file_name"],
"FILE_NAME": item['file_name'], "FILE_NAME": item["file_name"],
"mime_type": item['mime_type'], "mime_type": item["mime_type"],
"connector_id": connector_id, "connector_id": connector_id,
"source": "composio", "source": "composio",
} }
@@ -1117,7 +1129,9 @@ async def _index_composio_drive_delta_sync(
                     document.status = DocumentStatus.failed(str(e))
                     document.updated_at = get_current_timestamp()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 documents_failed += 1
                 continue
@ -1329,13 +1343,15 @@ async def _index_composio_drive_full_scan(
if existing_document: if existing_document:
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
files_to_process.append({ files_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'file_id': file_id, "is_new": False,
'file_name': file_name, "file_id": file_id,
'mime_type': mime_type, "file_name": file_name,
}) "mime_type": mime_type,
}
)
continue continue
# Create new document with PENDING status (visible in UI immediately) # Create new document with PENDING status (visible in UI immediately)
@ -1365,13 +1381,15 @@ async def _index_composio_drive_full_scan(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
files_to_process.append({ files_to_process.append(
'document': document, {
'is_new': True, "document": document,
'file_id': file_id, "is_new": True,
'file_name': file_name, "file_id": file_id,
'mime_type': mime_type, "file_name": file_name,
}) "mime_type": mime_type,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for file: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for file: {e!s}", exc_info=True)
@ -1380,7 +1398,9 @@ async def _index_composio_drive_full_scan(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -1397,7 +1417,7 @@ async def _index_composio_drive_full_scan(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -1405,11 +1425,13 @@ async def _index_composio_drive_full_scan(
# Get file content (pass mime_type for Google Workspace export handling) # Get file content (pass mime_type for Google Workspace export handling)
content, content_error = await composio_connector.get_drive_file_content( content, content_error = await composio_connector.get_drive_file_content(
item['file_id'], original_mime_type=item['mime_type'] item["file_id"], original_mime_type=item["mime_type"]
) )
if content_error or not content: if content_error or not content:
logger.warning(f"Could not get content for file {item['file_name']}: {content_error}") logger.warning(
f"Could not get content for file {item['file_name']}: {content_error}"
)
markdown_content = f"# {item['file_name']}\n\n" markdown_content = f"# {item['file_name']}\n\n"
markdown_content += f"**File ID:** {item['file_id']}\n" markdown_content += f"**File ID:** {item['file_id']}\n"
markdown_content += f"**Type:** {item['mime_type']}\n" markdown_content += f"**Type:** {item['mime_type']}\n"
@ -1424,9 +1446,9 @@ async def _index_composio_drive_full_scan(
# Process content based on file type # Process content based on file type
markdown_content = await _process_file_content( markdown_content = await _process_file_content(
content=content, content=content,
file_name=item['file_name'], file_name=item["file_name"],
file_id=item['file_id'], file_id=item["file_id"],
mime_type=item['mime_type'], mime_type=item["mime_type"],
search_space_id=search_space_id, search_space_id=search_space_id,
user_id=user_id, user_id=user_id,
session=session, session=session,
@ -1438,7 +1460,7 @@ async def _index_composio_drive_full_scan(
content_hash = generate_content_hash(markdown_content, search_space_id) content_hash = generate_content_hash(markdown_content, search_space_id)
# For existing documents, check if content changed # For existing documents, check if content changed
if not item['is_new'] and document.content_hash == content_hash: if not item["is_new"] and document.content_hash == content_hash:
# Ensure status is ready # Ensure status is ready
if not DocumentStatus.is_state(document.status, DocumentStatus.READY): if not DocumentStatus.is_state(document.status, DocumentStatus.READY):
document.status = DocumentStatus.ready() document.status = DocumentStatus.ready()
@ -1446,7 +1468,7 @@ async def _index_composio_drive_full_scan(
continue continue
# Check for duplicate content hash (for new documents) # Check for duplicate content hash (for new documents)
if item['is_new']: if item["is_new"]:
with session.no_autoflush: with session.no_autoflush:
duplicate_by_content = await check_duplicate_document_by_hash( duplicate_by_content = await check_duplicate_document_by_hash(
session, content_hash session, content_hash
@ -1462,13 +1484,15 @@ async def _index_composio_drive_full_scan(
continue continue
# Heavy processing (LLM, embeddings, chunks) # Heavy processing (LLM, embeddings, chunks)
user_llm = await get_user_long_context_llm(session, user_id, search_space_id) user_llm = await get_user_long_context_llm(
session, user_id, search_space_id
)
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"file_id": item['file_id'], "file_id": item["file_id"],
"file_name": item['file_name'], "file_name": item["file_name"],
"mime_type": item['mime_type'], "mime_type": item["mime_type"],
"document_type": "Google Drive File (Composio)", "document_type": "Google Drive File (Composio)",
} }
summary_content, summary_embedding = await generate_document_summary( summary_content, summary_embedding = await generate_document_summary(
@ -1476,20 +1500,22 @@ async def _index_composio_drive_full_scan(
) )
else: else:
summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}" summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}"
summary_embedding = config.embedding_model_instance.embed(summary_content) summary_embedding = config.embedding_model_instance.embed(
summary_content
)
chunks = await create_document_chunks(markdown_content) chunks = await create_document_chunks(markdown_content)
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['file_name'] document.title = item["file_name"]
document.content = summary_content document.content = summary_content
document.content_hash = content_hash document.content_hash = content_hash
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"file_id": item['file_id'], "file_id": item["file_id"],
"file_name": item['file_name'], "file_name": item["file_name"],
"FILE_NAME": item['file_name'], "FILE_NAME": item["file_name"],
"mime_type": item['mime_type'], "mime_type": item["mime_type"],
"connector_id": connector_id, "connector_id": connector_id,
"source": "composio", "source": "composio",
} }
@ -1515,7 +1541,9 @@ async def _index_composio_drive_full_scan(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue

View file

@@ -147,7 +147,10 @@ class DocumentStatus:
             reason: Human-readable failure reason
             **extra_details: Optional additional details (duplicate_of, error_code, etc.)
         """
-        status = {"state": DocumentStatus.FAILED, "reason": reason[:500]}  # Truncate long reasons
+        status = {
+            "state": DocumentStatus.FAILED,
+            "reason": reason[:500],
+        }  # Truncate long reasons
         if extra_details:
             status.update(extra_details)
         return status
@@ -866,7 +869,7 @@ class Document(BaseModel, TimestampMixin):
         JSONB,
         nullable=False,
         default=DocumentStatus.ready,
-        server_default=text("'{\"state\": \"ready\"}'::jsonb"),
+        server_default=text('\'{"state": "ready"}\'::jsonb'),
         index=True,
     )
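For reference, the status values this JSONB column stores are plain dicts. The `failed()` helper shown above produces the failed shape; the `ready()` and `processing()` shapes below are assumptions inferred from it and from the `server_default`, not code quoted from this commit:

```python
DocumentStatus.ready()       # assumed to produce {"state": "ready"}
DocumentStatus.processing()  # assumed to produce {"state": "processing"}
DocumentStatus.failed("boom", error_code=500)
# -> {"state": "failed", "reason": "boom", "error_code": 500}; reason capped at 500 chars
```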

View file

@@ -144,7 +144,9 @@ async def create_documents_file_upload(
         raise HTTPException(status_code=400, detail="No files provided")

     created_documents: list[Document] = []
-    files_to_process: list[tuple[Document, str, str]] = []  # (document, temp_path, filename)
+    files_to_process: list[
+        tuple[Document, str, str]
+    ] = []  # (document, temp_path, filename)
     skipped_duplicates = 0

     # ===== PHASE 1: Create pending documents for all files =====
@@ -201,7 +203,9 @@ async def create_documents_file_upload(
             )
             session.add(document)
             created_documents.append(document)
-            files_to_process.append((document, temp_path, file.filename or "unknown"))
+            files_to_process.append(
+                (document, temp_path, file.filename or "unknown")
+            )

         except Exception as e:
             raise HTTPException(
@@ -351,7 +355,7 @@ async def read_documents(
         # Parse status from JSONB
         status_data = None
-        if hasattr(doc, 'status') and doc.status:
+        if hasattr(doc, "status") and doc.status:
             status_data = DocumentStatusSchema(
                 state=doc.status.get("state", "ready"),
                 reason=doc.status.get("reason"),
@ -506,7 +510,7 @@ async def search_documents(
# Parse status from JSONB # Parse status from JSONB
status_data = None status_data = None
if hasattr(doc, 'status') and doc.status: if hasattr(doc, "status") and doc.status:
status_data = DocumentStatusSchema( status_data = DocumentStatusSchema(
state=doc.status.get("state", "ready"), state=doc.status.get("state", "ready"),
reason=doc.status.get("reason"), reason=doc.status.get("reason"),

View file

@@ -43,6 +43,7 @@ class DocumentUpdate(DocumentBase):
 class DocumentStatusSchema(BaseModel):
     """Document processing status."""
+
     state: str  # "ready", "processing", "failed"
     reason: str | None = None

@@ -59,8 +60,12 @@ class DocumentRead(BaseModel):
     updated_at: datetime | None
     search_space_id: int
     created_by_id: UUID | None = None  # User who created/uploaded this document
-    created_by_name: str | None = None  # Display name or email of the user who created this document
-    status: DocumentStatusSchema | None = None  # Processing status (ready, processing, failed)
+    created_by_name: str | None = (
+        None  # Display name or email of the user who created this document
+    )
+    status: DocumentStatusSchema | None = (
+        None  # Processing status (ready, processing, failed)
+    )

     model_config = ConfigDict(from_attributes=True)
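The routes in the earlier hunks build this schema directly from the JSONB column; a minimal sketch of that mapping, assuming `doc` is an ORM instance whose `status` attribute holds the status dict:

```python
status_data = None
if hasattr(doc, "status") and doc.status:
    status_data = DocumentStatusSchema(
        state=doc.status.get("state", "ready"),  # default to "ready" for legacy rows
        reason=doc.status.get("reason"),
    )
```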

View file

@@ -1465,11 +1465,7 @@ class ConnectorService:
             issue_key = metadata.get("issue_key", "")
             issue_title = metadata.get("issue_title", "Untitled Issue")
             status = metadata.get("status", "")
-            title = (
-                f"{issue_key} - {issue_title}"
-                if issue_key
-                else issue_title
-            )
+            title = f"{issue_key} - {issue_title}" if issue_key else issue_title
             if status:
                 title += f" ({status})"
             return title
@@ -2387,11 +2383,7 @@ class ConnectorService:
         def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
             event_name = metadata.get("event_name", "Untitled Event")
             start_time = metadata.get("start_time", "")
-            return (
-                f"{event_name} ({start_time})"
-                if start_time
-                else event_name
-            )
+            return f"{event_name} ({start_time})" if start_time else event_name

         def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
             return metadata.get("event_url", "") or ""

View file

@@ -669,11 +669,15 @@ async def _process_file_with_document(
         file_size = os.path.getsize(temp_path)
         logger.info(f"[_process_file_with_document] File size: {file_size} bytes")
     except Exception as e:
-        logger.warning(f"[_process_file_with_document] Could not get file size: {e}")
+        logger.warning(
+            f"[_process_file_with_document] Could not get file size: {e}"
+        )
         file_size = None

     # Create notification for document processing
-    logger.info(f"[_process_file_with_document] Creating notification for: {filename}")
+    logger.info(
+        f"[_process_file_with_document] Creating notification for: {filename}"
+    )
     notification = (
         await NotificationService.document_processing.notify_processing_started(
             session=session,
@@ -822,7 +826,9 @@ async def _process_file_with_document(
         if os.path.exists(temp_path):
             try:
                 os.unlink(temp_path)
-                logger.info(f"[_process_file_with_document] Cleaned up temp file: {temp_path}")
+                logger.info(
+                    f"[_process_file_with_document] Cleaned up temp file: {temp_path}"
+                )
             except Exception as cleanup_error:
                 logger.warning(
                     f"[_process_file_with_document] Failed to clean up temp file: {cleanup_error}"

View file

@@ -154,9 +154,7 @@ async def _cleanup_stale_notifications():
             f"Found {len(stale_notification_ids)} stale connector indexing notifications "
             f"(no Redis heartbeat key): {stale_notification_ids}"
         )
-        logger.info(
-            f"Connector IDs for document cleanup: {stale_connector_ids}"
-        )
+        logger.info(f"Connector IDs for document cleanup: {stale_connector_ids}")

         # O(1) Batch UPDATE notifications using JSONB || operator
         # This merges the update data into existing notification_metadata

View file

@@ -140,7 +140,9 @@ async def index_airtable_records(
                 log_entry, success_msg, {"bases_count": 0}
             )
             # CRITICAL: Update timestamp even when no bases found so Electric SQL syncs
-            await update_connector_last_indexed(session, connector, update_last_indexed)
+            await update_connector_last_indexed(
+                session, connector, update_last_indexed
+            )
             await session.commit()
             return 0, None  # Return None (not error) when no items found
@@ -277,22 +279,28 @@ async def index_airtable_records(
                             # Document exists - check if content has changed
                             if existing_document.content_hash == content_hash:
                                 # Ensure status is ready (might have been stuck in processing/pending)
-                                if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
-                                    existing_document.status = DocumentStatus.ready()
+                                if not DocumentStatus.is_state(
+                                    existing_document.status, DocumentStatus.READY
+                                ):
+                                    existing_document.status = (
+                                        DocumentStatus.ready()
+                                    )
                                 documents_skipped += 1
                                 continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
records_to_process.append({ records_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'markdown_content': markdown_content, "is_new": False,
'content_hash': content_hash, "markdown_content": markdown_content,
'record_id': record_id, "content_hash": content_hash,
'record': record, "record_id": record_id,
'base_name': base_name, "record": record,
'table_name': table_name, "base_name": base_name,
}) "table_name": table_name,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -339,25 +347,31 @@ async def index_airtable_records(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
records_to_process.append({ records_to_process.append(
'document': document, {
'is_new': True, "document": document,
'markdown_content': markdown_content, "is_new": True,
'content_hash': content_hash, "markdown_content": markdown_content,
'record_id': record_id, "content_hash": content_hash,
'record': record, "record_id": record_id,
'base_name': base_name, "record": record,
'table_name': table_name, "base_name": base_name,
}) "table_name": table_name,
}
)
                         except Exception as e:
-                            logger.error(f"Error in Phase 1 for record: {e!s}", exc_info=True)
+                            logger.error(
+                                f"Error in Phase 1 for record: {e!s}", exc_info=True
+                            )
                             documents_failed += 1
                             continue

             # Commit all pending documents - they all appear in UI now
             if new_documents_created:
-                logger.info(f"Phase 1: Committing {len([r for r in records_to_process if r['is_new']])} pending documents")
+                logger.info(
+                    f"Phase 1: Committing {len([r for r in records_to_process if r['is_new']])} pending documents"
+                )
                 await session.commit()
# ======================================================================= # =======================================================================
@ -374,7 +388,7 @@ async def index_airtable_records(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -387,13 +401,18 @@ async def index_airtable_records(
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"record_id": item['record_id'], "record_id": item["record_id"],
"created_time": item['record'].get("CREATED_TIME()", ""), "created_time": item["record"].get("CREATED_TIME()", ""),
"document_type": "Airtable Record", "document_type": "Airtable Record",
"connector_type": "Airtable", "connector_type": "Airtable",
} }
summary_content, summary_embedding = await generate_document_summary( (
item['markdown_content'], user_llm, document_metadata_for_summary summary_content,
summary_embedding,
) = await generate_document_summary(
item["markdown_content"],
user_llm,
document_metadata_for_summary,
) )
else: else:
# Fallback to simple summary if no LLM configured # Fallback to simple summary if no LLM configured
@ -402,18 +421,18 @@ async def index_airtable_records(
summary_content summary_content
) )
chunks = await create_document_chunks(item['markdown_content']) chunks = await create_document_chunks(item["markdown_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['record_id'] document.title = item["record_id"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"record_id": item['record_id'], "record_id": item["record_id"],
"created_time": item['record'].get("CREATED_TIME()", ""), "created_time": item["record"].get("CREATED_TIME()", ""),
"base_name": item['base_name'], "base_name": item["base_name"],
"table_name": item['table_name'], "table_name": item["table_name"],
"connector_id": connector_id, "connector_id": connector_id,
} }
safe_set_chunks(document, chunks) safe_set_chunks(document, chunks)
@@ -430,13 +449,17 @@ async def index_airtable_records(
                         await session.commit()

                     except Exception as e:
-                        logger.error(f"Error processing Airtable record: {e!s}", exc_info=True)
+                        logger.error(
+                            f"Error processing Airtable record: {e!s}", exc_info=True
+                        )
                         # Mark document as failed with reason (visible in UI)
                         try:
                             document.status = DocumentStatus.failed(str(e))
                             document.updated_at = get_current_timestamp()
                         except Exception as status_error:
-                            logger.error(f"Failed to update document status to failed: {status_error}")
+                            logger.error(
+                                f"Failed to update document status to failed: {status_error}"
+                            )
                         documents_failed += 1
                         continue
@@ -446,7 +469,9 @@ async def index_airtable_records(
         total_processed = documents_indexed

     # Final commit to ensure all documents are persisted (safety net)
-    logger.info(f"Final commit: Total {documents_indexed} Airtable records processed")
+    logger.info(
+        f"Final commit: Total {documents_indexed} Airtable records processed"
+    )
     try:
         await session.commit()
         logger.info(

View file

@@ -53,7 +53,8 @@ def safe_set_chunks(document: Document, chunks: list) -> None:
         safe_set_chunks(document, chunks)  # Always safe
     """
     from sqlalchemy.orm.attributes import set_committed_value
-    set_committed_value(document, 'chunks', chunks)
+
+    set_committed_value(document, "chunks", chunks)


 async def check_duplicate_document_by_hash(
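`set_committed_value` populates the `chunks` relationship in the identity map without triggering a lazy load, which is why the docstring calls it "always safe" under an async session where implicit IO would fail. A hedged usage sketch; the `Chunk(...)` construction details are assumptions, not code from this commit:

```python
from sqlalchemy.orm.attributes import set_committed_value

# Assumed shapes: Document has a `chunks` relationship; Chunk takes content/embedding.
chunks = [Chunk(content=text, embedding=emb) for text, emb in chunk_pairs]
set_committed_value(document, "chunks", chunks)  # no lazy load emitted
session.add(document)
await session.commit()
```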

View file

@@ -261,7 +261,9 @@ async def index_bookstack_pages(
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
                         # Ensure status is ready (might have been stuck in processing/pending)
-                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
                             existing_document.status = DocumentStatus.ready()
                         logger.info(
                             f"Document for BookStack page {page_name} unchanged. Skipping."
@ -270,20 +272,22 @@ async def index_bookstack_pages(
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
pages_to_process.append({ pages_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'page_id': page_id, "is_new": False,
'page_name': page_name, "page_id": page_id,
'page_slug': page_slug, "page_name": page_name,
'book_id': book_id, "page_slug": page_slug,
'book_slug': book_slug, "book_id": book_id,
'chapter_id': chapter_id, "book_slug": book_slug,
'page_url': page_url, "chapter_id": chapter_id,
'page_content': page_content, "page_url": page_url,
'full_content': full_content, "page_content": page_content,
'content_hash': content_hash, "full_content": full_content,
}) "content_hash": content_hash,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -331,20 +335,22 @@ async def index_bookstack_pages(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
pages_to_process.append({ pages_to_process.append(
'document': document, {
'is_new': True, "document": document,
'page_id': page_id, "is_new": True,
'page_name': page_name, "page_id": page_id,
'page_slug': page_slug, "page_name": page_name,
'book_id': book_id, "page_slug": page_slug,
'book_slug': book_slug, "book_id": book_id,
'chapter_id': chapter_id, "book_slug": book_slug,
'page_url': page_url, "chapter_id": chapter_id,
'page_content': page_content, "page_url": page_url,
'full_content': full_content, "page_content": page_content,
'content_hash': content_hash, "full_content": full_content,
}) "content_hash": content_hash,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
@@ -353,7 +359,9 @@ async def index_bookstack_pages(
         # Commit all pending documents - they all appear in UI now
         if new_documents_created:
-            logger.info(f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents")
+            logger.info(
+                f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents"
+            )
             await session.commit()
# ======================================================================= # =======================================================================
@ -370,7 +378,7 @@ async def index_bookstack_pages(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -383,23 +391,23 @@ async def index_bookstack_pages(
# Build document metadata # Build document metadata
doc_metadata = { doc_metadata = {
"page_id": item['page_id'], "page_id": item["page_id"],
"page_name": item['page_name'], "page_name": item["page_name"],
"page_slug": item['page_slug'], "page_slug": item["page_slug"],
"book_id": item['book_id'], "book_id": item["book_id"],
"book_slug": item['book_slug'], "book_slug": item["book_slug"],
"chapter_id": item['chapter_id'], "chapter_id": item["chapter_id"],
"base_url": bookstack_base_url, "base_url": bookstack_base_url,
"page_url": item['page_url'], "page_url": item["page_url"],
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
if user_llm: if user_llm:
summary_metadata = { summary_metadata = {
"page_name": item['page_name'], "page_name": item["page_name"],
"page_id": item['page_id'], "page_id": item["page_id"],
"book_id": item['book_id'], "book_id": item["book_id"],
"document_type": "BookStack Page", "document_type": "BookStack Page",
"connector_type": "BookStack", "connector_type": "BookStack",
} }
@ -407,17 +415,15 @@ async def index_bookstack_pages(
summary_content, summary_content,
summary_embedding, summary_embedding,
) = await generate_document_summary( ) = await generate_document_summary(
item['full_content'], user_llm, summary_metadata item["full_content"], user_llm, summary_metadata
) )
                 else:
                     # Fallback to simple summary if no LLM configured
-                    summary_content = (
-                        f"BookStack Page: {item['page_name']}\n\nBook ID: {item['book_id']}\n\n"
-                    )
-                    if item['page_content']:
+                    summary_content = f"BookStack Page: {item['page_name']}\n\nBook ID: {item['book_id']}\n\n"
+                    if item["page_content"]:
                         # Take first 1000 characters of content for summary
-                        content_preview = item['page_content'][:1000]
-                        if len(item['page_content']) > 1000:
+                        content_preview = item["page_content"][:1000]
+                        if len(item["page_content"]) > 1000:
                             content_preview += "..."
                         summary_content += f"Content Preview: {content_preview}\n\n"
                     summary_embedding = config.embedding_model_instance.embed(
@ -425,12 +431,12 @@ async def index_bookstack_pages(
) )
# Process chunks - using the full page content # Process chunks - using the full page content
chunks = await create_document_chunks(item['full_content']) chunks = await create_document_chunks(item["full_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['page_name'] document.title = item["page_name"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = doc_metadata document.document_metadata = doc_metadata
safe_set_chunks(document, chunks) safe_set_chunks(document, chunks)
@@ -456,7 +462,9 @@ async def index_bookstack_pages(
                     document.status = DocumentStatus.failed(str(e))
                     document.updated_at = get_current_timestamp()
                 except Exception as status_error:
-                    logger.error(f"Failed to update document status to failed: {status_error}")
+                    logger.error(
+                        f"Failed to update document status to failed: {status_error}"
+                    )
                 skipped_pages.append(
                     f"{item.get('page_name', 'Unknown')} (processing error)"
                 )
@@ -473,7 +481,9 @@ async def index_bookstack_pages(
     )
     try:
         await session.commit()
-        logger.info("Successfully committed all BookStack document changes to database")
+        logger.info(
+            "Successfully committed all BookStack document changes to database"
+        )
     except Exception as e:
         # Handle any remaining integrity errors gracefully (race conditions, etc.)
         if (

View file

@@ -260,7 +260,9 @@ async def index_clickup_tasks(
                     # Document exists - check if content has changed
                     if existing_document.content_hash == content_hash:
                         # Ensure status is ready (might have been stuck in processing/pending)
-                        if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                        if not DocumentStatus.is_state(
+                            existing_document.status, DocumentStatus.READY
+                        ):
                             existing_document.status = DocumentStatus.ready()
                         logger.info(
                             f"Document for ClickUp task {task_name} unchanged. Skipping."
@ -272,22 +274,24 @@ async def index_clickup_tasks(
logger.info( logger.info(
f"Content changed for ClickUp task {task_name}. Queuing for update." f"Content changed for ClickUp task {task_name}. Queuing for update."
) )
tasks_to_process.append({ tasks_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'task_content': task_content, "is_new": False,
'content_hash': content_hash, "task_content": task_content,
'task_id': task_id, "content_hash": content_hash,
'task_name': task_name, "task_id": task_id,
'task_status': task_status, "task_name": task_name,
'task_priority': task_priority, "task_status": task_status,
'task_list_name': task_list_name, "task_priority": task_priority,
'task_space_name': task_space_name, "task_list_name": task_list_name,
'task_assignees': task_assignees, "task_space_name": task_space_name,
'task_due_date': task_due_date, "task_assignees": task_assignees,
'task_created': task_created, "task_due_date": task_due_date,
'task_updated': task_updated, "task_created": task_created,
}) "task_updated": task_updated,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -335,22 +339,24 @@ async def index_clickup_tasks(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
tasks_to_process.append({ tasks_to_process.append(
'document': document, {
'is_new': True, "document": document,
'task_content': task_content, "is_new": True,
'content_hash': content_hash, "task_content": task_content,
'task_id': task_id, "content_hash": content_hash,
'task_name': task_name, "task_id": task_id,
'task_status': task_status, "task_name": task_name,
'task_priority': task_priority, "task_status": task_status,
'task_list_name': task_list_name, "task_priority": task_priority,
'task_space_name': task_space_name, "task_list_name": task_list_name,
'task_assignees': task_assignees, "task_space_name": task_space_name,
'task_due_date': task_due_date, "task_assignees": task_assignees,
'task_created': task_created, "task_due_date": task_due_date,
'task_updated': task_updated, "task_created": task_created,
}) "task_updated": task_updated,
}
)
except Exception as e: except Exception as e:
logger.error( logger.error(
@ -362,7 +368,9 @@ async def index_clickup_tasks(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([t for t in tasks_to_process if t['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([t for t in tasks_to_process if t['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -379,7 +387,7 @@ async def index_clickup_tasks(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -392,13 +400,13 @@ async def index_clickup_tasks(
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"task_id": item['task_id'], "task_id": item["task_id"],
"task_name": item['task_name'], "task_name": item["task_name"],
"task_status": item['task_status'], "task_status": item["task_status"],
"task_priority": item['task_priority'], "task_priority": item["task_priority"],
"task_list": item['task_list_name'], "task_list": item["task_list_name"],
"task_space": item['task_space_name'], "task_space": item["task_space_name"],
"assignees": len(item['task_assignees']), "assignees": len(item["task_assignees"]),
"document_type": "ClickUp Task", "document_type": "ClickUp Task",
"connector_type": "ClickUp", "connector_type": "ClickUp",
} }
@ -406,30 +414,30 @@ async def index_clickup_tasks(
summary_content, summary_content,
summary_embedding, summary_embedding,
) = await generate_document_summary( ) = await generate_document_summary(
item['task_content'], user_llm, document_metadata_for_summary item["task_content"], user_llm, document_metadata_for_summary
) )
else: else:
summary_content = item['task_content'] summary_content = item["task_content"]
summary_embedding = config.embedding_model_instance.embed( summary_embedding = config.embedding_model_instance.embed(
item['task_content'] item["task_content"]
) )
chunks = await create_document_chunks(item['task_content']) chunks = await create_document_chunks(item["task_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['task_name'] document.title = item["task_name"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"task_id": item['task_id'], "task_id": item["task_id"],
"task_name": item['task_name'], "task_name": item["task_name"],
"task_status": item['task_status'], "task_status": item["task_status"],
"task_priority": item['task_priority'], "task_priority": item["task_priority"],
"task_assignees": item['task_assignees'], "task_assignees": item["task_assignees"],
"task_due_date": item['task_due_date'], "task_due_date": item["task_due_date"],
"task_created": item['task_created'], "task_created": item["task_created"],
"task_updated": item['task_updated'], "task_updated": item["task_updated"],
"connector_id": connector_id, "connector_id": connector_id,
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
} }
@ -456,7 +464,9 @@ async def index_clickup_tasks(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue
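
The ClickUp hunks above share the two-phase shape this lint pass touches in every connector: Phase 1 builds a queue of plain dicts (whose keys now use double quotes) and commits every pending document once, Phase 2 walks the queue and moves each document through processing, then ready, or failed on error. The sketch below only illustrates that control flow; FakeDocument, FakeSession and heavy_processing are stand-ins, not the project's real models or helpers.

import asyncio
from dataclasses import dataclass

@dataclass
class FakeDocument:
    # Stand-in for the ORM Document model used by the connectors.
    title: str
    status: str = "pending"
    content: str = ""

@dataclass
class FakeSession:
    # Stand-in for the AsyncSession; commit() just counts how often it ran.
    commits: int = 0

    async def commit(self) -> None:
        self.commits += 1

async def heavy_processing(text: str) -> str:
    # Placeholder for the real chunking / embedding / summary work.
    await asyncio.sleep(0)
    return text.upper()

async def index_two_phase(session: FakeSession, raw_items: list) -> None:
    # Phase 1: create pending documents and queue them with their payloads.
    queue = []
    for raw in raw_items:
        doc = FakeDocument(title=raw["title"])
        queue.append({"document": doc, "is_new": True, "payload": raw["payload"]})
    await session.commit()  # all pending documents become visible at once

    # Phase 2: one document at a time, so only it ever shows "processing".
    for item in queue:
        doc = item["document"]
        try:
            doc.status = "processing"
            await session.commit()
            doc.content = await heavy_processing(item["payload"])
            doc.status = "ready"
        except Exception as exc:
            doc.status = f"failed: {exc}"  # mark just this document, keep looping
        await session.commit()

asyncio.run(index_two_phase(FakeSession(), [{"title": "Task A", "payload": "hello"}]))
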

View file

@ -262,23 +262,27 @@ async def index_confluence_pages(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
pages_to_process.append({ pages_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'full_content': full_content, "is_new": False,
'page_content': page_content, "full_content": full_content,
'content_hash': content_hash, "page_content": page_content,
'page_id': page_id, "content_hash": content_hash,
'page_title': page_title, "page_id": page_id,
'space_id': space_id, "page_title": page_title,
'comment_count': comment_count, "space_id": space_id,
}) "comment_count": comment_count,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -323,17 +327,19 @@ async def index_confluence_pages(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
pages_to_process.append({ pages_to_process.append(
'document': document, {
'is_new': True, "document": document,
'full_content': full_content, "is_new": True,
'page_content': page_content, "full_content": full_content,
'content_hash': content_hash, "page_content": page_content,
'page_id': page_id, "content_hash": content_hash,
'page_title': page_title, "page_id": page_id,
'space_id': space_id, "page_title": page_title,
'comment_count': comment_count, "space_id": space_id,
}) "comment_count": comment_count,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
@ -342,7 +348,9 @@ async def index_confluence_pages(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -359,7 +367,7 @@ async def index_confluence_pages(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -372,10 +380,10 @@ async def index_confluence_pages(
if user_llm: if user_llm:
document_metadata = { document_metadata = {
"page_title": item['page_title'], "page_title": item["page_title"],
"page_id": item['page_id'], "page_id": item["page_id"],
"space_id": item['space_id'], "space_id": item["space_id"],
"comment_count": item['comment_count'], "comment_count": item["comment_count"],
"document_type": "Confluence Page", "document_type": "Confluence Page",
"connector_type": "Confluence", "connector_type": "Confluence",
} }
@ -383,17 +391,15 @@ async def index_confluence_pages(
summary_content, summary_content,
summary_embedding, summary_embedding,
) = await generate_document_summary( ) = await generate_document_summary(
item['full_content'], user_llm, document_metadata item["full_content"], user_llm, document_metadata
) )
else: else:
# Fallback to simple summary if no LLM configured # Fallback to simple summary if no LLM configured
summary_content = ( summary_content = f"Confluence Page: {item['page_title']}\n\nSpace ID: {item['space_id']}\n\n"
f"Confluence Page: {item['page_title']}\n\nSpace ID: {item['space_id']}\n\n" if item["page_content"]:
)
if item['page_content']:
# Take first 1000 characters of content for summary # Take first 1000 characters of content for summary
content_preview = item['page_content'][:1000] content_preview = item["page_content"][:1000]
if len(item['page_content']) > 1000: if len(item["page_content"]) > 1000:
content_preview += "..." content_preview += "..."
summary_content += f"Content Preview: {content_preview}\n\n" summary_content += f"Content Preview: {content_preview}\n\n"
summary_content += f"Comments: {item['comment_count']}" summary_content += f"Comments: {item['comment_count']}"
@ -402,18 +408,18 @@ async def index_confluence_pages(
) )
# Process chunks - using the full page content with comments # Process chunks - using the full page content with comments
chunks = await create_document_chunks(item['full_content']) chunks = await create_document_chunks(item["full_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['page_title'] document.title = item["page_title"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"page_id": item['page_id'], "page_id": item["page_id"],
"page_title": item['page_title'], "page_title": item["page_title"],
"space_id": item['space_id'], "space_id": item["space_id"],
"comment_count": item['comment_count'], "comment_count": item["comment_count"],
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
@ -440,7 +446,9 @@ async def index_confluence_pages(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue # Skip this page and continue with others continue # Skip this page and continue with others
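
The no-LLM fallback in this Confluence section builds its summary by hand: a short header, at most the first 1000 characters of the page body with an ellipsis when the body is longer, then the comment count. A standalone sketch of that truncation, taking plain title/space_id/body arguments instead of the connector's item dict:

def build_fallback_summary(title: str, space_id: str, body: str, comment_count: int,
                           preview_chars: int = 1000) -> str:
    # Mirror of the fallback path: header, optional truncated preview, comment count.
    summary = f"Confluence Page: {title}\n\nSpace ID: {space_id}\n\n"
    if body:
        preview = body[:preview_chars]
        if len(body) > preview_chars:
            preview += "..."
        summary += f"Content Preview: {preview}\n\n"
    summary += f"Comments: {comment_count}"
    return summary

print(build_fallback_summary("Release notes", "SP1", "x" * 1200, 3)[:80])
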

View file

@ -352,9 +352,7 @@ async def index_discord_messages(
try: try:
channels = await discord_client.get_text_channels(guild_id) channels = await discord_client.get_text_channels(guild_id)
if not channels: if not channels:
logger.info( logger.info(f"No channels found in guild {guild_name}. Skipping.")
f"No channels found in guild {guild_name}. Skipping."
)
skipped_channels.append(f"{guild_name} (no channels)") skipped_channels.append(f"{guild_name} (no channels)")
else: else:
for channel in channels: for channel in channels:
@ -456,25 +454,31 @@ async def index_discord_messages(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status = DocumentStatus.ready() existing_document.status, DocumentStatus.READY
):
existing_document.status = (
DocumentStatus.ready()
)
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
messages_to_process.append({ messages_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'combined_document_string': combined_document_string, "is_new": False,
'content_hash': content_hash, "combined_document_string": combined_document_string,
'guild_name': guild_name, "content_hash": content_hash,
'guild_id': guild_id, "guild_name": guild_name,
'channel_name': channel_name, "guild_id": guild_id,
'channel_id': channel_id, "channel_name": channel_name,
'message_id': msg_id, "channel_id": channel_id,
'message_timestamp': msg_timestamp, "message_id": msg_id,
'message_user_name': msg_user_name, "message_timestamp": msg_timestamp,
}) "message_user_name": msg_user_name,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -522,19 +526,21 @@ async def index_discord_messages(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
messages_to_process.append({ messages_to_process.append(
'document': document, {
'is_new': True, "document": document,
'combined_document_string': combined_document_string, "is_new": True,
'content_hash': content_hash, "combined_document_string": combined_document_string,
'guild_name': guild_name, "content_hash": content_hash,
'guild_id': guild_id, "guild_name": guild_name,
'channel_name': channel_name, "guild_id": guild_id,
'channel_id': channel_id, "channel_name": channel_name,
'message_id': msg_id, "channel_id": channel_id,
'message_timestamp': msg_timestamp, "message_id": msg_id,
'message_user_name': msg_user_name, "message_timestamp": msg_timestamp,
}) "message_user_name": msg_user_name,
}
)
except Exception as e: except Exception as e:
logger.error( logger.error(
@ -547,7 +553,9 @@ async def index_discord_messages(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -564,31 +572,31 @@ async def index_discord_messages(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
await session.commit() await session.commit()
# Heavy processing (embeddings, chunks) # Heavy processing (embeddings, chunks)
chunks = await create_document_chunks(item['combined_document_string']) chunks = await create_document_chunks(item["combined_document_string"])
doc_embedding = config.embedding_model_instance.embed( doc_embedding = config.embedding_model_instance.embed(
item['combined_document_string'] item["combined_document_string"]
) )
# Update document to READY with actual content # Update document to READY with actual content
document.title = f"{item['guild_name']}#{item['channel_name']}" document.title = f"{item['guild_name']}#{item['channel_name']}"
document.content = item['combined_document_string'] document.content = item["combined_document_string"]
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = doc_embedding document.embedding = doc_embedding
document.document_metadata = { document.document_metadata = {
"guild_name": item['guild_name'], "guild_name": item["guild_name"],
"guild_id": item['guild_id'], "guild_id": item["guild_id"],
"channel_name": item['channel_name'], "channel_name": item["channel_name"],
"channel_id": item['channel_id'], "channel_id": item["channel_id"],
"message_id": item['message_id'], "message_id": item["message_id"],
"message_timestamp": item['message_timestamp'], "message_timestamp": item["message_timestamp"],
"message_user_name": item['message_user_name'], "message_user_name": item["message_user_name"],
"indexed_at": datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
@ -612,7 +620,9 @@ async def index_discord_messages(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue
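
Several of these loops, Discord included, throttle the heartbeat callback with a wall-clock check so progress updates fire periodically rather than per document. A rough sketch of that throttle; the 30-second interval and report_progress callback are assumptions, not values taken from the repository.

import asyncio
import time

HEARTBEAT_INTERVAL_SECONDS = 30  # assumed interval, not read from the repo

async def report_progress(count: int) -> None:
    # Stand-in for on_heartbeat_callback.
    print(f"indexed so far: {count}")

async def index_with_heartbeat(items: list) -> None:
    last_heartbeat_time = time.time()
    documents_indexed = 0
    for _ in items:
        now = time.time()
        if now - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
            await report_progress(documents_indexed)
            last_heartbeat_time = now
        documents_indexed += 1  # heavy per-document work would happen here

asyncio.run(index_with_heartbeat(["a", "b", "c"]))
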

View file

@ -253,7 +253,9 @@ async def index_elasticsearch_documents(
# If content is unchanged, skip. Otherwise queue for update. # If content is unchanged, skip. Otherwise queue for update.
if existing_doc.content_hash == content_hash: if existing_doc.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_doc.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_doc.status, DocumentStatus.READY
):
existing_doc.status = DocumentStatus.ready() existing_doc.status = DocumentStatus.ready()
logger.info( logger.info(
f"Skipping ES doc {doc_id} — already indexed (doc id {existing_doc.id})" f"Skipping ES doc {doc_id} — already indexed (doc id {existing_doc.id})"
@ -262,17 +264,19 @@ async def index_elasticsearch_documents(
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
docs_to_process.append({ docs_to_process.append(
'document': existing_doc, {
'is_new': False, "document": existing_doc,
'doc_id': doc_id, "is_new": False,
'title': title, "doc_id": doc_id,
'content': content, "title": title,
'content_hash': content_hash, "content": content,
'unique_identifier_hash': unique_identifier_hash, "content_hash": content_hash,
'hit': hit, "unique_identifier_hash": unique_identifier_hash,
'source': source, "hit": hit,
}) "source": source,
}
)
hits_collected += 1 hits_collected += 1
continue continue
@ -310,17 +314,19 @@ async def index_elasticsearch_documents(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
docs_to_process.append({ docs_to_process.append(
'document': document, {
'is_new': True, "document": document,
'doc_id': doc_id, "is_new": True,
'title': title, "doc_id": doc_id,
'content': content, "title": title,
'content_hash': content_hash, "content": content,
'unique_identifier_hash': unique_identifier_hash, "content_hash": content_hash,
'hit': hit, "unique_identifier_hash": unique_identifier_hash,
'source': source, "hit": hit,
}) "source": source,
}
)
hits_collected += 1 hits_collected += 1
except Exception as e: except Exception as e:
@ -330,7 +336,9 @@ async def index_elasticsearch_documents(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([d for d in docs_to_process if d['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([d for d in docs_to_process if d['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -347,7 +355,7 @@ async def index_elasticsearch_documents(
await on_heartbeat_callback(documents_processed) await on_heartbeat_callback(documents_processed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -355,9 +363,9 @@ async def index_elasticsearch_documents(
# Build metadata # Build metadata
metadata = { metadata = {
"elasticsearch_id": item['doc_id'], "elasticsearch_id": item["doc_id"],
"elasticsearch_index": item['hit'].get("_index", index_name), "elasticsearch_index": item["hit"].get("_index", index_name),
"elasticsearch_score": item['hit'].get("_score"), "elasticsearch_score": item["hit"].get("_score"),
"indexed_at": datetime.now().isoformat(), "indexed_at": datetime.now().isoformat(),
"source": "ELASTICSEARCH_CONNECTOR", "source": "ELASTICSEARCH_CONNECTOR",
"connector_id": connector_id, "connector_id": connector_id,
@ -366,17 +374,17 @@ async def index_elasticsearch_documents(
# Add any additional metadata fields specified in config # Add any additional metadata fields specified in config
if "ELASTICSEARCH_METADATA_FIELDS" in config: if "ELASTICSEARCH_METADATA_FIELDS" in config:
for field in config["ELASTICSEARCH_METADATA_FIELDS"]: for field in config["ELASTICSEARCH_METADATA_FIELDS"]:
if field in item['source']: if field in item["source"]:
metadata[f"es_{field}"] = item['source'][field] metadata[f"es_{field}"] = item["source"][field]
# Create chunks # Create chunks
chunks = await create_document_chunks(item['content']) chunks = await create_document_chunks(item["content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['title'] document.title = item["title"]
document.content = item['content'] document.content = item["content"]
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.unique_identifier_hash = item['unique_identifier_hash'] document.unique_identifier_hash = item["unique_identifier_hash"]
document.document_metadata = metadata document.document_metadata = metadata
safe_set_chunks(document, chunks) safe_set_chunks(document, chunks)
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
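
This Elasticsearch hunk copies optional metadata fields from the hit source when they are listed in the connector config, prefixing them with es_. A minimal sketch of that copy, assuming the config behaves like a plain dict:

def collect_es_metadata(source: dict, connector_config: dict, doc_id: str) -> dict:
    # Base metadata recorded for every Elasticsearch hit.
    metadata = {"elasticsearch_id": doc_id, "source": "ELASTICSEARCH_CONNECTOR"}
    # Copy any extra fields listed in the connector config, prefixed with es_.
    for field in connector_config.get("ELASTICSEARCH_METADATA_FIELDS", []):
        if field in source:
            metadata[f"es_{field}"] = source[field]
    return metadata

hit_source = {"author": "ada", "size_bytes": 42}
print(collect_es_metadata(hit_source, {"ELASTICSEARCH_METADATA_FIELDS": ["author"]}, "doc-1"))
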
@ -399,7 +407,9 @@ async def index_elasticsearch_documents(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue
@ -411,10 +421,14 @@ async def index_elasticsearch_documents(
) )
# Final commit for any remaining documents not yet committed in batches # Final commit for any remaining documents not yet committed in batches
logger.info(f"Final commit: Total {documents_processed} Elasticsearch documents processed") logger.info(
f"Final commit: Total {documents_processed} Elasticsearch documents processed"
)
try: try:
await session.commit() await session.commit()
logger.info("Successfully committed all Elasticsearch document changes to database") logger.info(
"Successfully committed all Elasticsearch document changes to database"
)
except Exception as e: except Exception as e:
# Handle any remaining integrity errors gracefully (race conditions, etc.) # Handle any remaining integrity errors gracefully (race conditions, etc.)
if ( if (

View file

@ -17,7 +17,7 @@ from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config from app.config import config
from app.connectors.github_connector import GitHubConnector, RepositoryDigest from app.connectors.github_connector import GitHubConnector
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
from app.services.llm_service import get_user_long_context_llm from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
@ -237,7 +237,9 @@ async def index_github_repos(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
logger.info(f"Repository {repo_full_name} unchanged. Skipping.") logger.info(f"Repository {repo_full_name} unchanged. Skipping.")
documents_skipped += 1 documents_skipped += 1
@ -247,14 +249,16 @@ async def index_github_repos(
logger.info( logger.info(
f"Content changed for repository {repo_full_name}. Queuing for update." f"Content changed for repository {repo_full_name}. Queuing for update."
) )
repos_to_process.append({ repos_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'digest': digest, "is_new": False,
'content_hash': content_hash, "digest": digest,
'repo_full_name': repo_full_name, "content_hash": content_hash,
'unique_identifier_hash': unique_identifier_hash, "repo_full_name": repo_full_name,
}) "unique_identifier_hash": unique_identifier_hash,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -298,14 +302,16 @@ async def index_github_repos(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
repos_to_process.append({ repos_to_process.append(
'document': document, {
'is_new': True, "document": document,
'digest': digest, "is_new": True,
'content_hash': content_hash, "digest": digest,
'repo_full_name': repo_full_name, "content_hash": content_hash,
'unique_identifier_hash': unique_identifier_hash, "repo_full_name": repo_full_name,
}) "unique_identifier_hash": unique_identifier_hash,
}
)
except Exception as repo_err: except Exception as repo_err:
logger.error( logger.error(
@ -317,7 +323,9 @@ async def index_github_repos(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([r for r in repos_to_process if r['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([r for r in repos_to_process if r['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -334,9 +342,9 @@ async def index_github_repos(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
digest = item['digest'] digest = item["digest"]
repo_full_name = item['repo_full_name'] repo_full_name = item["repo_full_name"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
@ -353,7 +361,9 @@ async def index_github_repos(
"document_type": "GitHub Repository", "document_type": "GitHub Repository",
"connector_type": "GitHub", "connector_type": "GitHub",
"ingestion_method": "gitingest", "ingestion_method": "gitingest",
"file_tree": digest.tree[:2000] if len(digest.tree) > 2000 else digest.tree, "file_tree": digest.tree[:2000]
if len(digest.tree) > 2000
else digest.tree,
"estimated_tokens": digest.estimated_tokens, "estimated_tokens": digest.estimated_tokens,
} }
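
The GitHub metadata above caps the stored file tree at 2000 characters with a conditional slice. A tiny helper with the same cap; note that a plain slice is already safe for shorter strings, so the length check only mirrors the original style:

def truncate_field(value: str, limit: int = 2000) -> str:
    # Keep at most `limit` characters of a long metadata field.
    return value[:limit] if len(value) > limit else value

doc_metadata = {
    "document_type": "GitHub Repository",
    "file_tree": truncate_field("src/\n" * 1000),  # sample tree, not a real digest
}
print(len(doc_metadata["file_tree"]))
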
@ -377,13 +387,17 @@ async def index_github_repos(
f"## Summary\n{digest.summary}\n\n" f"## Summary\n{digest.summary}\n\n"
f"## File Structure\n{digest.tree[:3000]}" f"## File Structure\n{digest.tree[:3000]}"
) )
summary_embedding = config.embedding_model_instance.embed(summary_text) summary_embedding = config.embedding_model_instance.embed(
summary_text
)
# Chunk the full digest content for granular search # Chunk the full digest content for granular search
try: try:
chunks_data = await create_document_chunks(digest.content) chunks_data = await create_document_chunks(digest.content)
except Exception as chunk_err: except Exception as chunk_err:
logger.error(f"Failed to chunk repository {repo_full_name}: {chunk_err}") logger.error(
f"Failed to chunk repository {repo_full_name}: {chunk_err}"
)
chunks_data = await _simple_chunk_content(digest.content) chunks_data = await _simple_chunk_content(digest.content)
# Update document to READY with actual content # Update document to READY with actual content
@ -401,7 +415,7 @@ async def index_github_repos(
document.title = repo_full_name document.title = repo_full_name
document.content = summary_text document.content = summary_text
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = doc_metadata document.document_metadata = doc_metadata
safe_set_chunks(document, chunks_data) safe_set_chunks(document, chunks_data)
@ -433,7 +447,9 @@ async def index_github_repos(
document.status = DocumentStatus.failed(str(repo_err)) document.status = DocumentStatus.failed(str(repo_err))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
errors.append(f"Failed processing {repo_full_name}: {repo_err}") errors.append(f"Failed processing {repo_full_name}: {repo_err}")
documents_failed += 1 documents_failed += 1
continue continue
@ -442,7 +458,9 @@ async def index_github_repos(
await update_connector_last_indexed(session, connector, update_last_indexed) await update_connector_last_indexed(session, connector, update_last_indexed)
# Final commit # Final commit
logger.info(f"Final commit: Total {documents_processed} GitHub repositories processed") logger.info(
f"Final commit: Total {documents_processed} GitHub repositories processed"
)
try: try:
await session.commit() await session.commit()
logger.info( logger.info(

View file

@ -345,25 +345,29 @@ async def index_google_calendar_events(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
events_to_process.append({ events_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'event_markdown': event_markdown, "is_new": False,
'content_hash': content_hash, "event_markdown": event_markdown,
'event_id': event_id, "content_hash": content_hash,
'event_summary': event_summary, "event_id": event_id,
'calendar_id': calendar_id, "event_summary": event_summary,
'start_time': start_time, "calendar_id": calendar_id,
'end_time': end_time, "start_time": start_time,
'location': location, "end_time": end_time,
'description': description, "location": location,
}) "description": description,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -411,19 +415,21 @@ async def index_google_calendar_events(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
events_to_process.append({ events_to_process.append(
'document': document, {
'is_new': True, "document": document,
'event_markdown': event_markdown, "is_new": True,
'content_hash': content_hash, "event_markdown": event_markdown,
'event_id': event_id, "content_hash": content_hash,
'event_summary': event_summary, "event_id": event_id,
'calendar_id': calendar_id, "event_summary": event_summary,
'start_time': start_time, "calendar_id": calendar_id,
'end_time': end_time, "start_time": start_time,
'location': location, "end_time": end_time,
'description': description, "location": location,
}) "description": description,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
@ -432,7 +438,9 @@ async def index_google_calendar_events(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -449,7 +457,7 @@ async def index_google_calendar_events(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -462,48 +470,53 @@ async def index_google_calendar_events(
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"event_id": item['event_id'], "event_id": item["event_id"],
"event_summary": item['event_summary'], "event_summary": item["event_summary"],
"calendar_id": item['calendar_id'], "calendar_id": item["calendar_id"],
"start_time": item['start_time'], "start_time": item["start_time"],
"end_time": item['end_time'], "end_time": item["end_time"],
"location": item['location'] or "No location", "location": item["location"] or "No location",
"document_type": "Google Calendar Event", "document_type": "Google Calendar Event",
"connector_type": "Google Calendar", "connector_type": "Google Calendar",
} }
summary_content, summary_embedding = await generate_document_summary( (
item['event_markdown'], user_llm, document_metadata_for_summary summary_content,
summary_embedding,
) = await generate_document_summary(
item["event_markdown"], user_llm, document_metadata_for_summary
) )
else: else:
summary_content = f"Google Calendar Event: {item['event_summary']}\n\n" summary_content = (
f"Google Calendar Event: {item['event_summary']}\n\n"
)
summary_content += f"Calendar: {item['calendar_id']}\n" summary_content += f"Calendar: {item['calendar_id']}\n"
summary_content += f"Start: {item['start_time']}\n" summary_content += f"Start: {item['start_time']}\n"
summary_content += f"End: {item['end_time']}\n" summary_content += f"End: {item['end_time']}\n"
if item['location']: if item["location"]:
summary_content += f"Location: {item['location']}\n" summary_content += f"Location: {item['location']}\n"
if item['description']: if item["description"]:
desc_preview = item['description'][:1000] desc_preview = item["description"][:1000]
if len(item['description']) > 1000: if len(item["description"]) > 1000:
desc_preview += "..." desc_preview += "..."
summary_content += f"Description: {desc_preview}\n" summary_content += f"Description: {desc_preview}\n"
summary_embedding = config.embedding_model_instance.embed( summary_embedding = config.embedding_model_instance.embed(
summary_content summary_content
) )
chunks = await create_document_chunks(item['event_markdown']) chunks = await create_document_chunks(item["event_markdown"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['event_summary'] document.title = item["event_summary"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"event_id": item['event_id'], "event_id": item["event_id"],
"event_summary": item['event_summary'], "event_summary": item["event_summary"],
"calendar_id": item['calendar_id'], "calendar_id": item["calendar_id"],
"start_time": item['start_time'], "start_time": item["start_time"],
"end_time": item['end_time'], "end_time": item["end_time"],
"location": item['location'], "location": item["location"],
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
@ -527,7 +540,9 @@ async def index_google_calendar_events(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue
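
Every connector in this commit skips re-indexing when the stored content hash matches the freshly computed one, and while skipping resets any document stuck in pending/processing back to ready. A compact sketch of that check; hashing with sha256 is an assumption here, since the hash helper itself is not part of these hunks.

import hashlib

def compute_content_hash(content: str) -> str:
    # Assumed hashing scheme, for illustration only.
    return hashlib.sha256(content.encode("utf-8")).hexdigest()

class StoredDoc:
    def __init__(self, content: str, status: str = "processing") -> None:
        self.content_hash = compute_content_hash(content)
        self.status = status

def should_skip(existing: StoredDoc, new_content: str) -> bool:
    if existing.content_hash == compute_content_hash(new_content):
        # Content unchanged: make sure the status is not stuck, then skip.
        if existing.status != "ready":
            existing.status = "ready"
        return True
    return False

doc = StoredDoc("same event body")
print(should_skip(doc, "same event body"), doc.status)  # True ready
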

View file

@ -533,7 +533,9 @@ async def _index_full_scan(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f[1] and f[1].id is None])} pending documents") logger.info(
f"Phase 1: Committing {len([f for f in files_to_process if f[1] and f[1].id is None])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -568,9 +570,7 @@ async def _index_full_scan(
if documents_indexed % 10 == 0 and documents_indexed > 0: if documents_indexed % 10 == 0 and documents_indexed > 0:
await session.commit() await session.commit()
logger.info( logger.info(f"Committed batch: {documents_indexed} files indexed so far")
f"Committed batch: {documents_indexed} files indexed so far"
)
logger.info( logger.info(
f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped, {documents_failed} failed" f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped, {documents_failed} failed"
@ -676,7 +676,7 @@ async def _index_with_delta_sync(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing pending documents") logger.info("Phase 1: Committing pending documents")
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -685,7 +685,7 @@ async def _index_with_delta_sync(
# ======================================================================= # =======================================================================
logger.info(f"Phase 2: Processing {len(changes_to_process)} changes") logger.info(f"Phase 2: Processing {len(changes_to_process)} changes")
for change, file, pending_doc in changes_to_process: for _, file, pending_doc in changes_to_process:
# Check if it's time for a heartbeat update # Check if it's time for a heartbeat update
if on_heartbeat_callback: if on_heartbeat_callback:
current_time = time.time() current_time = time.time()
@ -786,7 +786,9 @@ async def _create_pending_document_for_file(
if content_unchanged: if content_unchanged:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
return None, True return None, True
@ -1042,12 +1044,13 @@ async def _process_single_file(
processed_doc = await check_document_by_unique_identifier( processed_doc = await check_document_by_unique_identifier(
session, unique_identifier_hash session, unique_identifier_hash
) )
if processed_doc: # Ensure status is READY
# Ensure status is READY if processed_doc and not DocumentStatus.is_state(
if not DocumentStatus.is_state(processed_doc.status, DocumentStatus.READY): processed_doc.status, DocumentStatus.READY
processed_doc.status = DocumentStatus.ready() ):
processed_doc.updated_at = get_current_timestamp() processed_doc.status = DocumentStatus.ready()
await session.commit() processed_doc.updated_at = get_current_timestamp()
await session.commit()
logger.info(f"Successfully indexed Google Drive file: {file_name}") logger.info(f"Successfully indexed Google Drive file: {file_name}")
return 1, 0, 0 return 1, 0, 0
@ -1061,7 +1064,9 @@ async def _process_single_file(
pending_document.updated_at = get_current_timestamp() pending_document.updated_at = get_current_timestamp()
await session.commit() await session.commit()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
return 0, 0, 1 return 0, 0, 1
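
The full-scan path above also commits in batches, every tenth file, so a long run does not sit inside one huge transaction. A stripped-down version of that batching with a dummy session standing in for the real AsyncSession:

import asyncio

class DummySession:
    def __init__(self) -> None:
        self.commits = 0

    async def commit(self) -> None:
        self.commits += 1

async def index_files(files: list, batch_size: int = 10) -> int:
    session = DummySession()
    documents_indexed = 0
    for _name in files:
        documents_indexed += 1  # per-file chunking/embedding would go here
        if documents_indexed % batch_size == 0:
            await session.commit()
    await session.commit()  # final commit for the tail of the batch
    return session.commits

print(asyncio.run(index_files([f"f{i}" for i in range(25)])))  # 3 commits: 10, 20, final
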

View file

@ -228,7 +228,9 @@ async def index_google_gmail_messages(
documents_indexed = 0 documents_indexed = 0
documents_skipped = 0 documents_skipped = 0
documents_failed = 0 # Track messages that failed processing documents_failed = 0 # Track messages that failed processing
duplicate_content_count = 0 # Track messages skipped due to duplicate content_hash duplicate_content_count = (
0 # Track messages skipped due to duplicate content_hash
)
# Heartbeat tracking - update notification periodically to prevent appearing stuck # Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time() last_heartbeat_time = time.time()
@ -294,23 +296,27 @@ async def index_google_gmail_messages(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
messages_to_process.append({ messages_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'markdown_content': markdown_content, "is_new": False,
'content_hash': content_hash, "markdown_content": markdown_content,
'message_id': message_id, "content_hash": content_hash,
'thread_id': thread_id, "message_id": message_id,
'subject': subject, "thread_id": thread_id,
'sender': sender, "subject": subject,
'date_str': date_str, "sender": sender,
}) "date_str": date_str,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -356,17 +362,19 @@ async def index_google_gmail_messages(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
messages_to_process.append({ messages_to_process.append(
'document': document, {
'is_new': True, "document": document,
'markdown_content': markdown_content, "is_new": True,
'content_hash': content_hash, "markdown_content": markdown_content,
'message_id': message_id, "content_hash": content_hash,
'thread_id': thread_id, "message_id": message_id,
'subject': subject, "thread_id": thread_id,
'sender': sender, "subject": subject,
'date_str': date_str, "sender": sender,
}) "date_str": date_str,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True)
@ -375,7 +383,9 @@ async def index_google_gmail_messages(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -392,7 +402,7 @@ async def index_google_gmail_messages(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -405,16 +415,21 @@ async def index_google_gmail_messages(
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"message_id": item['message_id'], "message_id": item["message_id"],
"thread_id": item['thread_id'], "thread_id": item["thread_id"],
"subject": item['subject'], "subject": item["subject"],
"sender": item['sender'], "sender": item["sender"],
"date": item['date_str'], "date": item["date_str"],
"document_type": "Gmail Message", "document_type": "Gmail Message",
"connector_type": "Google Gmail", "connector_type": "Google Gmail",
} }
summary_content, summary_embedding = await generate_document_summary( (
item['markdown_content'], user_llm, document_metadata_for_summary summary_content,
summary_embedding,
) = await generate_document_summary(
item["markdown_content"],
user_llm,
document_metadata_for_summary,
) )
else: else:
summary_content = f"Google Gmail Message: {item['subject']}\n\n" summary_content = f"Google Gmail Message: {item['subject']}\n\n"
@ -424,19 +439,19 @@ async def index_google_gmail_messages(
summary_content summary_content
) )
chunks = await create_document_chunks(item['markdown_content']) chunks = await create_document_chunks(item["markdown_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['subject'] document.title = item["subject"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"message_id": item['message_id'], "message_id": item["message_id"],
"thread_id": item['thread_id'], "thread_id": item["thread_id"],
"subject": item['subject'], "subject": item["subject"],
"sender": item['sender'], "sender": item["sender"],
"date": item['date_str'], "date": item["date_str"],
"connector_id": connector_id, "connector_id": connector_id,
} }
safe_set_chunks(document, chunks) safe_set_chunks(document, chunks)
@ -459,7 +474,9 @@ async def index_google_gmail_messages(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue
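
When Phase 2 fails for a Gmail message, only that document is marked failed, and even the status update sits in its own try/except so a second error cannot break the loop. A bare-bones version of that double guard, with a string status standing in for DocumentStatus:

import logging

logger = logging.getLogger(__name__)

class Doc:
    def __init__(self, title: str) -> None:
        self.title = title
        self.status = "pending"

def process(doc: Doc) -> None:
    if doc.title == "bad":
        raise ValueError("boom")
    doc.status = "ready"

def run(docs: list) -> int:
    failed = 0
    for doc in docs:
        try:
            process(doc)
        except Exception as exc:
            try:
                doc.status = f"failed: {exc}"
            except Exception as status_error:
                logger.error(f"Failed to update document status to failed: {status_error}")
            failed += 1
            continue
    return failed

print(run([Doc("ok"), Doc("bad")]))  # 1
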

View file

@ -239,23 +239,27 @@ async def index_jira_issues(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
issues_to_process.append({ issues_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'issue_content': issue_content, "is_new": False,
'content_hash': content_hash, "issue_content": issue_content,
'issue_id': issue_id, "content_hash": content_hash,
'issue_identifier': issue_identifier, "issue_id": issue_id,
'issue_title': issue_title, "issue_identifier": issue_identifier,
'formatted_issue': formatted_issue, "issue_title": issue_title,
'comment_count': comment_count, "formatted_issue": formatted_issue,
}) "comment_count": comment_count,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -301,17 +305,19 @@ async def index_jira_issues(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
issues_to_process.append({ issues_to_process.append(
'document': document, {
'is_new': True, "document": document,
'issue_content': issue_content, "is_new": True,
'content_hash': content_hash, "issue_content": issue_content,
'issue_id': issue_id, "content_hash": content_hash,
'issue_identifier': issue_identifier, "issue_id": issue_id,
'issue_title': issue_title, "issue_identifier": issue_identifier,
'formatted_issue': formatted_issue, "issue_title": issue_title,
'comment_count': comment_count, "formatted_issue": formatted_issue,
}) "comment_count": comment_count,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for issue: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for issue: {e!s}", exc_info=True)
@ -320,7 +326,9 @@ async def index_jira_issues(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -337,7 +345,7 @@ async def index_jira_issues(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -350,11 +358,11 @@ async def index_jira_issues(
if user_llm: if user_llm:
document_metadata = { document_metadata = {
"issue_key": item['issue_identifier'], "issue_key": item["issue_identifier"],
"issue_title": item['issue_title'], "issue_title": item["issue_title"],
"status": item['formatted_issue'].get("status", "Unknown"), "status": item["formatted_issue"].get("status", "Unknown"),
"priority": item['formatted_issue'].get("priority", "Unknown"), "priority": item["formatted_issue"].get("priority", "Unknown"),
"comment_count": item['comment_count'], "comment_count": item["comment_count"],
"document_type": "Jira Issue", "document_type": "Jira Issue",
"connector_type": "Jira", "connector_type": "Jira",
} }
@ -362,34 +370,32 @@ async def index_jira_issues(
summary_content, summary_content,
summary_embedding, summary_embedding,
) = await generate_document_summary( ) = await generate_document_summary(
item['issue_content'], user_llm, document_metadata item["issue_content"], user_llm, document_metadata
) )
else: else:
# Fallback to simple summary if no LLM configured # Fallback to simple summary if no LLM configured
summary_content = f"Jira Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['formatted_issue'].get('status', 'Unknown')}\n\n" summary_content = f"Jira Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['formatted_issue'].get('status', 'Unknown')}\n\n"
if item['formatted_issue'].get("description"): if item["formatted_issue"].get("description"):
summary_content += ( summary_content += f"Description: {item['formatted_issue'].get('description')}\n\n"
f"Description: {item['formatted_issue'].get('description')}\n\n"
)
summary_content += f"Comments: {item['comment_count']}" summary_content += f"Comments: {item['comment_count']}"
summary_embedding = config.embedding_model_instance.embed( summary_embedding = config.embedding_model_instance.embed(
summary_content summary_content
) )
# Process chunks - using the full issue content with comments # Process chunks - using the full issue content with comments
chunks = await create_document_chunks(item['issue_content']) chunks = await create_document_chunks(item["issue_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = f"{item['issue_identifier']}: {item['issue_title']}" document.title = f"{item['issue_identifier']}: {item['issue_title']}"
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"issue_id": item['issue_id'], "issue_id": item["issue_id"],
"issue_identifier": item['issue_identifier'], "issue_identifier": item["issue_identifier"],
"issue_title": item['issue_title'], "issue_title": item["issue_title"],
"state": item['formatted_issue'].get("status", "Unknown"), "state": item["formatted_issue"].get("status", "Unknown"),
"comment_count": item['comment_count'], "comment_count": item["comment_count"],
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
@ -416,7 +422,9 @@ async def index_jira_issues(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue # Skip this issue and continue with others continue # Skip this issue and continue with others
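A minimal sketch of the no-LLM fallback summary assembled in the hunk above, assuming the item dict carries the keys queued in Phase 1; the embedding call that follows it is left out.

def build_fallback_jira_summary(item: dict) -> str:
    # Mirrors the fallback branch: identifier, title, status, optional
    # description, then the comment count.
    formatted = item["formatted_issue"]
    summary = (
        f"Jira Issue {item['issue_identifier']}: {item['issue_title']}\n\n"
        f"Status: {formatted.get('status', 'Unknown')}\n\n"
    )
    if formatted.get("description"):
        summary += f"Description: {formatted.get('description')}\n\n"
    summary += f"Comments: {item['comment_count']}"
    return summary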
View file
@ -272,7 +272,9 @@ async def index_linear_issues(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
logger.info( logger.info(
f"Document for Linear issue {issue_identifier} unchanged. Skipping." f"Document for Linear issue {issue_identifier} unchanged. Skipping."
@ -281,19 +283,21 @@ async def index_linear_issues(
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
issues_to_process.append({ issues_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'issue_content': issue_content, "is_new": False,
'content_hash': content_hash, "issue_content": issue_content,
'issue_id': issue_id, "content_hash": content_hash,
'issue_identifier': issue_identifier, "issue_id": issue_id,
'issue_title': issue_title, "issue_identifier": issue_identifier,
'state': state, "issue_title": issue_title,
'description': description, "state": state,
'comment_count': comment_count, "description": description,
'priority': priority, "comment_count": comment_count,
}) "priority": priority,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -338,19 +342,21 @@ async def index_linear_issues(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
issues_to_process.append({ issues_to_process.append(
'document': document, {
'is_new': True, "document": document,
'issue_content': issue_content, "is_new": True,
'content_hash': content_hash, "issue_content": issue_content,
'issue_id': issue_id, "content_hash": content_hash,
'issue_identifier': issue_identifier, "issue_id": issue_id,
'issue_title': issue_title, "issue_identifier": issue_identifier,
'state': state, "issue_title": issue_title,
'description': description, "state": state,
'comment_count': comment_count, "description": description,
'priority': priority, "comment_count": comment_count,
}) "priority": priority,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for issue: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for issue: {e!s}", exc_info=True)
@ -359,7 +365,9 @@ async def index_linear_issues(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
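A hedged sketch of the skip-if-unchanged check each connector in this commit runs before queueing work; sha256 here is an assumption standing in for however the content hash is actually computed upstream.

import hashlib


def is_unchanged(existing_hash: str | None, new_content: str) -> bool:
    # Compare the stored content hash against one computed from the freshly
    # fetched content; a match means the document can be skipped.
    new_hash = hashlib.sha256(new_content.encode("utf-8")).hexdigest()
    return existing_hash == new_hash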
@ -376,7 +384,7 @@ async def index_linear_issues(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -389,20 +397,23 @@ async def index_linear_issues(
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"issue_id": item['issue_identifier'], "issue_id": item["issue_identifier"],
"issue_title": item['issue_title'], "issue_title": item["issue_title"],
"state": item['state'], "state": item["state"],
"priority": item['priority'], "priority": item["priority"],
"comment_count": item['comment_count'], "comment_count": item["comment_count"],
"document_type": "Linear Issue", "document_type": "Linear Issue",
"connector_type": "Linear", "connector_type": "Linear",
} }
summary_content, summary_embedding = await generate_document_summary( (
item['issue_content'], user_llm, document_metadata_for_summary summary_content,
summary_embedding,
) = await generate_document_summary(
item["issue_content"], user_llm, document_metadata_for_summary
) )
else: else:
# Fallback to simple summary if no LLM configured # Fallback to simple summary if no LLM configured
description = item['description'] description = item["description"]
if description and len(description) > 1000: if description and len(description) > 1000:
description = description[:997] + "..." description = description[:997] + "..."
summary_content = f"Linear Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['state']}\n\n" summary_content = f"Linear Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['state']}\n\n"
@ -413,19 +424,19 @@ async def index_linear_issues(
summary_content summary_content
) )
chunks = await create_document_chunks(item['issue_content']) chunks = await create_document_chunks(item["issue_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = f"{item['issue_identifier']}: {item['issue_title']}" document.title = f"{item['issue_identifier']}: {item['issue_title']}"
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"issue_id": item['issue_id'], "issue_id": item["issue_id"],
"issue_identifier": item['issue_identifier'], "issue_identifier": item["issue_identifier"],
"issue_title": item['issue_title'], "issue_title": item["issue_title"],
"state": item['state'], "state": item["state"],
"comment_count": item['comment_count'], "comment_count": item["comment_count"],
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
@ -452,7 +463,9 @@ async def index_linear_issues(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
skipped_issues.append( skipped_issues.append(
f"{item.get('issue_identifier', 'Unknown')} (processing error)" f"{item.get('issue_identifier', 'Unknown')} (processing error)"
) )
@ -466,7 +479,9 @@ async def index_linear_issues(
logger.info(f"Final commit: Total {documents_indexed} Linear issues processed") logger.info(f"Final commit: Total {documents_indexed} Linear issues processed")
try: try:
await session.commit() await session.commit()
logger.info("Successfully committed all Linear document changes to database") logger.info(
"Successfully committed all Linear document changes to database"
)
except Exception as e: except Exception as e:
# Handle any remaining integrity errors gracefully (race conditions, etc.) # Handle any remaining integrity errors gracefully (race conditions, etc.)
if ( if (
View file
@ -305,7 +305,9 @@ async def index_luma_events(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
logger.info( logger.info(
f"Document for Luma event {event_name} unchanged. Skipping." f"Document for Luma event {event_name} unchanged. Skipping."
@ -314,23 +316,25 @@ async def index_luma_events(
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
events_to_process.append({ events_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'event_id': event_id, "is_new": False,
'event_name': event_name, "event_id": event_id,
'event_url': event_url, "event_name": event_name,
'event_markdown': event_markdown, "event_url": event_url,
'content_hash': content_hash, "event_markdown": event_markdown,
'start_at': start_at, "content_hash": content_hash,
'end_at': end_at, "start_at": start_at,
'timezone': timezone, "end_at": end_at,
'location': location, "timezone": timezone,
'city': city, "location": location,
'host_names': host_names, "city": city,
'description': description, "host_names": host_names,
'cover_url': cover_url, "description": description,
}) "cover_url": cover_url,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -380,23 +384,25 @@ async def index_luma_events(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
events_to_process.append({ events_to_process.append(
'document': document, {
'is_new': True, "document": document,
'event_id': event_id, "is_new": True,
'event_name': event_name, "event_id": event_id,
'event_url': event_url, "event_name": event_name,
'event_markdown': event_markdown, "event_url": event_url,
'content_hash': content_hash, "event_markdown": event_markdown,
'start_at': start_at, "content_hash": content_hash,
'end_at': end_at, "start_at": start_at,
'timezone': timezone, "end_at": end_at,
'location': location, "timezone": timezone,
'city': city, "location": location,
'host_names': host_names, "city": city,
'description': description, "host_names": host_names,
'cover_url': cover_url, "description": description,
}) "cover_url": cover_url,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
@ -405,7 +411,9 @@ async def index_luma_events(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -422,7 +430,7 @@ async def index_luma_events(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -435,15 +443,15 @@ async def index_luma_events(
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"event_id": item['event_id'], "event_id": item["event_id"],
"event_name": item['event_name'], "event_name": item["event_name"],
"event_url": item['event_url'], "event_url": item["event_url"],
"start_at": item['start_at'], "start_at": item["start_at"],
"end_at": item['end_at'], "end_at": item["end_at"],
"timezone": item['timezone'], "timezone": item["timezone"],
"location": item['location'] or "No location", "location": item["location"] or "No location",
"city": item['city'], "city": item["city"],
"hosts": item['host_names'], "hosts": item["host_names"],
"document_type": "Luma Event", "document_type": "Luma Event",
"connector_type": "Luma", "connector_type": "Luma",
} }
@ -451,26 +459,26 @@ async def index_luma_events(
summary_content, summary_content,
summary_embedding, summary_embedding,
) = await generate_document_summary( ) = await generate_document_summary(
item['event_markdown'], user_llm, document_metadata_for_summary item["event_markdown"], user_llm, document_metadata_for_summary
) )
else: else:
# Fallback to simple summary if no LLM configured # Fallback to simple summary if no LLM configured
summary_content = f"Luma Event: {item['event_name']}\n\n" summary_content = f"Luma Event: {item['event_name']}\n\n"
if item['event_url']: if item["event_url"]:
summary_content += f"URL: {item['event_url']}\n" summary_content += f"URL: {item['event_url']}\n"
summary_content += f"Start: {item['start_at']}\n" summary_content += f"Start: {item['start_at']}\n"
summary_content += f"End: {item['end_at']}\n" summary_content += f"End: {item['end_at']}\n"
if item['timezone']: if item["timezone"]:
summary_content += f"Timezone: {item['timezone']}\n" summary_content += f"Timezone: {item['timezone']}\n"
if item['location']: if item["location"]:
summary_content += f"Location: {item['location']}\n" summary_content += f"Location: {item['location']}\n"
if item['city']: if item["city"]:
summary_content += f"City: {item['city']}\n" summary_content += f"City: {item['city']}\n"
if item['host_names']: if item["host_names"]:
summary_content += f"Hosts: {item['host_names']}\n" summary_content += f"Hosts: {item['host_names']}\n"
if item['description']: if item["description"]:
desc_preview = item['description'][:1000] desc_preview = item["description"][:1000]
if len(item['description']) > 1000: if len(item["description"]) > 1000:
desc_preview += "..." desc_preview += "..."
summary_content += f"Description: {desc_preview}\n" summary_content += f"Description: {desc_preview}\n"
@ -478,24 +486,24 @@ async def index_luma_events(
summary_content summary_content
) )
chunks = await create_document_chunks(item['event_markdown']) chunks = await create_document_chunks(item["event_markdown"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['event_name'] document.title = item["event_name"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"event_id": item['event_id'], "event_id": item["event_id"],
"event_name": item['event_name'], "event_name": item["event_name"],
"event_url": item['event_url'], "event_url": item["event_url"],
"start_at": item['start_at'], "start_at": item["start_at"],
"end_at": item['end_at'], "end_at": item["end_at"],
"timezone": item['timezone'], "timezone": item["timezone"],
"location": item['location'], "location": item["location"],
"city": item['city'], "city": item["city"],
"hosts": item['host_names'], "hosts": item["host_names"],
"cover_url": item['cover_url'], "cover_url": item["cover_url"],
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
@ -522,7 +530,9 @@ async def index_luma_events(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
skipped_events.append( skipped_events.append(
f"{item.get('event_name', 'Unknown')} (processing error)" f"{item.get('event_name', 'Unknown')} (processing error)"
) )
View file
@ -354,20 +354,24 @@ async def index_notion_pages(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
pages_to_process.append({ pages_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'markdown_content': markdown_content, "is_new": False,
'content_hash': content_hash, "markdown_content": markdown_content,
'page_id': page_id, "content_hash": content_hash,
'page_title': page_title, "page_id": page_id,
}) "page_title": page_title,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -410,14 +414,16 @@ async def index_notion_pages(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
pages_to_process.append({ pages_to_process.append(
'document': document, {
'is_new': True, "document": document,
'markdown_content': markdown_content, "is_new": True,
'content_hash': content_hash, "markdown_content": markdown_content,
'page_id': page_id, "content_hash": content_hash,
'page_title': page_title, "page_id": page_id,
}) "page_title": page_title,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
@ -426,7 +432,9 @@ async def index_notion_pages(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -443,7 +451,7 @@ async def index_notion_pages(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
@ -456,13 +464,18 @@ async def index_notion_pages(
if user_llm: if user_llm:
document_metadata_for_summary = { document_metadata_for_summary = {
"page_title": item['page_title'], "page_title": item["page_title"],
"page_id": item['page_id'], "page_id": item["page_id"],
"document_type": "Notion Page", "document_type": "Notion Page",
"connector_type": "Notion", "connector_type": "Notion",
} }
summary_content, summary_embedding = await generate_document_summary( (
item['markdown_content'], user_llm, document_metadata_for_summary summary_content,
summary_embedding,
) = await generate_document_summary(
item["markdown_content"],
user_llm,
document_metadata_for_summary,
) )
else: else:
# Fallback to simple summary if no LLM configured # Fallback to simple summary if no LLM configured
@ -471,16 +484,16 @@ async def index_notion_pages(
summary_content summary_content
) )
chunks = await create_document_chunks(item['markdown_content']) chunks = await create_document_chunks(item["markdown_content"])
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['page_title'] document.title = item["page_title"]
document.content = summary_content document.content = summary_content
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = summary_embedding document.embedding = summary_embedding
document.document_metadata = { document.document_metadata = {
"page_title": item['page_title'], "page_title": item["page_title"],
"page_id": item['page_id'], "page_id": item["page_id"],
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
@ -504,7 +517,9 @@ async def index_notion_pages(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
skipped_pages.append(f"{item['page_title']} (processing error)") skipped_pages.append(f"{item['page_title']} (processing error)")
documents_failed += 1 documents_failed += 1
continue continue
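The throttled heartbeat used by every Phase 2 loop in this commit, sketched as a helper; the interval constant is an assumption, since only the comparison is visible in the diff.

import time

HEARTBEAT_INTERVAL_SECONDS = 30  # assumed interval; defined elsewhere in the app


async def maybe_heartbeat(on_heartbeat_callback, progress: int, last_heartbeat_time: float) -> float:
    # Only fire the callback when enough wall-clock time has passed, and hand
    # the (possibly updated) timestamp back to the caller to carry forward.
    current_time = time.monotonic()
    if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
        await on_heartbeat_callback(progress)
        return current_time
    return last_heartbeat_time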
View file
@ -382,27 +382,31 @@ async def index_obsidian_vault(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
logger.debug(f"Note {title} unchanged, skipping") logger.debug(f"Note {title} unchanged, skipping")
skipped_count += 1 skipped_count += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
files_to_process.append({ files_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'file_info': file_info, "is_new": False,
'content': content, "file_info": file_info,
'body_content': body_content, "content": content,
'frontmatter': frontmatter, "body_content": body_content,
'wiki_links': wiki_links, "frontmatter": frontmatter,
'tags': tags, "wiki_links": wiki_links,
'title': title, "tags": tags,
'relative_path': relative_path, "title": title,
'content_hash': content_hash, "relative_path": relative_path,
'unique_identifier_hash': unique_identifier_hash, "content_hash": content_hash,
}) "unique_identifier_hash": unique_identifier_hash,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -445,20 +449,22 @@ async def index_obsidian_vault(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
files_to_process.append({ files_to_process.append(
'document': document, {
'is_new': True, "document": document,
'file_info': file_info, "is_new": True,
'content': content, "file_info": file_info,
'body_content': body_content, "content": content,
'frontmatter': frontmatter, "body_content": body_content,
'wiki_links': wiki_links, "frontmatter": frontmatter,
'tags': tags, "wiki_links": wiki_links,
'title': title, "tags": tags,
'relative_path': relative_path, "title": title,
'content_hash': content_hash, "relative_path": relative_path,
'unique_identifier_hash': unique_identifier_hash, "content_hash": content_hash,
}) "unique_identifier_hash": unique_identifier_hash,
}
)
except Exception as e: except Exception as e:
logger.exception( logger.exception(
@ -469,7 +475,9 @@ async def index_obsidian_vault(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -491,22 +499,22 @@ async def index_obsidian_vault(
await on_heartbeat_callback(indexed_count) await on_heartbeat_callback(indexed_count)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
await session.commit() await session.commit()
# Extract data from item # Extract data from item
title = item['title'] title = item["title"]
relative_path = item['relative_path'] relative_path = item["relative_path"]
content = item['content'] content = item["content"]
body_content = item['body_content'] body_content = item["body_content"]
frontmatter = item['frontmatter'] frontmatter = item["frontmatter"]
wiki_links = item['wiki_links'] wiki_links = item["wiki_links"]
tags = item['tags'] tags = item["tags"]
content_hash = item['content_hash'] content_hash = item["content_hash"]
file_info = item['file_info'] file_info = item["file_info"]
# Build metadata # Build metadata
document_metadata = { document_metadata = {
@ -584,7 +592,9 @@ async def index_obsidian_vault(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
failed_count += 1 failed_count += 1
continue continue
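A condensed sketch of the two-phase shape shared by all of these connectors: Phase 1 queues lightweight dicts and commits the new pending documents once, Phase 2 revisits each one, flips it to processing, does the heavy work, then marks it ready or failed. make_pending_document and process_one are hypothetical stand-ins; DocumentStatus and the session are the app's own.

async def index_two_phase(raw_items, session, make_pending_document, process_one):
    # Phase 1: create documents in PENDING state so they all show up in the UI.
    queued = []
    for raw in raw_items:
        document = make_pending_document(raw)
        session.add(document)
        queued.append({"document": document, "is_new": True, "raw": raw})
    await session.commit()

    # Phase 2: heavy work, one document at a time.
    for item in queued:
        document = item["document"]
        document.status = DocumentStatus.processing()
        await session.commit()  # only this document shows "processing"
        try:
            await process_one(document, item["raw"])
            document.status = DocumentStatus.ready()
        except Exception as exc:  # broad on purpose, mirroring the handlers above
            document.status = DocumentStatus.failed(str(exc))
        await session.commit()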
@ -592,9 +602,7 @@ async def index_obsidian_vault(
await update_connector_last_indexed(session, connector, update_last_indexed) await update_connector_last_indexed(session, connector, update_last_indexed)
# Final commit for any remaining documents not yet committed in batches # Final commit for any remaining documents not yet committed in batches
logger.info( logger.info(f"Final commit: Total {indexed_count} Obsidian notes processed")
f"Final commit: Total {indexed_count} Obsidian notes processed"
)
try: try:
await session.commit() await session.commit()
logger.info( logger.info(
View file
@ -314,7 +314,9 @@ async def index_slack_messages(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
logger.info( logger.info(
f"Document for Slack message {msg_ts} in channel {channel_name} unchanged. Skipping." f"Document for Slack message {msg_ts} in channel {channel_name} unchanged. Skipping."
@ -323,18 +325,20 @@ async def index_slack_messages(
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
messages_to_process.append({ messages_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'combined_document_string': combined_document_string, "is_new": False,
'content_hash': content_hash, "combined_document_string": combined_document_string,
'channel_name': channel_name, "content_hash": content_hash,
'channel_id': channel_id, "channel_name": channel_name,
'msg_ts': msg_ts, "channel_id": channel_id,
'start_date': start_date_str, "msg_ts": msg_ts,
'end_date': end_date_str, "start_date": start_date_str,
'message_count': len(formatted_messages), "end_date": end_date_str,
}) "message_count": len(formatted_messages),
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -377,18 +381,20 @@ async def index_slack_messages(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
messages_to_process.append({ messages_to_process.append(
'document': document, {
'is_new': True, "document": document,
'combined_document_string': combined_document_string, "is_new": True,
'content_hash': content_hash, "combined_document_string": combined_document_string,
'channel_name': channel_name, "content_hash": content_hash,
'channel_id': channel_id, "channel_name": channel_name,
'msg_ts': msg_ts, "channel_id": channel_id,
'start_date': start_date_str, "msg_ts": msg_ts,
'end_date': end_date_str, "start_date": start_date_str,
'message_count': len(formatted_messages), "end_date": end_date_str,
}) "message_count": len(formatted_messages),
}
)
logger.info( logger.info(
f"Phase 1: Collected {len(formatted_messages)} messages from channel {channel_name}" f"Phase 1: Collected {len(formatted_messages)} messages from channel {channel_name}"
@ -409,7 +415,9 @@ async def index_slack_messages(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -426,29 +434,29 @@ async def index_slack_messages(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
await session.commit() await session.commit()
# Heavy processing (embeddings, chunks) # Heavy processing (embeddings, chunks)
chunks = await create_document_chunks(item['combined_document_string']) chunks = await create_document_chunks(item["combined_document_string"])
doc_embedding = config.embedding_model_instance.embed( doc_embedding = config.embedding_model_instance.embed(
item['combined_document_string'] item["combined_document_string"]
) )
# Update document to READY with actual content # Update document to READY with actual content
document.title = item['channel_name'] document.title = item["channel_name"]
document.content = item['combined_document_string'] document.content = item["combined_document_string"]
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = doc_embedding document.embedding = doc_embedding
document.document_metadata = { document.document_metadata = {
"channel_name": item['channel_name'], "channel_name": item["channel_name"],
"channel_id": item['channel_id'], "channel_id": item["channel_id"],
"start_date": item['start_date'], "start_date": item["start_date"],
"end_date": item['end_date'], "end_date": item["end_date"],
"message_count": item['message_count'], "message_count": item["message_count"],
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
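The metadata every connector in this commit writes follows the same shape: connector-specific fields plus a human-readable indexing timestamp and the owning connector id. A sketch of that shape, with the strftime format taken from the diff:

from datetime import datetime


def base_document_metadata(connector_id: int, **fields) -> dict:
    return {
        **fields,
        "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "connector_id": connector_id,
    }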
@ -475,7 +483,9 @@ async def index_slack_messages(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue
View file
@ -332,25 +332,31 @@ async def index_teams_messages(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status = DocumentStatus.ready() existing_document.status, DocumentStatus.READY
):
existing_document.status = (
DocumentStatus.ready()
)
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for update (will be set to processing in Phase 2) # Queue existing document for update (will be set to processing in Phase 2)
messages_to_process.append({ messages_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'combined_document_string': combined_document_string, "is_new": False,
'content_hash': content_hash, "combined_document_string": combined_document_string,
'team_name': team_name, "content_hash": content_hash,
'team_id': team_id, "team_name": team_name,
'channel_name': channel_name, "team_id": team_id,
'channel_id': channel_id, "channel_name": channel_name,
'message_id': message_id, "channel_id": channel_id,
'start_date': start_date_str, "message_id": message_id,
'end_date': end_date_str, "start_date": start_date_str,
}) "end_date": end_date_str,
}
)
continue continue
# Document doesn't exist by unique_identifier_hash # Document doesn't exist by unique_identifier_hash
@ -400,19 +406,21 @@ async def index_teams_messages(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
messages_to_process.append({ messages_to_process.append(
'document': document, {
'is_new': True, "document": document,
'combined_document_string': combined_document_string, "is_new": True,
'content_hash': content_hash, "combined_document_string": combined_document_string,
'team_name': team_name, "content_hash": content_hash,
'team_id': team_id, "team_name": team_name,
'channel_name': channel_name, "team_id": team_id,
'channel_id': channel_id, "channel_name": channel_name,
'message_id': message_id, "channel_id": channel_id,
'start_date': start_date_str, "message_id": message_id,
'end_date': end_date_str, "start_date": start_date_str,
}) "end_date": end_date_str,
}
)
except Exception as e: except Exception as e:
logger.error( logger.error(
@ -432,7 +440,9 @@ async def index_teams_messages(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -449,30 +459,30 @@ async def index_teams_messages(
await on_heartbeat_callback(documents_indexed) await on_heartbeat_callback(documents_indexed)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
document.status = DocumentStatus.processing() document.status = DocumentStatus.processing()
await session.commit() await session.commit()
# Heavy processing (embeddings, chunks) # Heavy processing (embeddings, chunks)
chunks = await create_document_chunks(item['combined_document_string']) chunks = await create_document_chunks(item["combined_document_string"])
doc_embedding = config.embedding_model_instance.embed( doc_embedding = config.embedding_model_instance.embed(
item['combined_document_string'] item["combined_document_string"]
) )
# Update document to READY with actual content # Update document to READY with actual content
document.title = f"{item['team_name']} - {item['channel_name']}" document.title = f"{item['team_name']} - {item['channel_name']}"
document.content = item['combined_document_string'] document.content = item["combined_document_string"]
document.content_hash = item['content_hash'] document.content_hash = item["content_hash"]
document.embedding = doc_embedding document.embedding = doc_embedding
document.document_metadata = { document.document_metadata = {
"team_name": item['team_name'], "team_name": item["team_name"],
"team_id": item['team_id'], "team_id": item["team_id"],
"channel_name": item['channel_name'], "channel_name": item["channel_name"],
"channel_id": item['channel_id'], "channel_id": item["channel_id"],
"start_date": item['start_date'], "start_date": item["start_date"],
"end_date": item['end_date'], "end_date": item["end_date"],
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"connector_id": connector_id, "connector_id": connector_id,
} }
@ -497,7 +507,9 @@ async def index_teams_messages(
document.status = DocumentStatus.failed(str(e)) document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue
@ -510,9 +522,7 @@ async def index_teams_messages(
) )
try: try:
await session.commit() await session.commit()
logger.info( logger.info("Successfully committed all Teams document changes to database")
"Successfully committed all Teams document changes to database"
)
except Exception as e: except Exception as e:
# Handle any remaining integrity errors gracefully (race conditions, etc.) # Handle any remaining integrity errors gracefully (race conditions, etc.)
if ( if (
View file
@ -184,22 +184,28 @@ async def index_crawled_urls(
if existing_document: if existing_document:
# Document exists - check if it's already being processed # Document exists - check if it's already being processed
if DocumentStatus.is_state(existing_document.status, DocumentStatus.PENDING): if DocumentStatus.is_state(
existing_document.status, DocumentStatus.PENDING
):
logger.info(f"URL {url} already pending. Skipping.") logger.info(f"URL {url} already pending. Skipping.")
documents_skipped += 1 documents_skipped += 1
continue continue
if DocumentStatus.is_state(existing_document.status, DocumentStatus.PROCESSING): if DocumentStatus.is_state(
existing_document.status, DocumentStatus.PROCESSING
):
logger.info(f"URL {url} already processing. Skipping.") logger.info(f"URL {url} already processing. Skipping.")
documents_skipped += 1 documents_skipped += 1
continue continue
# Queue existing document for potential update check # Queue existing document for potential update check
urls_to_process.append({ urls_to_process.append(
'document': existing_document, {
'is_new': False, "document": existing_document,
'url': url, "is_new": False,
'unique_identifier_hash': unique_identifier_hash, "url": url,
}) "unique_identifier_hash": unique_identifier_hash,
}
)
continue continue
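Unlike the other connectors, the crawler cannot compare content hashes before fetching, so it only skips URLs whose document is already pending or processing. A sketch of that guard, with DocumentStatus being the app's own:

def already_in_flight(existing_document) -> bool:
    return any(
        DocumentStatus.is_state(existing_document.status, state)
        for state in (DocumentStatus.PENDING, DocumentStatus.PROCESSING)
    )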
# Create new document with PENDING status (visible in UI immediately) # Create new document with PENDING status (visible in UI immediately)
@ -224,12 +230,14 @@ async def index_crawled_urls(
session.add(document) session.add(document)
new_documents_created = True new_documents_created = True
urls_to_process.append({ urls_to_process.append(
'document': document, {
'is_new': True, "document": document,
'url': url, "is_new": True,
'unique_identifier_hash': unique_identifier_hash, "url": url,
}) "unique_identifier_hash": unique_identifier_hash,
}
)
except Exception as e: except Exception as e:
logger.error(f"Error in Phase 1 for URL {url}: {e!s}", exc_info=True) logger.error(f"Error in Phase 1 for URL {url}: {e!s}", exc_info=True)
@ -238,7 +246,9 @@ async def index_crawled_urls(
# Commit all pending documents - they all appear in UI now # Commit all pending documents - they all appear in UI now
if new_documents_created: if new_documents_created:
logger.info(f"Phase 1: Committing {len([u for u in urls_to_process if u['is_new']])} pending documents") logger.info(
f"Phase 1: Committing {len([u for u in urls_to_process if u['is_new']])} pending documents"
)
await session.commit() await session.commit()
# ======================================================================= # =======================================================================
@ -255,9 +265,9 @@ async def index_crawled_urls(
await on_heartbeat_callback(documents_indexed + documents_updated) await on_heartbeat_callback(documents_indexed + documents_updated)
last_heartbeat_time = current_time last_heartbeat_time = current_time
document = item['document'] document = item["document"]
url = item['url'] url = item["url"]
is_new = item['is_new'] is_new = item["is_new"]
try: try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
@ -298,7 +308,9 @@ async def index_crawled_urls(
continue continue
# Format content as structured document for summary generation # Format content as structured document for summary generation
structured_document = crawler.format_to_structured_document(crawl_result) structured_document = crawler.format_to_structured_document(
crawl_result
)
# Generate content hash using a version WITHOUT metadata # Generate content hash using a version WITHOUT metadata
structured_document_for_hash = crawler.format_to_structured_document( structured_document_for_hash = crawler.format_to_structured_document(
@ -339,7 +351,9 @@ async def index_crawled_urls(
f"(existing document ID: {duplicate_by_content.id}). " f"(existing document ID: {duplicate_by_content.id}). "
f"Marking as failed." f"Marking as failed."
) )
document.status = DocumentStatus.failed("Duplicate content exists") document.status = DocumentStatus.failed(
"Duplicate content exists"
)
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
await session.commit() await session.commit()
duplicate_content_count += 1 duplicate_content_count += 1
@ -360,7 +374,10 @@ async def index_crawled_urls(
"document_type": "Crawled URL", "document_type": "Crawled URL",
"crawler_type": crawler_type, "crawler_type": crawler_type,
} }
summary_content, summary_embedding = await generate_document_summary( (
summary_content,
summary_embedding,
) = await generate_document_summary(
structured_document, user_llm, document_metadata_for_summary structured_document, user_llm, document_metadata_for_summary
) )
else: else:
@ -423,7 +440,9 @@ async def index_crawled_urls(
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
await session.commit() await session.commit()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
documents_failed += 1 documents_failed += 1
continue continue
@ -438,7 +457,9 @@ async def index_crawled_urls(
) )
try: try:
await session.commit() await session.commit()
logger.info("Successfully committed all webcrawler document changes to database") logger.info(
"Successfully committed all webcrawler document changes to database"
)
except Exception as e: except Exception as e:
# Handle any remaining integrity errors gracefully # Handle any remaining integrity errors gracefully
if "duplicate key value violates unique constraint" in str(e).lower(): if "duplicate key value violates unique constraint" in str(e).lower():
View file
@ -39,7 +39,8 @@ def safe_set_chunks(document: Document, chunks: list) -> None:
safe_set_chunks(document, chunks) # Always safe safe_set_chunks(document, chunks) # Always safe
""" """
from sqlalchemy.orm.attributes import set_committed_value from sqlalchemy.orm.attributes import set_committed_value
set_committed_value(document, 'chunks', chunks)
set_committed_value(document, "chunks", chunks)
def get_current_timestamp() -> datetime: def get_current_timestamp() -> datetime:
View file
@ -91,7 +91,9 @@ async def add_circleback_meeting_document(
# Document exists - check if content has changed # Document exists - check if content has changed
if existing_document.content_hash == content_hash: if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending) # Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY): if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready() existing_document.status = DocumentStatus.ready()
await session.commit() await session.commit()
logger.info(f"Circleback meeting {meeting_id} unchanged. Skipping.") logger.info(f"Circleback meeting {meeting_id} unchanged. Skipping.")
@ -267,7 +269,9 @@ async def add_circleback_meeting_document(
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
await session.commit() await session.commit()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
raise db_error raise db_error
except Exception as e: except Exception as e:
await session.rollback() await session.rollback()
@ -279,5 +283,7 @@ async def add_circleback_meeting_document(
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
await session.commit() await session.commit()
except Exception as status_error: except Exception as status_error:
logger.error(f"Failed to update document status to failed: {status_error}") logger.error(
f"Failed to update document status to failed: {status_error}"
)
raise RuntimeError(f"Failed to process Circleback meeting: {e!s}") from e raise RuntimeError(f"Failed to process Circleback meeting: {e!s}") from e
View file
@ -1670,8 +1670,13 @@ async def process_file_in_background_with_document(
if filename.lower().endswith((".md", ".markdown", ".txt")): if filename.lower().endswith((".md", ".markdown", ".txt")):
# Update notification: parsing stage # Update notification: parsing stage
if notification: if notification:
await NotificationService.document_processing.notify_processing_progress( await (
session, notification, stage="parsing", stage_message="Reading file" NotificationService.document_processing.notify_processing_progress(
session,
notification,
stage="parsing",
stage_message="Reading file",
)
) )
await task_logger.log_task_progress( await task_logger.log_task_progress(
@ -1695,8 +1700,13 @@ async def process_file_in_background_with_document(
): ):
# Update notification: parsing stage (transcription) # Update notification: parsing stage (transcription)
if notification: if notification:
await NotificationService.document_processing.notify_processing_progress( await (
session, notification, stage="parsing", stage_message="Transcribing audio" NotificationService.document_processing.notify_processing_progress(
session,
notification,
stage="parsing",
stage_message="Transcribing audio",
)
) )
await task_logger.log_task_progress( await task_logger.log_task_progress(
@ -1708,7 +1718,8 @@ async def process_file_in_background_with_document(
# Transcribe audio # Transcribe audio
stt_service_type = ( stt_service_type = (
"local" "local"
if app_config.STT_SERVICE and app_config.STT_SERVICE.startswith("local/") if app_config.STT_SERVICE
and app_config.STT_SERVICE.startswith("local/")
else "external" else "external"
) )
@ -1719,7 +1730,9 @@ async def process_file_in_background_with_document(
transcribed_text = result.get("text", "") transcribed_text = result.get("text", "")
if not transcribed_text: if not transcribed_text:
raise ValueError("Transcription returned empty text") raise ValueError("Transcription returned empty text")
markdown_content = f"# Transcription of {filename}\n\n{transcribed_text}" markdown_content = (
f"# Transcription of {filename}\n\n{transcribed_text}"
)
else: else:
with open(file_path, "rb") as audio_file: with open(file_path, "rb") as audio_file:
transcription_kwargs = { transcription_kwargs = {
@ -1728,12 +1741,18 @@ async def process_file_in_background_with_document(
"api_key": app_config.STT_SERVICE_API_KEY, "api_key": app_config.STT_SERVICE_API_KEY,
} }
if app_config.STT_SERVICE_API_BASE: if app_config.STT_SERVICE_API_BASE:
transcription_kwargs["api_base"] = app_config.STT_SERVICE_API_BASE transcription_kwargs["api_base"] = (
transcription_response = await atranscription(**transcription_kwargs) app_config.STT_SERVICE_API_BASE
)
transcription_response = await atranscription(
**transcription_kwargs
)
transcribed_text = transcription_response.get("text", "") transcribed_text = transcription_response.get("text", "")
if not transcribed_text: if not transcribed_text:
raise ValueError("Transcription returned empty text") raise ValueError("Transcription returned empty text")
markdown_content = f"# Transcription of {filename}\n\n{transcribed_text}" markdown_content = (
f"# Transcription of {filename}\n\n{transcribed_text}"
)
etl_service = "AUDIO_TRANSCRIPTION" etl_service = "AUDIO_TRANSCRIPTION"
# Clean up temp file # Clean up temp file
@ -1742,13 +1761,18 @@ async def process_file_in_background_with_document(
else: else:
# Document files - use ETL service # Document files - use ETL service
from app.services.page_limit_service import PageLimitExceededError, PageLimitService from app.services.page_limit_service import (
PageLimitExceededError,
PageLimitService,
)
page_limit_service = PageLimitService(session) page_limit_service = PageLimitService(session)
# Estimate page count # Estimate page count
try: try:
estimated_pages = page_limit_service.estimate_pages_before_processing(file_path) estimated_pages = page_limit_service.estimate_pages_before_processing(
file_path
)
except Exception: except Exception:
file_size = os.path.getsize(file_path) file_size = os.path.getsize(file_path)
estimated_pages = max(1, file_size // (80 * 1024)) estimated_pages = max(1, file_size // (80 * 1024))
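The fallback heuristic above as a standalone helper: when the page-limit service cannot inspect the file, assume roughly one page per 80 KiB of file size, never less than one.

import os


def estimate_pages_fallback(file_path: str) -> int:
    file_size = os.path.getsize(file_path)
    return max(1, file_size // (80 * 1024))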
@ -1759,14 +1783,22 @@ async def process_file_in_background_with_document(
if app_config.ETL_SERVICE == "UNSTRUCTURED": if app_config.ETL_SERVICE == "UNSTRUCTURED":
if notification: if notification:
await NotificationService.document_processing.notify_processing_progress( await NotificationService.document_processing.notify_processing_progress(
session, notification, stage="parsing", stage_message="Extracting content" session,
notification,
stage="parsing",
stage_message="Extracting content",
) )
from langchain_unstructured import UnstructuredLoader from langchain_unstructured import UnstructuredLoader
loader = UnstructuredLoader( loader = UnstructuredLoader(
file_path, mode="elements", post_processors=[], languages=["eng"], file_path,
include_orig_elements=False, include_metadata=False, strategy="auto" mode="elements",
post_processors=[],
languages=["eng"],
include_orig_elements=False,
include_metadata=False,
strategy="auto",
) )
docs = await loader.aload() docs = await loader.aload()
markdown_content = await convert_document_to_markdown(docs) markdown_content = await convert_document_to_markdown(docs)
@ -1775,37 +1807,55 @@ async def process_file_in_background_with_document(
etl_service = "UNSTRUCTURED" etl_service = "UNSTRUCTURED"
# Update page usage # Update page usage
await page_limit_service.update_page_usage(user_id, final_page_count, allow_exceed=True) await page_limit_service.update_page_usage(
user_id, final_page_count, allow_exceed=True
)
elif app_config.ETL_SERVICE == "LLAMACLOUD": elif app_config.ETL_SERVICE == "LLAMACLOUD":
if notification: if notification:
await NotificationService.document_processing.notify_processing_progress( await NotificationService.document_processing.notify_processing_progress(
session, notification, stage="parsing", stage_message="Extracting content" session,
notification,
stage="parsing",
stage_message="Extracting content",
) )
result = await parse_with_llamacloud_retry( result = await parse_with_llamacloud_retry(
file_path=file_path, estimated_pages=estimated_pages, file_path=file_path,
task_logger=task_logger, log_entry=log_entry estimated_pages=estimated_pages,
task_logger=task_logger,
log_entry=log_entry,
)
markdown_documents = await result.aget_markdown_documents(
split_by_page=False
) )
markdown_documents = await result.aget_markdown_documents(split_by_page=False)
if not markdown_documents: if not markdown_documents:
raise RuntimeError(f"LlamaCloud parsing returned no documents: {filename}") raise RuntimeError(
f"LlamaCloud parsing returned no documents: {filename}"
)
markdown_content = markdown_documents[0].text markdown_content = markdown_documents[0].text
etl_service = "LLAMACLOUD" etl_service = "LLAMACLOUD"
# Update page usage # Update page usage
await page_limit_service.update_page_usage(user_id, estimated_pages, allow_exceed=True) await page_limit_service.update_page_usage(
user_id, estimated_pages, allow_exceed=True
)
elif app_config.ETL_SERVICE == "DOCLING": elif app_config.ETL_SERVICE == "DOCLING":
if notification: if notification:
await NotificationService.document_processing.notify_processing_progress( await NotificationService.document_processing.notify_processing_progress(
session, notification, stage="parsing", stage_message="Extracting content" session,
notification,
stage="parsing",
stage_message="Extracting content",
) )
# Suppress logging during Docling import # Suppress logging during Docling import
getLogger("docling.pipeline.base_pipeline").setLevel(ERROR) getLogger("docling.pipeline.base_pipeline").setLevel(ERROR)
getLogger("docling.document_converter").setLevel(ERROR) getLogger("docling.document_converter").setLevel(ERROR)
getLogger("docling_core.transforms.chunker.hierarchical_chunker").setLevel(ERROR) getLogger(
"docling_core.transforms.chunker.hierarchical_chunker"
).setLevel(ERROR)
from docling.document_converter import DocumentConverter from docling.document_converter import DocumentConverter
@ -1815,7 +1865,9 @@ async def process_file_in_background_with_document(
etl_service = "DOCLING" etl_service = "DOCLING"
# Update page usage # Update page usage
await page_limit_service.update_page_usage(user_id, estimated_pages, allow_exceed=True) await page_limit_service.update_page_usage(
user_id, estimated_pages, allow_exceed=True
)
else: else:
raise RuntimeError(f"Unknown ETL_SERVICE: {app_config.ETL_SERVICE}") raise RuntimeError(f"Unknown ETL_SERVICE: {app_config.ETL_SERVICE}")
@ -1922,7 +1974,11 @@ async def process_file_in_background_with_document(
log_entry, log_entry,
error_message, error_message,
str(e), str(e),
{"error_type": type(e).__name__, "filename": filename, "document_id": document.id}, {
"error_type": type(e).__name__,
"filename": filename,
"document_id": document.id,
},
) )
logging.error(f"Error processing file with document: {error_message}") logging.error(f"Error processing file with document: {error_message}")
raise raise


@ -136,11 +136,19 @@ async def add_youtube_video_document(
document = existing_document document = existing_document
is_new_document = False is_new_document = False
# Check if already being processed # Check if already being processed
if DocumentStatus.is_state(existing_document.status, DocumentStatus.PENDING): if DocumentStatus.is_state(
logging.info(f"YouTube video {video_id} already pending. Returning existing.") existing_document.status, DocumentStatus.PENDING
):
logging.info(
f"YouTube video {video_id} already pending. Returning existing."
)
return existing_document return existing_document
if DocumentStatus.is_state(existing_document.status, DocumentStatus.PROCESSING): if DocumentStatus.is_state(
logging.info(f"YouTube video {video_id} already processing. Returning existing.") existing_document.status, DocumentStatus.PROCESSING
):
logging.info(
f"YouTube video {video_id} already processing. Returning existing."
)
return existing_document return existing_document
else: else:
# Create new document with PENDING status (visible in UI immediately) # Create new document with PENDING status (visible in UI immediately)
@ -300,7 +308,9 @@ async def add_youtube_video_document(
"video_id": video_id, "video_id": video_id,
}, },
) )
logging.info(f"Document for YouTube video {video_id} unchanged. Marking as ready.") logging.info(
f"Document for YouTube video {video_id} unchanged. Marking as ready."
)
document.status = DocumentStatus.ready() document.status = DocumentStatus.ready()
await session.commit() await session.commit()
return document return document
@ -408,7 +418,9 @@ async def add_youtube_video_document(
# Mark document as failed if it exists # Mark document as failed if it exists
if document: if document:
try: try:
document.status = DocumentStatus.failed(f"Database error: {str(db_error)[:150]}") document.status = DocumentStatus.failed(
f"Database error: {str(db_error)[:150]}"
)
document.updated_at = get_current_timestamp() document.updated_at = get_current_timestamp()
await session.commit() await session.commit()
except Exception: except Exception:
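The YouTube hunks above reformat an idempotency guard: if a document for the video already exists and is pending or processing, it is returned as-is rather than re-queued. A minimal, self-contained sketch of that guard, using a plain dict where the project uses its DocumentStatus helper:

```python
# Hedged sketch; the real code uses DocumentStatus.is_state(...) on an ORM model.
import logging


def reuse_if_in_flight(existing_document: dict, video_id: str) -> dict | None:
    """Return the existing document when it is already pending/processing, else None."""
    state = (existing_document.get("status") or {}).get("state")
    if state in ("pending", "processing"):
        logging.info("YouTube video %s already %s. Returning existing.", video_id, state)
        return existing_document
    return None
```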


@ -38,7 +38,9 @@ export function DocumentTypeChip({ type, className }: { type: string; className?
className={`inline-flex items-center gap-1.5 rounded bg-muted/40 px-2 py-1 text-xs text-muted-foreground max-w-full overflow-hidden ${className ?? ""}`} className={`inline-flex items-center gap-1.5 rounded bg-muted/40 px-2 py-1 text-xs text-muted-foreground max-w-full overflow-hidden ${className ?? ""}`}
> >
<span className="opacity-80 flex-shrink-0">{icon}</span> <span className="opacity-80 flex-shrink-0">{icon}</span>
<span ref={textRef} className="truncate min-w-0">{fullLabel}</span> <span ref={textRef} className="truncate min-w-0">
{fullLabel}
</span>
</span> </span>
); );


@ -68,9 +68,7 @@ export function DocumentsFilters({
const filteredTypes = useMemo(() => { const filteredTypes = useMemo(() => {
if (!typeSearchQuery.trim()) return uniqueTypes; if (!typeSearchQuery.trim()) return uniqueTypes;
const query = typeSearchQuery.toLowerCase(); const query = typeSearchQuery.toLowerCase();
return uniqueTypes.filter((type) => return uniqueTypes.filter((type) => getDocumentTypeLabel(type).toLowerCase().includes(query));
getDocumentTypeLabel(type).toLowerCase().includes(query)
);
}, [uniqueTypes, typeSearchQuery]); }, [uniqueTypes, typeSearchQuery]);
const typeCounts = useMemo(() => { const typeCounts = useMemo(() => {
@ -156,94 +154,95 @@ export function DocumentsFilters({
{/* Filter Buttons Group */} {/* Filter Buttons Group */}
<div className="flex items-center gap-2 flex-wrap"> <div className="flex items-center gap-2 flex-wrap">
{/* Type Filter */} {/* Type Filter */}
<Popover> <Popover>
<PopoverTrigger asChild> <PopoverTrigger asChild>
<Button <Button
variant="outline" variant="outline"
size="sm" size="sm"
className="h-9 gap-2 border-dashed border-border/60 text-muted-foreground hover:text-foreground hover:border-border" className="h-9 gap-2 border-dashed border-border/60 text-muted-foreground hover:text-foreground hover:border-border"
> >
<FileType size={14} className="text-muted-foreground" /> <FileType size={14} className="text-muted-foreground" />
<span className="hidden sm:inline">Type</span> <span className="hidden sm:inline">Type</span>
{activeTypes.length > 0 && ( {activeTypes.length > 0 && (
<span className="flex h-5 w-5 items-center justify-center rounded-full bg-primary text-[10px] font-medium text-primary-foreground"> <span className="flex h-5 w-5 items-center justify-center rounded-full bg-primary text-[10px] font-medium text-primary-foreground">
{activeTypes.length} {activeTypes.length}
</span> </span>
)} )}
</Button> </Button>
</PopoverTrigger> </PopoverTrigger>
<PopoverContent className="w-64 !p-0 overflow-hidden" align="end"> <PopoverContent className="w-64 !p-0 overflow-hidden" align="end">
<div> <div>
{/* Search input */} {/* Search input */}
<div className="p-2 border-b border-border/50"> <div className="p-2 border-b border-border/50">
<div className="relative"> <div className="relative">
<Search className="absolute left-0.5 top-1/2 -translate-y-1/2 h-4 w-4 text-muted-foreground" /> <Search className="absolute left-0.5 top-1/2 -translate-y-1/2 h-4 w-4 text-muted-foreground" />
<Input <Input
placeholder="Search types..." placeholder="Search types..."
value={typeSearchQuery} value={typeSearchQuery}
onChange={(e) => setTypeSearchQuery(e.target.value)} onChange={(e) => setTypeSearchQuery(e.target.value)}
className="h-6 pl-6 text-sm bg-transparent border-0 focus-visible:ring-0" className="h-6 pl-6 text-sm bg-transparent border-0 focus-visible:ring-0"
/> />
</div>
</div>
<div className="max-h-[300px] overflow-y-auto overflow-x-hidden py-1.5 px-1.5">
{filteredTypes.length === 0 ? (
<div className="py-6 text-center text-sm text-muted-foreground">
No types found
</div> </div>
) : ( </div>
filteredTypes.map((value: DocumentTypeEnum, i) => (
<button <div className="max-h-[300px] overflow-y-auto overflow-x-hidden py-1.5 px-1.5">
key={value} {filteredTypes.length === 0 ? (
type="button" <div className="py-6 text-center text-sm text-muted-foreground">
className="flex w-full items-center gap-2.5 py-2 px-3 rounded-md hover:bg-muted/50 transition-colors cursor-pointer text-left" No types found
onClick={() => onToggleType(value, !activeTypes.includes(value))} </div>
) : (
filteredTypes.map((value: DocumentTypeEnum, i) => (
<button
key={value}
type="button"
className="flex w-full items-center gap-2.5 py-2 px-3 rounded-md hover:bg-muted/50 transition-colors cursor-pointer text-left"
onClick={() => onToggleType(value, !activeTypes.includes(value))}
>
{/* Icon */}
<div className="flex h-7 w-7 shrink-0 items-center justify-center rounded-md bg-muted/50 text-foreground/80">
{getDocumentTypeIcon(value, "h-4 w-4")}
</div>
{/* Text content */}
<div className="flex flex-col min-w-0 flex-1 gap-0.5">
<span className="text-[13px] font-medium text-foreground truncate leading-tight">
{getDocumentTypeLabel(value)}
</span>
<span className="text-[11px] text-muted-foreground leading-tight">
{typeCounts.get(value)} document
{(typeCounts.get(value) ?? 0) !== 1 ? "s" : ""}
</span>
</div>
{/* Checkbox */}
<Checkbox
id={`${id}-${i}`}
checked={activeTypes.includes(value)}
onCheckedChange={(checked: boolean) => onToggleType(value, !!checked)}
className="h-4 w-4 shrink-0 rounded border-muted-foreground/30 data-[state=checked]:bg-primary data-[state=checked]:border-primary"
/>
</button>
))
)}
</div>
{activeTypes.length > 0 && (
<div className="px-3 pt-1.5 pb-1.5 border-t border-border/50">
<Button
variant="ghost"
size="sm"
className="w-full h-7 text-[11px] text-muted-foreground hover:text-foreground"
onClick={() => {
activeTypes.forEach((t) => {
onToggleType(t, false);
});
}}
> >
{/* Icon */} Clear filters
<div className="flex h-7 w-7 shrink-0 items-center justify-center rounded-md bg-muted/50 text-foreground/80"> </Button>
{getDocumentTypeIcon(value, "h-4 w-4")} </div>
</div>
{/* Text content */}
<div className="flex flex-col min-w-0 flex-1 gap-0.5">
<span className="text-[13px] font-medium text-foreground truncate leading-tight">
{getDocumentTypeLabel(value)}
</span>
<span className="text-[11px] text-muted-foreground leading-tight">
{typeCounts.get(value)} document{(typeCounts.get(value) ?? 0) !== 1 ? "s" : ""}
</span>
</div>
{/* Checkbox */}
<Checkbox
id={`${id}-${i}`}
checked={activeTypes.includes(value)}
onCheckedChange={(checked: boolean) => onToggleType(value, !!checked)}
className="h-4 w-4 shrink-0 rounded border-muted-foreground/30 data-[state=checked]:bg-primary data-[state=checked]:border-primary"
/>
</button>
))
)} )}
</div> </div>
{activeTypes.length > 0 && ( </PopoverContent>
<div className="px-3 pt-1.5 pb-1.5 border-t border-border/50"> </Popover>
<Button
variant="ghost"
size="sm"
className="w-full h-7 text-[11px] text-muted-foreground hover:text-foreground"
onClick={() => {
activeTypes.forEach((t) => {
onToggleType(t, false);
});
}}
>
Clear filters
</Button>
</div>
)}
</div>
</PopoverContent>
</Popover>
{/* Bulk Delete Button */} {/* Bulk Delete Button */}
{selectedIds.size > 0 && ( {selectedIds.size > 0 && (
@ -255,22 +254,14 @@ export function DocumentsFilters({
exit={{ opacity: 0, scale: 0.9 }} exit={{ opacity: 0, scale: 0.9 }}
> >
{/* Mobile: icon with count */} {/* Mobile: icon with count */}
<Button <Button variant="destructive" size="sm" className="h-9 gap-1.5 px-2.5 md:hidden">
variant="destructive"
size="sm"
className="h-9 gap-1.5 px-2.5 md:hidden"
>
<Trash size={14} /> <Trash size={14} />
<span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium"> <span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium">
{selectedIds.size} {selectedIds.size}
</span> </span>
</Button> </Button>
{/* Desktop: full button */} {/* Desktop: full button */}
<Button <Button variant="destructive" size="sm" className="h-9 gap-2 hidden md:flex">
variant="destructive"
size="sm"
className="h-9 gap-2 hidden md:flex"
>
<Trash size={14} /> <Trash size={14} />
Delete Delete
<span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium"> <span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium">
@ -288,9 +279,12 @@ export function DocumentsFilters({
<CircleAlert size={18} strokeWidth={2} /> <CircleAlert size={18} strokeWidth={2} />
</div> </div>
<AlertDialogHeader className="flex-1"> <AlertDialogHeader className="flex-1">
<AlertDialogTitle>Delete {selectedIds.size} document{selectedIds.size !== 1 ? "s" : ""}?</AlertDialogTitle> <AlertDialogTitle>
Delete {selectedIds.size} document{selectedIds.size !== 1 ? "s" : ""}?
</AlertDialogTitle>
<AlertDialogDescription> <AlertDialogDescription>
This action cannot be undone. This will permanently delete the selected {selectedIds.size === 1 ? "document" : "documents"} from your search space. This action cannot be undone. This will permanently delete the selected{" "}
{selectedIds.size === 1 ? "document" : "documents"} from your search space.
</AlertDialogDescription> </AlertDialogDescription>
</AlertDialogHeader> </AlertDialogHeader>
</div> </div>


@ -1,7 +1,20 @@
"use client"; "use client";
import { formatDistanceToNow } from "date-fns"; import { formatDistanceToNow } from "date-fns";
import { AlertCircle, Calendar, CheckCircle2, ChevronDown, ChevronUp, Clock, FileText, FileX, Loader2, Network, Plus, User } from "lucide-react"; import {
AlertCircle,
Calendar,
CheckCircle2,
ChevronDown,
ChevronUp,
Clock,
FileText,
FileX,
Loader2,
Network,
Plus,
User,
} from "lucide-react";
import { motion } from "motion/react"; import { motion } from "motion/react";
import { useTranslations } from "next-intl"; import { useTranslations } from "next-intl";
import React, { useRef, useState, useEffect, useCallback } from "react"; import React, { useRef, useState, useEffect, useCallback } from "react";
@ -10,12 +23,7 @@ import { JsonMetadataViewer } from "@/components/json-metadata-viewer";
import { MarkdownViewer } from "@/components/markdown-viewer"; import { MarkdownViewer } from "@/components/markdown-viewer";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { Checkbox } from "@/components/ui/checkbox"; import { Checkbox } from "@/components/ui/checkbox";
import { import { Dialog, DialogContent, DialogHeader, DialogTitle } from "@/components/ui/dialog";
Dialog,
DialogContent,
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import { Skeleton } from "@/components/ui/skeleton"; import { Skeleton } from "@/components/ui/skeleton";
import { Spinner } from "@/components/ui/spinner"; import { Spinner } from "@/components/ui/spinner";
import { import {
@ -176,12 +184,10 @@ function SortableHeader({
> >
{icon && <span className="opacity-60">{icon}</span>} {icon && <span className="opacity-60">{icon}</span>}
{children} {children}
<span className={`transition-opacity ${isActive ? "opacity-100" : "opacity-0 group-hover:opacity-50"}`}> <span
{isActive && sortDesc ? ( className={`transition-opacity ${isActive ? "opacity-100" : "opacity-0 group-hover:opacity-50"}`}
<ChevronDown size={14} /> >
) : ( {isActive && sortDesc ? <ChevronDown size={14} /> : <ChevronUp size={14} />}
<ChevronUp size={14} />
)}
</span> </span>
</button> </button>
); );
@ -300,8 +306,10 @@ export function DocumentsTableShell({
// Only consider selectable documents for "select all" logic // Only consider selectable documents for "select all" logic
const selectableDocs = sorted.filter(isSelectable); const selectableDocs = sorted.filter(isSelectable);
const allSelectedOnPage = selectableDocs.length > 0 && selectableDocs.every((d) => selectedIds.has(d.id)); const allSelectedOnPage =
const someSelectedOnPage = selectableDocs.some((d) => selectedIds.has(d.id)) && !allSelectedOnPage; selectableDocs.length > 0 && selectableDocs.every((d) => selectedIds.has(d.id));
const someSelectedOnPage =
selectableDocs.some((d) => selectedIds.has(d.id)) && !allSelectedOnPage;
const toggleAll = (checked: boolean) => { const toggleAll = (checked: boolean) => {
const next = new Set(selectedIds); const next = new Set(selectedIds);
@ -388,10 +396,7 @@ export function DocumentsTableShell({
</div> </div>
</TableCell> </TableCell>
<TableCell className="w-[35%] py-2.5 max-w-0 border-r border-border/40"> <TableCell className="w-[35%] py-2.5 max-w-0 border-r border-border/40">
<Skeleton <Skeleton className="h-4" style={{ width: `${widthPercent}%` }} />
className="h-4"
style={{ width: `${widthPercent}%` }}
/>
</TableCell> </TableCell>
{columnVisibility.document_type && ( {columnVisibility.document_type && (
<TableCell className="w-[20%] min-w-[120px] max-w-[200px] py-2.5 border-r border-border/40 overflow-hidden"> <TableCell className="w-[20%] min-w-[120px] max-w-[200px] py-2.5 border-r border-border/40 overflow-hidden">
@ -429,24 +434,15 @@ export function DocumentsTableShell({
<div className="flex items-start gap-3"> <div className="flex items-start gap-3">
<Skeleton className="h-4 w-4 mt-0.5 rounded" /> <Skeleton className="h-4 w-4 mt-0.5 rounded" />
<div className="flex-1 min-w-0 space-y-2"> <div className="flex-1 min-w-0 space-y-2">
<Skeleton <Skeleton className="h-4" style={{ width: `${widthPercent}%` }} />
className="h-4"
style={{ width: `${widthPercent}%` }}
/>
<div className="flex flex-wrap items-center gap-2"> <div className="flex flex-wrap items-center gap-2">
<Skeleton className="h-5 w-20 rounded" /> <Skeleton className="h-5 w-20 rounded" />
{columnVisibility.created_by && ( {columnVisibility.created_by && <Skeleton className="h-3 w-14" />}
<Skeleton className="h-3 w-14" /> {columnVisibility.created_at && <Skeleton className="h-3 w-20" />}
)}
{columnVisibility.created_at && (
<Skeleton className="h-3 w-20" />
)}
</div> </div>
</div> </div>
<div className="flex items-center gap-2"> <div className="flex items-center gap-2">
{columnVisibility.status && ( {columnVisibility.status && <Skeleton className="h-5 w-5 rounded-full" />}
<Skeleton className="h-5 w-5 rounded-full" />
)}
<Skeleton className="h-7 w-7 rounded" /> <Skeleton className="h-7 w-7 rounded" />
</div> </div>
</div> </div>
@ -549,9 +545,7 @@ export function DocumentsTableShell({
)} )}
{columnVisibility.status && ( {columnVisibility.status && (
<TableHead className="w-20 text-center"> <TableHead className="w-20 text-center">
<span className="text-sm font-medium text-muted-foreground/70"> <span className="text-sm font-medium text-muted-foreground/70">Status</span>
Status
</span>
</TableHead> </TableHead>
)} )}
<TableHead className="w-10"> <TableHead className="w-10">
@ -580,9 +574,7 @@ export function DocumentsTableShell({
}, },
}} }}
className={`border-b border-border/40 transition-colors ${ className={`border-b border-border/40 transition-colors ${
isSelected isSelected ? "bg-primary/5 hover:bg-primary/8" : "hover:bg-muted/30"
? "bg-primary/5 hover:bg-primary/8"
: "hover:bg-muted/30"
}`} }`}
> >
<TableCell className="w-8 px-0 py-2.5 text-center"> <TableCell className="w-8 px-0 py-2.5 text-center">
@ -591,7 +583,9 @@ export function DocumentsTableShell({
checked={isSelected} checked={isSelected}
onCheckedChange={(v) => canSelect && toggleOne(doc.id, !!v)} onCheckedChange={(v) => canSelect && toggleOne(doc.id, !!v)}
disabled={!canSelect} disabled={!canSelect}
aria-label={canSelect ? "Select row" : "Cannot select while processing"} aria-label={
canSelect ? "Select row" : "Cannot select while processing"
}
className={`border-foreground data-[state=checked]:bg-primary data-[state=checked]:border-primary ${!canSelect ? "opacity-40 cursor-not-allowed" : ""}`} className={`border-foreground data-[state=checked]:bg-primary data-[state=checked]:border-primary ${!canSelect ? "opacity-40 cursor-not-allowed" : ""}`}
/> />
</div> </div>
@ -639,7 +633,9 @@ export function DocumentsTableShell({
<TableCell className="w-32 py-2.5 text-sm text-foreground border-r border-border/40"> <TableCell className="w-32 py-2.5 text-sm text-foreground border-r border-border/40">
<Tooltip> <Tooltip>
<TooltipTrigger asChild> <TooltipTrigger asChild>
<span className="cursor-default">{formatRelativeDate(doc.created_at)}</span> <span className="cursor-default">
{formatRelativeDate(doc.created_at)}
</span>
</TooltipTrigger> </TooltipTrigger>
<TooltipContent side="top"> <TooltipContent side="top">
{formatAbsoluteDate(doc.created_at)} {formatAbsoluteDate(doc.created_at)}
@ -720,9 +716,7 @@ export function DocumentsTableShell({
<div className="flex flex-wrap items-center gap-2"> <div className="flex flex-wrap items-center gap-2">
<DocumentTypeChip type={doc.document_type} /> <DocumentTypeChip type={doc.document_type} />
{columnVisibility.created_by && doc.created_by_name && ( {columnVisibility.created_by && doc.created_by_name && (
<span className="text-xs text-foreground"> <span className="text-xs text-foreground">{doc.created_by_name}</span>
{doc.created_by_name}
</span>
)} )}
{columnVisibility.created_at && ( {columnVisibility.created_at && (
<Tooltip> <Tooltip>


@ -46,7 +46,8 @@ export function RowActions({
); );
// Documents in "pending" or "processing" state should show disabled delete // Documents in "pending" or "processing" state should show disabled delete
const isBeingProcessed = document.status?.state === "pending" || document.status?.state === "processing"; const isBeingProcessed =
document.status?.state === "pending" || document.status?.state === "processing";
// SURFSENSE_DOCS are system-managed and should not show delete at all // SURFSENSE_DOCS are system-managed and should not show delete at all
const shouldShowDelete = !NON_DELETABLE_DOCUMENT_TYPES.includes( const shouldShowDelete = !NON_DELETABLE_DOCUMENT_TYPES.includes(
@ -67,8 +68,9 @@ export function RowActions({
} catch (error: unknown) { } catch (error: unknown) {
console.error("Error deleting document:", error); console.error("Error deleting document:", error);
// Check for 409 Conflict (document started processing after UI loaded) // Check for 409 Conflict (document started processing after UI loaded)
const status = (error as { response?: { status?: number } })?.response?.status const status =
?? (error as { status?: number })?.status; (error as { response?: { status?: number } })?.response?.status ??
(error as { status?: number })?.status;
if (status === 409) { if (status === 409) {
toast.error("Document is now being processed. Please try again later."); toast.error("Document is now being processed. Please try again later.");
} else { } else {
@ -92,7 +94,11 @@ export function RowActions({
// Editable documents: show 3-dot dropdown with edit + delete // Editable documents: show 3-dot dropdown with edit + delete
<DropdownMenu> <DropdownMenu>
<DropdownMenuTrigger asChild> <DropdownMenuTrigger asChild>
<Button variant="ghost" size="icon" className="h-8 w-8 text-muted-foreground hover:text-foreground hover:bg-muted/80"> <Button
variant="ghost"
size="icon"
className="h-8 w-8 text-muted-foreground hover:text-foreground hover:bg-muted/80"
>
<MoreHorizontal className="h-4 w-4" /> <MoreHorizontal className="h-4 w-4" />
<span className="sr-only">Open menu</span> <span className="sr-only">Open menu</span>
</Button> </Button>
@ -101,7 +107,9 @@ export function RowActions({
<DropdownMenuItem <DropdownMenuItem
onClick={() => !isEditDisabled && handleEdit()} onClick={() => !isEditDisabled && handleEdit()}
disabled={isEditDisabled} disabled={isEditDisabled}
className={isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""} className={
isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""
}
> >
<Pencil className="mr-2 h-4 w-4" /> <Pencil className="mr-2 h-4 w-4" />
<span>Edit</span> <span>Edit</span>
@ -110,7 +118,11 @@ export function RowActions({
<DropdownMenuItem <DropdownMenuItem
onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)} onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
disabled={isDeleteDisabled} disabled={isDeleteDisabled}
className={isDeleteDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : "text-destructive focus:text-destructive"} className={
isDeleteDisabled
? "text-muted-foreground cursor-not-allowed opacity-50"
: "text-destructive focus:text-destructive"
}
> >
<Trash2 className="mr-2 h-4 w-4" /> <Trash2 className="mr-2 h-4 w-4" />
<span>Delete</span> <span>Delete</span>
@ -150,7 +162,9 @@ export function RowActions({
<DropdownMenuItem <DropdownMenuItem
onClick={() => !isEditDisabled && handleEdit()} onClick={() => !isEditDisabled && handleEdit()}
disabled={isEditDisabled} disabled={isEditDisabled}
className={isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""} className={
isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""
}
> >
<Pencil className="mr-2 h-4 w-4" /> <Pencil className="mr-2 h-4 w-4" />
<span>Edit</span> <span>Edit</span>
@ -159,7 +173,11 @@ export function RowActions({
<DropdownMenuItem <DropdownMenuItem
onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)} onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
disabled={isDeleteDisabled} disabled={isDeleteDisabled}
className={isDeleteDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : "text-destructive focus:text-destructive"} className={
isDeleteDisabled
? "text-muted-foreground cursor-not-allowed opacity-50"
: "text-destructive focus:text-destructive"
}
> >
<Trash2 className="mr-2 h-4 w-4" /> <Trash2 className="mr-2 h-4 w-4" />
<span>Delete</span> <span>Delete</span>


@ -116,13 +116,15 @@ export default function DocumentsTable() {
created_by_id: item.created_by_id ?? null, created_by_id: item.created_by_id ?? null,
created_by_name: item.created_by_name ?? null, created_by_name: item.created_by_name ?? null,
created_at: item.created_at, created_at: item.created_at,
status: (item as { status?: { state: "ready" | "pending" | "processing" | "failed"; reason?: string } }).status ?? { state: "ready" as const }, status: (
item as {
status?: { state: "ready" | "pending" | "processing" | "failed"; reason?: string };
}
).status ?? { state: "ready" as const },
})) }))
: paginatedRealtimeDocuments; : paginatedRealtimeDocuments;
const displayTotal = isSearchMode const displayTotal = isSearchMode ? searchResponse?.total || 0 : sortedRealtimeDocuments.length;
? searchResponse?.total || 0
: sortedRealtimeDocuments.length;
const loading = isSearchMode ? isSearchLoading : realtimeLoading; const loading = isSearchMode ? isSearchLoading : realtimeLoading;
const error = isSearchMode ? searchError : realtimeError; const error = isSearchMode ? searchError : realtimeError;
@ -150,11 +152,11 @@ export default function DocumentsTable() {
// For real-time mode, use sortedRealtimeDocuments (which has status) // For real-time mode, use sortedRealtimeDocuments (which has status)
// For search mode, use searchResponse items (need to safely access status) // For search mode, use searchResponse items (need to safely access status)
const allDocs = isSearchMode const allDocs = isSearchMode
? (searchResponse?.items || []).map(item => ({ ? (searchResponse?.items || []).map((item) => ({
id: item.id, id: item.id,
status: (item as { status?: { state: string } }).status, status: (item as { status?: { state: string } }).status,
})) }))
: sortedRealtimeDocuments.map(doc => ({ id: doc.id, status: doc.status })); : sortedRealtimeDocuments.map((doc) => ({ id: doc.id, status: doc.status }));
const selectedDocs = allDocs.filter((doc) => selectedIds.has(doc.id)); const selectedDocs = allDocs.filter((doc) => selectedIds.has(doc.id));
const deletableIds = selectedDocs const deletableIds = selectedDocs
@ -163,7 +165,9 @@ export default function DocumentsTable() {
const inProgressCount = selectedIds.size - deletableIds.length; const inProgressCount = selectedIds.size - deletableIds.length;
if (inProgressCount > 0) { if (inProgressCount > 0) {
toast.warning(`${inProgressCount} document(s) are pending or processing and cannot be deleted.`); toast.warning(
`${inProgressCount} document(s) are pending or processing and cannot be deleted.`
);
} }
if (deletableIds.length === 0) { if (deletableIds.length === 0) {
@ -180,8 +184,9 @@ export default function DocumentsTable() {
await deleteDocumentMutation({ id }); await deleteDocumentMutation({ id });
return true; return true;
} catch (error: unknown) { } catch (error: unknown) {
const status = (error as { response?: { status?: number } })?.response?.status const status =
?? (error as { status?: number })?.status; (error as { response?: { status?: number } })?.response?.status ??
(error as { status?: number })?.status;
if (status === 409) conflictCount++; if (status === 409) conflictCount++;
return false; return false;
} }
@ -210,21 +215,24 @@ export default function DocumentsTable() {
}; };
// Single document delete handler for RowActions // Single document delete handler for RowActions
const handleDeleteDocument = useCallback(async (id: number): Promise<boolean> => { const handleDeleteDocument = useCallback(
try { async (id: number): Promise<boolean> => {
await deleteDocumentMutation({ id }); try {
toast.success(t("delete_success") || "Document deleted"); await deleteDocumentMutation({ id });
// If in search mode, refetch search results to reflect deletion toast.success(t("delete_success") || "Document deleted");
if (isSearchMode) { // If in search mode, refetch search results to reflect deletion
await refetchSearch(); if (isSearchMode) {
await refetchSearch();
}
// Real-time mode: Electric will sync the deletion automatically
return true;
} catch (e) {
console.error("Error deleting document:", e);
return false;
} }
// Real-time mode: Electric will sync the deletion automatically },
return true; [deleteDocumentMutation, isSearchMode, refetchSearch, t]
} catch (e) { );
console.error("Error deleting document:", e);
return false;
}
}, [deleteDocumentMutation, isSearchMode, refetchSearch, t]);
const handleSortChange = useCallback((key: SortKey) => { const handleSortChange = useCallback((key: SortKey) => {
setSortKey((currentKey) => { setSortKey((currentKey) => {


@ -2,4 +2,3 @@ import { atom } from "jotai";
// Atom to control the connector dialog open state from anywhere in the app // Atom to control the connector dialog open state from anywhere in the app
export const connectorDialogOpenAtom = atom(false); export const connectorDialogOpenAtom = atom(false);


@ -191,7 +191,9 @@ export const ConnectorIndicator: FC<{ hideTrigger?: boolean }> = ({ hideTrigger
{!hideTrigger && ( {!hideTrigger && (
<TooltipIconButton <TooltipIconButton
data-joyride="connector-icon" data-joyride="connector-icon"
tooltip={hasConnectors ? `Manage ${activeConnectorsCount} connectors` : "Connect your data"} tooltip={
hasConnectors ? `Manage ${activeConnectorsCount} connectors` : "Connect your data"
}
side="bottom" side="bottom"
className={cn( className={cn(
"size-[34px] rounded-full p-1 flex items-center justify-center transition-colors relative", "size-[34px] rounded-full p-1 flex items-center justify-center transition-colors relative",


@ -346,13 +346,13 @@ export const useConnectorDialog = () => {
const connectorId = parseInt(params.connectorId, 10); const connectorId = parseInt(params.connectorId, 10);
newConnector = result.data.find((c: SearchSourceConnector) => c.id === connectorId); newConnector = result.data.find((c: SearchSourceConnector) => c.id === connectorId);
// If we found the connector, find the matching OAuth/Composio connector by type // If we found the connector, find the matching OAuth/Composio connector by type
if (newConnector) { if (newConnector) {
const connectorType = newConnector.connector_type; const connectorType = newConnector.connector_type;
oauthConnector = oauthConnector =
OAUTH_CONNECTORS.find((c) => c.connectorType === connectorType) || OAUTH_CONNECTORS.find((c) => c.connectorType === connectorType) ||
COMPOSIO_CONNECTORS.find((c) => c.connectorType === connectorType); COMPOSIO_CONNECTORS.find((c) => c.connectorType === connectorType);
} }
} }
// If we don't have a connector yet, try to find by connector param // If we don't have a connector yet, try to find by connector param
@ -361,12 +361,12 @@ export const useConnectorDialog = () => {
OAUTH_CONNECTORS.find((c) => c.id === params.connector) || OAUTH_CONNECTORS.find((c) => c.id === params.connector) ||
COMPOSIO_CONNECTORS.find((c) => c.id === params.connector); COMPOSIO_CONNECTORS.find((c) => c.id === params.connector);
if (oauthConnector) { if (oauthConnector) {
const oauthConnectorType = oauthConnector.connectorType; const oauthConnectorType = oauthConnector.connectorType;
newConnector = result.data.find( newConnector = result.data.find(
(c: SearchSourceConnector) => c.connector_type === oauthConnectorType (c: SearchSourceConnector) => c.connector_type === oauthConnectorType
); );
} }
} }
if (newConnector && oauthConnector) { if (newConnector && oauthConnector) {
@ -679,11 +679,11 @@ export const useConnectorDialog = () => {
}, },
}); });
const successMessage = const successMessage =
currentConnectorType === "MCP_CONNECTOR" currentConnectorType === "MCP_CONNECTOR"
? `${connector.name} added successfully` ? `${connector.name} added successfully`
: `${connectorTitle} connected and syncing started!`; : `${connectorTitle} connected and syncing started!`;
toast.success(successMessage); toast.success(successMessage);
const url = new URL(window.location.href); const url = new URL(window.location.href);
url.searchParams.delete("modal"); url.searchParams.delete("modal");


@ -8,172 +8,167 @@ import { cn } from "@/lib/utils";
// /////////////////////////////////////////////////////////////////////////// // ///////////////////////////////////////////////////////////////////////////
// Types // Types
export type AnimationVariant = export type AnimationVariant = "circle" | "rectangle" | "gif" | "polygon" | "circle-blur";
| "circle"
| "rectangle"
| "gif"
| "polygon"
| "circle-blur";
export type AnimationStart = export type AnimationStart =
| "top-left" | "top-left"
| "top-right" | "top-right"
| "bottom-left" | "bottom-left"
| "bottom-right" | "bottom-right"
| "center" | "center"
| "top-center" | "top-center"
| "bottom-center" | "bottom-center"
| "bottom-up" | "bottom-up"
| "top-down" | "top-down"
| "left-right" | "left-right"
| "right-left"; | "right-left";
interface Animation { interface Animation {
name: string; name: string;
css: string; css: string;
} }
// /////////////////////////////////////////////////////////////////////////// // ///////////////////////////////////////////////////////////////////////////
// Helper functions // Helper functions
const getPositionCoords = (position: AnimationStart) => { const getPositionCoords = (position: AnimationStart) => {
switch (position) { switch (position) {
case "top-left": case "top-left":
return { cx: "0", cy: "0" }; return { cx: "0", cy: "0" };
case "top-right": case "top-right":
return { cx: "40", cy: "0" }; return { cx: "40", cy: "0" };
case "bottom-left": case "bottom-left":
return { cx: "0", cy: "40" }; return { cx: "0", cy: "40" };
case "bottom-right": case "bottom-right":
return { cx: "40", cy: "40" }; return { cx: "40", cy: "40" };
case "top-center": case "top-center":
return { cx: "20", cy: "0" }; return { cx: "20", cy: "0" };
case "bottom-center": case "bottom-center":
return { cx: "20", cy: "40" }; return { cx: "20", cy: "40" };
case "bottom-up": case "bottom-up":
case "top-down": case "top-down":
case "left-right": case "left-right":
case "right-left": case "right-left":
return { cx: "20", cy: "20" }; return { cx: "20", cy: "20" };
} }
}; };
const generateSVG = (variant: AnimationVariant, start: AnimationStart) => { const generateSVG = (variant: AnimationVariant, start: AnimationStart) => {
if (variant === "circle-blur") { if (variant === "circle-blur") {
if (start === "center") { if (start === "center") {
return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><defs><filter id="blur"><feGaussianBlur stdDeviation="2"/></filter></defs><circle cx="20" cy="20" r="18" fill="white" filter="url(%23blur)"/></svg>`; return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><defs><filter id="blur"><feGaussianBlur stdDeviation="2"/></filter></defs><circle cx="20" cy="20" r="18" fill="white" filter="url(%23blur)"/></svg>`;
} }
const positionCoords = getPositionCoords(start); const positionCoords = getPositionCoords(start);
if (!positionCoords) { if (!positionCoords) {
throw new Error(`Invalid start position: ${start}`); throw new Error(`Invalid start position: ${start}`);
} }
const { cx, cy } = positionCoords; const { cx, cy } = positionCoords;
return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><defs><filter id="blur"><feGaussianBlur stdDeviation="2"/></filter></defs><circle cx="${cx}" cy="${cy}" r="18" fill="white" filter="url(%23blur)"/></svg>`; return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><defs><filter id="blur"><feGaussianBlur stdDeviation="2"/></filter></defs><circle cx="${cx}" cy="${cy}" r="18" fill="white" filter="url(%23blur)"/></svg>`;
} }
if (start === "center") return; if (start === "center") return;
if (variant === "rectangle") return ""; if (variant === "rectangle") return "";
const positionCoords = getPositionCoords(start); const positionCoords = getPositionCoords(start);
if (!positionCoords) { if (!positionCoords) {
throw new Error(`Invalid start position: ${start}`); throw new Error(`Invalid start position: ${start}`);
} }
const { cx, cy } = positionCoords; const { cx, cy } = positionCoords;
if (variant === "circle") { if (variant === "circle") {
return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><circle cx="${cx}" cy="${cy}" r="20" fill="white"/></svg>`; return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><circle cx="${cx}" cy="${cy}" r="20" fill="white"/></svg>`;
} }
return ""; return "";
}; };
const getTransformOrigin = (start: AnimationStart) => { const getTransformOrigin = (start: AnimationStart) => {
switch (start) { switch (start) {
case "top-left": case "top-left":
return "top left"; return "top left";
case "top-right": case "top-right":
return "top right"; return "top right";
case "bottom-left": case "bottom-left":
return "bottom left"; return "bottom left";
case "bottom-right": case "bottom-right":
return "bottom right"; return "bottom right";
case "top-center": case "top-center":
return "top center"; return "top center";
case "bottom-center": case "bottom-center":
return "bottom center"; return "bottom center";
case "bottom-up": case "bottom-up":
case "top-down": case "top-down":
case "left-right": case "left-right":
case "right-left": case "right-left":
return "center"; return "center";
} }
}; };
export const createAnimation = ( export const createAnimation = (
variant: AnimationVariant, variant: AnimationVariant,
start: AnimationStart = "center", start: AnimationStart = "center",
blur = false, blur = false,
url?: string, url?: string
): Animation => { ): Animation => {
const svg = generateSVG(variant, start); const svg = generateSVG(variant, start);
const transformOrigin = getTransformOrigin(start); const transformOrigin = getTransformOrigin(start);
if (variant === "rectangle") { if (variant === "rectangle") {
const getClipPath = (direction: AnimationStart) => { const getClipPath = (direction: AnimationStart) => {
switch (direction) { switch (direction) {
case "bottom-up": case "bottom-up":
return { return {
from: "polygon(0% 100%, 100% 100%, 100% 100%, 0% 100%)", from: "polygon(0% 100%, 100% 100%, 100% 100%, 0% 100%)",
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)", to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
}; };
case "top-down": case "top-down":
return { return {
from: "polygon(0% 0%, 100% 0%, 100% 0%, 0% 0%)", from: "polygon(0% 0%, 100% 0%, 100% 0%, 0% 0%)",
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)", to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
}; };
case "left-right": case "left-right":
return { return {
from: "polygon(0% 0%, 0% 0%, 0% 100%, 0% 100%)", from: "polygon(0% 0%, 0% 0%, 0% 100%, 0% 100%)",
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)", to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
}; };
case "right-left": case "right-left":
return { return {
from: "polygon(100% 0%, 100% 0%, 100% 100%, 100% 100%)", from: "polygon(100% 0%, 100% 0%, 100% 100%, 100% 100%)",
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)", to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
}; };
case "top-left": case "top-left":
return { return {
from: "polygon(0% 0%, 0% 0%, 0% 0%, 0% 0%)", from: "polygon(0% 0%, 0% 0%, 0% 0%, 0% 0%)",
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)", to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
}; };
case "top-right": case "top-right":
return { return {
from: "polygon(100% 0%, 100% 0%, 100% 0%, 100% 0%)", from: "polygon(100% 0%, 100% 0%, 100% 0%, 100% 0%)",
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)", to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
}; };
case "bottom-left": case "bottom-left":
return { return {
from: "polygon(0% 100%, 0% 100%, 0% 100%, 0% 100%)", from: "polygon(0% 100%, 0% 100%, 0% 100%, 0% 100%)",
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)", to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
}; };
case "bottom-right": case "bottom-right":
return { return {
from: "polygon(100% 100%, 100% 100%, 100% 100%, 100% 100%)", from: "polygon(100% 100%, 100% 100%, 100% 100%, 100% 100%)",
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)", to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
}; };
default: default:
return { return {
from: "polygon(0% 100%, 100% 100%, 100% 100%, 0% 100%)", from: "polygon(0% 100%, 100% 100%, 100% 100%, 0% 100%)",
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)", to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
}; };
} }
}; };
const clipPath = getClipPath(start); const clipPath = getClipPath(start);
return { return {
name: `${variant}-${start}${blur ? "-blur" : ""}`, name: `${variant}-${start}${blur ? "-blur" : ""}`,
css: ` css: `
::view-transition-group(root) { ::view-transition-group(root) {
animation-duration: 0.7s; animation-duration: 0.7s;
animation-timing-function: var(--expo-out); animation-timing-function: var(--expo-out);
@ -218,12 +213,12 @@ export const createAnimation = (
} }
} }
`, `,
}; };
} }
if (variant === "circle" && start == "center") { if (variant === "circle" && start == "center") {
return { return {
name: `${variant}-${start}${blur ? "-blur" : ""}`, name: `${variant}-${start}${blur ? "-blur" : ""}`,
css: ` css: `
::view-transition-group(root) { ::view-transition-group(root) {
animation-duration: 0.7s; animation-duration: 0.7s;
animation-timing-function: var(--expo-out); animation-timing-function: var(--expo-out);
@ -268,12 +263,12 @@ export const createAnimation = (
} }
} }
`, `,
}; };
} }
if (variant === "gif") { if (variant === "gif") {
return { return {
name: `${variant}-${start}`, name: `${variant}-${start}`,
css: ` css: `
::view-transition-group(root) { ::view-transition-group(root) {
animation-timing-function: var(--expo-in); animation-timing-function: var(--expo-in);
} }
@ -302,14 +297,14 @@ export const createAnimation = (
mask-size: 2000vmax; mask-size: 2000vmax;
} }
}`, }`,
}; };
} }
if (variant === "circle-blur") { if (variant === "circle-blur") {
if (start === "center") { if (start === "center") {
return { return {
name: `${variant}-${start}`, name: `${variant}-${start}`,
css: ` css: `
::view-transition-group(root) { ::view-transition-group(root) {
animation-timing-function: var(--expo-out); animation-timing-function: var(--expo-out);
} }
@ -334,12 +329,12 @@ export const createAnimation = (
} }
} }
`, `,
}; };
} }
return { return {
name: `${variant}-${start}`, name: `${variant}-${start}`,
css: ` css: `
::view-transition-group(root) { ::view-transition-group(root) {
animation-timing-function: var(--expo-out); animation-timing-function: var(--expo-out);
} }
@ -364,41 +359,41 @@ export const createAnimation = (
} }
} }
`, `,
}; };
} }
if (variant === "polygon") { if (variant === "polygon") {
const getPolygonClipPaths = (position: AnimationStart) => { const getPolygonClipPaths = (position: AnimationStart) => {
switch (position) { switch (position) {
case "top-left": case "top-left":
return { return {
darkFrom: "polygon(50% -71%, -50% 71%, -50% 71%, 50% -71%)", darkFrom: "polygon(50% -71%, -50% 71%, -50% 71%, 50% -71%)",
darkTo: "polygon(50% -71%, -50% 71%, 50% 171%, 171% 50%)", darkTo: "polygon(50% -71%, -50% 71%, 50% 171%, 171% 50%)",
lightFrom: "polygon(171% 50%, 50% 171%, 50% 171%, 171% 50%)", lightFrom: "polygon(171% 50%, 50% 171%, 50% 171%, 171% 50%)",
lightTo: "polygon(171% 50%, 50% 171%, -50% 71%, 50% -71%)", lightTo: "polygon(171% 50%, 50% 171%, -50% 71%, 50% -71%)",
}; };
case "top-right": case "top-right":
return { return {
darkFrom: "polygon(150% -71%, 250% 71%, 250% 71%, 150% -71%)", darkFrom: "polygon(150% -71%, 250% 71%, 250% 71%, 150% -71%)",
darkTo: "polygon(150% -71%, 250% 71%, 50% 171%, -71% 50%)", darkTo: "polygon(150% -71%, 250% 71%, 50% 171%, -71% 50%)",
lightFrom: "polygon(-71% 50%, 50% 171%, 50% 171%, -71% 50%)", lightFrom: "polygon(-71% 50%, 50% 171%, 50% 171%, -71% 50%)",
lightTo: "polygon(-71% 50%, 50% 171%, 250% 71%, 150% -71%)", lightTo: "polygon(-71% 50%, 50% 171%, 250% 71%, 150% -71%)",
}; };
default: default:
return { return {
darkFrom: "polygon(50% -71%, -50% 71%, -50% 71%, 50% -71%)", darkFrom: "polygon(50% -71%, -50% 71%, -50% 71%, 50% -71%)",
darkTo: "polygon(50% -71%, -50% 71%, 50% 171%, 171% 50%)", darkTo: "polygon(50% -71%, -50% 71%, 50% 171%, 171% 50%)",
lightFrom: "polygon(171% 50%, 50% 171%, 50% 171%, 171% 50%)", lightFrom: "polygon(171% 50%, 50% 171%, 50% 171%, 171% 50%)",
lightTo: "polygon(171% 50%, 50% 171%, -50% 71%, 50% -71%)", lightTo: "polygon(171% 50%, 50% 171%, -50% 71%, 50% -71%)",
}; };
} }
}; };
const clipPaths = getPolygonClipPaths(start); const clipPaths = getPolygonClipPaths(start);
return { return {
name: `${variant}-${start}${blur ? "-blur" : ""}`, name: `${variant}-${start}${blur ? "-blur" : ""}`,
css: ` css: `
::view-transition-group(root) { ::view-transition-group(root) {
animation-duration: 0.7s; animation-duration: 0.7s;
animation-timing-function: var(--expo-out); animation-timing-function: var(--expo-out);
@ -443,35 +438,35 @@ export const createAnimation = (
} }
} }
`, `,
}; };
} }
// Handle circle variants with start positions using clip-path // Handle circle variants with start positions using clip-path
if (variant === "circle" && start !== "center") { if (variant === "circle" && start !== "center") {
const getClipPathPosition = (position: AnimationStart) => { const getClipPathPosition = (position: AnimationStart) => {
switch (position) { switch (position) {
case "top-left": case "top-left":
return "0% 0%"; return "0% 0%";
case "top-right": case "top-right":
return "100% 0%"; return "100% 0%";
case "bottom-left": case "bottom-left":
return "0% 100%"; return "0% 100%";
case "bottom-right": case "bottom-right":
return "100% 100%"; return "100% 100%";
case "top-center": case "top-center":
return "50% 0%"; return "50% 0%";
case "bottom-center": case "bottom-center":
return "50% 100%"; return "50% 100%";
default: default:
return "50% 50%"; return "50% 50%";
} }
}; };
const clipPosition = getClipPathPosition(start); const clipPosition = getClipPathPosition(start);
return { return {
name: `${variant}-${start}${blur ? "-blur" : ""}`, name: `${variant}-${start}${blur ? "-blur" : ""}`,
css: ` css: `
::view-transition-group(root) { ::view-transition-group(root) {
animation-duration: 1s; animation-duration: 1s;
animation-timing-function: var(--expo-out); animation-timing-function: var(--expo-out);
@ -516,12 +511,12 @@ export const createAnimation = (
} }
} }
`, `,
}; };
} }
return { return {
name: `${variant}-${start}${blur ? "-blur" : ""}`, name: `${variant}-${start}${blur ? "-blur" : ""}`,
css: ` css: `
::view-transition-group(root) { ::view-transition-group(root) {
animation-timing-function: var(--expo-in); animation-timing-function: var(--expo-in);
} }
@ -549,237 +544,229 @@ export const createAnimation = (
} }
} }
`, `,
}; };
}; };
// /////////////////////////////////////////////////////////////////////////// // ///////////////////////////////////////////////////////////////////////////
// Custom hook for theme toggle functionality // Custom hook for theme toggle functionality
export const useThemeToggle = ({ export const useThemeToggle = ({
variant = "circle", variant = "circle",
start = "center", start = "center",
blur = false, blur = false,
gifUrl = "", gifUrl = "",
}: { }: {
variant?: AnimationVariant; variant?: AnimationVariant;
start?: AnimationStart; start?: AnimationStart;
blur?: boolean; blur?: boolean;
gifUrl?: string; gifUrl?: string;
} = {}) => { } = {}) => {
const { theme, setTheme, resolvedTheme } = useTheme(); const { theme, setTheme, resolvedTheme } = useTheme();
const [isDark, setIsDark] = useState(false); const [isDark, setIsDark] = useState(false);
// Sync isDark state with resolved theme after hydration // Sync isDark state with resolved theme after hydration
useEffect(() => { useEffect(() => {
setIsDark(resolvedTheme === "dark"); setIsDark(resolvedTheme === "dark");
}, [resolvedTheme]); }, [resolvedTheme]);
const styleId = "theme-transition-styles"; const styleId = "theme-transition-styles";
const updateStyles = useCallback((css: string) => { const updateStyles = useCallback((css: string) => {
if (typeof window === "undefined") return; if (typeof window === "undefined") return;
let styleElement = document.getElementById(styleId) as HTMLStyleElement; let styleElement = document.getElementById(styleId) as HTMLStyleElement;
if (!styleElement) { if (!styleElement) {
styleElement = document.createElement("style"); styleElement = document.createElement("style");
styleElement.id = styleId; styleElement.id = styleId;
document.head.appendChild(styleElement); document.head.appendChild(styleElement);
} }
styleElement.textContent = css; styleElement.textContent = css;
}, []); }, []);
const toggleTheme = useCallback(() => { const toggleTheme = useCallback(() => {
setIsDark(!isDark); setIsDark(!isDark);
const animation = createAnimation(variant, start, blur, gifUrl); const animation = createAnimation(variant, start, blur, gifUrl);
updateStyles(animation.css); updateStyles(animation.css);
if (typeof window === "undefined") return; if (typeof window === "undefined") return;
const switchTheme = () => { const switchTheme = () => {
setTheme(theme === "light" ? "dark" : "light"); setTheme(theme === "light" ? "dark" : "light");
}; };
if (!document.startViewTransition) { if (!document.startViewTransition) {
switchTheme(); switchTheme();
return; return;
} }
document.startViewTransition(switchTheme); document.startViewTransition(switchTheme);
}, [theme, setTheme, variant, start, blur, gifUrl, updateStyles, isDark]); }, [theme, setTheme, variant, start, blur, gifUrl, updateStyles, isDark]);
const setCrazyLightTheme = useCallback(() => { const setCrazyLightTheme = useCallback(() => {
setIsDark(false); setIsDark(false);
const animation = createAnimation(variant, start, blur, gifUrl); const animation = createAnimation(variant, start, blur, gifUrl);
updateStyles(animation.css); updateStyles(animation.css);
if (typeof window === "undefined") return; if (typeof window === "undefined") return;
const switchTheme = () => { const switchTheme = () => {
setTheme("light"); setTheme("light");
}; };
if (!document.startViewTransition) { if (!document.startViewTransition) {
switchTheme(); switchTheme();
return; return;
} }
document.startViewTransition(switchTheme); document.startViewTransition(switchTheme);
}, [setTheme, variant, start, blur, gifUrl, updateStyles]); }, [setTheme, variant, start, blur, gifUrl, updateStyles]);
const setCrazyDarkTheme = useCallback(() => { const setCrazyDarkTheme = useCallback(() => {
setIsDark(true); setIsDark(true);
const animation = createAnimation(variant, start, blur, gifUrl); const animation = createAnimation(variant, start, blur, gifUrl);
updateStyles(animation.css); updateStyles(animation.css);
if (typeof window === "undefined") return; if (typeof window === "undefined") return;
const switchTheme = () => { const switchTheme = () => {
setTheme("dark"); setTheme("dark");
}; };
if (!document.startViewTransition) { if (!document.startViewTransition) {
switchTheme(); switchTheme();
return; return;
} }
document.startViewTransition(switchTheme); document.startViewTransition(switchTheme);
}, [setTheme, variant, start, blur, gifUrl, updateStyles]); }, [setTheme, variant, start, blur, gifUrl, updateStyles]);
const setCrazySystemTheme = useCallback(() => { const setCrazySystemTheme = useCallback(() => {
if (typeof window === "undefined") return; if (typeof window === "undefined") return;
const prefersDark = window.matchMedia( const prefersDark = window.matchMedia("(prefers-color-scheme: dark)").matches;
"(prefers-color-scheme: dark)", setIsDark(prefersDark);
).matches;
setIsDark(prefersDark);
const animation = createAnimation(variant, start, blur, gifUrl); const animation = createAnimation(variant, start, blur, gifUrl);
updateStyles(animation.css); updateStyles(animation.css);
const switchTheme = () => { const switchTheme = () => {
setTheme("system"); setTheme("system");
}; };
if (!document.startViewTransition) { if (!document.startViewTransition) {
switchTheme(); switchTheme();
return; return;
} }
document.startViewTransition(switchTheme); document.startViewTransition(switchTheme);
}, [setTheme, variant, start, blur, gifUrl, updateStyles]); }, [setTheme, variant, start, blur, gifUrl, updateStyles]);
return { return {
isDark, isDark,
setIsDark, setIsDark,
toggleTheme, toggleTheme,
setCrazyLightTheme, setCrazyLightTheme,
setCrazyDarkTheme, setCrazyDarkTheme,
setCrazySystemTheme, setCrazySystemTheme,
}; };
}; };
// /////////////////////////////////////////////////////////////////////////// // ///////////////////////////////////////////////////////////////////////////
// Theme Toggle Button Component (Sun/Moon Style) // Theme Toggle Button Component (Sun/Moon Style)
export const ThemeToggleButton = ({ export const ThemeToggleButton = ({
className = "", className = "",
variant = "circle", variant = "circle",
start = "center", start = "center",
blur = false, blur = false,
gifUrl = "", gifUrl = "",
}: { }: {
className?: string; className?: string;
variant?: AnimationVariant; variant?: AnimationVariant;
start?: AnimationStart; start?: AnimationStart;
blur?: boolean; blur?: boolean;
gifUrl?: string; gifUrl?: string;
}) => { }) => {
const { isDark, toggleTheme } = useThemeToggle({ const { isDark, toggleTheme } = useThemeToggle({
variant, variant,
start, start,
blur, blur,
gifUrl, gifUrl,
}); });
const clipId = useId(); const clipId = useId();
const clipPathId = `theme-toggle-clip-${clipId}`; const clipPathId = `theme-toggle-clip-${clipId}`;
  return (
    <button
      type="button"
      className={cn(
        "size-10 cursor-pointer rounded-full p-2 transition-all duration-300 active:scale-95 bg-transparent",
        isDark ? "text-white" : "text-black",
        className
      )}
      onClick={toggleTheme}
      aria-label="Toggle theme"
    >
      <span className="sr-only">Toggle theme</span>
      <svg
        xmlns="http://www.w3.org/2000/svg"
        aria-hidden="true"
        fill="currentColor"
        strokeLinecap="round"
        viewBox="0 0 32 32"
      >
        <clipPath id={clipPathId}>
          <motion.path
            animate={{ y: isDark ? 10 : 0, x: isDark ? -12 : 0 }}
            transition={{ ease: "easeInOut", duration: 0.35 }}
            d="M0-5h30a1 1 0 0 0 9 13v24H0Z"
          />
        </clipPath>
        <g clipPath={`url(#${clipPathId})`}>
          <motion.circle
            animate={{ r: isDark ? 10 : 8 }}
            transition={{ ease: "easeInOut", duration: 0.35 }}
            cx="16"
            cy="16"
          />
          <motion.g
            animate={{
              rotate: isDark ? -100 : 0,
              scale: isDark ? 0.5 : 1,
              opacity: isDark ? 0 : 1,
            }}
            transition={{ ease: "easeInOut", duration: 0.35 }}
            stroke="currentColor"
            strokeWidth="1.5"
          >
            <path d="M16 5.5v-4" />
            <path d="M16 30.5v-4" />
            <path d="M1.5 16h4" />
            <path d="M26.5 16h4" />
            <path d="m23.4 8.6 2.8-2.8" />
            <path d="m5.7 26.3 2.9-2.9" />
            <path d="m5.8 5.8 2.8 2.8" />
            <path d="m23.4 23.4 2.9 2.9" />
          </motion.g>
        </g>
      </svg>
    </button>
  );
};
// ///////////////////////////////////////////////////////////////////////////
// Backwards compatible export (alias for ThemeToggleButton with default settings)
export function ThemeTogglerComponent() {
  return <ThemeToggleButton variant="circle" start="top-right" className="size-8" />;
}
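For reference, a hedged usage sketch of the exported button; the import path and the surrounding Header component are hypothetical, not taken from this repo:

// Sketch only; adjust the import path to wherever this file lives.
import { ThemeToggleButton } from "@/components/theme-toggle-button";

export function Header() {
  return (
    <header className="flex items-center justify-between p-4">
      <span className="font-semibold">My App</span>
      <ThemeToggleButton variant="circle" start="top-right" />
    </header>
  );
}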
/**

View file

@@ -144,7 +144,7 @@ export function useDocuments(
    (doc: DocumentElectric): DocumentDisplay => ({
      ...doc,
      created_by_name: doc.created_by_id
        ? (userCacheRef.current.get(doc.created_by_id) ?? null)
        : null,
      status: doc.status ?? { state: "ready" },
    }),
@@ -232,7 +232,15 @@ export function useDocuments(
      const handle = await client.syncShape({
        table: "documents",
        where: `search_space_id = ${spaceId}`,
        columns: [
          "id",
          "document_type",
          "search_space_id",
          "title",
          "created_by_id",
          "created_at",
          "status",
        ],
        primaryKey: ["id"],
      });
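The same call shape could presumably be reused for other tables; a sketch, assuming client.syncShape takes the { table, where, columns, primaryKey } options shown above and returns a handle exposing isUpToDate and unsubscribe (the "chats" table and its columns are made up for illustration):

// Sketch only; not taken from this repo.
const chatsHandle = await client.syncShape({
  table: "chats",
  where: `search_space_id = ${spaceId}`,
  columns: ["id", "title", "search_space_id", "created_at"],
  primaryKey: ["id"],
});
// chatsHandle.isUpToDate → initial sync state; chatsHandle.unsubscribe() on cleanup.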
@@ -258,7 +266,10 @@ export function useDocuments(
      // Set up live query
      const db = client.db as {
        live?: {
          query: <T>(
            sql: string,
            params?: (number | string)[]
          ) => Promise<{
            subscribe: (cb: (result: { rows: T[] }) => void) => void;
            unsubscribe?: () => void;
          }>;
@@ -297,8 +308,7 @@ export function useDocuments(
        if (!mounted || !result.rows) return;
        // DEBUG: Log first few raw documents to see what's coming from Electric
        console.log("[useDocuments] Raw data sample:", result.rows.slice(0, 3));
        const validItems = result.rows.filter(isValidDocument);
        const isFullySynced = syncHandleRef.current?.isUpToDate ?? false;
@@ -309,8 +319,9 @@ export function useDocuments(
        // Fetch user names for new users (non-blocking)
        const unknownUserIds = validItems
          .filter(
            (doc): doc is DocumentElectric & { created_by_id: string } =>
              doc.created_by_id !== null && !userCacheRef.current.has(doc.created_by_id)
          )
          .map((doc) => doc.created_by_id);
@@ -326,7 +337,7 @@ export function useDocuments(
          prev.map((doc) => ({
            ...doc,
            created_by_name: doc.created_by_id
              ? (userCacheRef.current.get(doc.created_by_id) ?? null)
              : null,
          }))
        );
@@ -358,7 +369,9 @@ export function useDocuments(
        // Case 2: Electric is fully synced - TRUST IT COMPLETELY (handles bulk deletes)
        if (isFullySynced) {
          const liveDocs = deduplicateAndSort(validItems.map(electricToDisplayDoc));
          console.log(
            `[useDocuments] Synced update: ${liveDocs.length} docs (was ${prev.length})`
          );
          return liveDocs;
        }
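deduplicateAndSort is referenced here but not shown in the hunk; a plausible sketch (an assumption, not the repo's implementation) that keeps one row per id and sorts newest-first:

// Sketch only; the real DocumentDisplay type lives elsewhere in this codebase.
type Doc = { id: string; created_at: string };

function deduplicateAndSort<T extends Doc>(docs: T[]): T[] {
  const byId = new Map<string, T>();
  for (const doc of docs) byId.set(doc.id, doc); // last write wins per id
  return [...byId.values()].sort(
    (a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime()
  );
}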

View file

@@ -495,9 +495,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
          // Parse the WHERE clause to build a DELETE statement
          // The WHERE clause is already validated and formatted
          await tx.exec(`DELETE FROM ${table} WHERE ${validatedWhere}`);
          debugLog(`[Electric] 🗑️ Cleared ${table} rows matching: ${validatedWhere}`);
        } else {
          // No WHERE clause means we're syncing the entire table
          await tx.exec(`DELETE FROM ${table}`);
@@ -514,10 +512,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
      },
    };
    debugLog("[Electric] syncShapeToTable config:", JSON.stringify(shapeConfig, null, 2));
    let shape: { unsubscribe: () => void; isUpToDate: boolean; stream: unknown };
    try {
@@ -550,9 +545,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
          retryError instanceof Error ? retryError.message : String(retryError);
        if (retryMessage.includes("Already syncing")) {
          // Still syncing - create a placeholder handle that indicates the table is being synced
          debugWarn(`[Electric] ${table} still syncing, creating placeholder handle`);
          const placeholderHandle: SyncHandle = {
            unsubscribe: () => {
              debugLog(`[Electric] Placeholder unsubscribe for: ${cacheKey}`);
@@ -656,9 +649,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
      // Also check stream's isUpToDate property immediately
      if (stream?.isUpToDate) {
        debugLog(`[Electric] ✅ Stream isUpToDate is true immediately for ${table}`);
        resolveInitialSync();
      }
    }
@@ -671,9 +662,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
      }
      if (shape.isUpToDate || stream?.isUpToDate) {
        debugLog(`[Electric] ✅ Sync completed (detected via polling) for ${table}`);
        clearInterval(pollInterval);
        resolveInitialSync();
      }
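The polling fallback above could also be factored into a small helper; a sketch under the assumption that the handle only needs to expose an isUpToDate boolean (waitForUpToDate is a hypothetical name, not part of this repo):

// Sketch only: resolve once isUpToDate flips, reject after a timeout.
function waitForUpToDate(
  handle: { isUpToDate: boolean },
  intervalMs = 200,
  timeoutMs = 30_000
): Promise<void> {
  return new Promise((resolve, reject) => {
    const startedAt = Date.now();
    const poll = setInterval(() => {
      if (handle.isUpToDate) {
        clearInterval(poll);
        resolve();
      } else if (Date.now() - startedAt > timeoutMs) {
        clearInterval(poll);
        reject(new Error(`Timed out waiting for initial sync after ${timeoutMs}ms`));
      }
    }, intervalMs);
  });
}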