mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 00:36:31 +02:00
chore: ran linting
This commit is contained in:
parent
00a617ef17
commit
aa66928154
44 changed files with 2025 additions and 1658 deletions
|
|
@ -13,8 +13,6 @@ Changes:
|
||||||
|
|
||||||
from collections.abc import Sequence
|
from collections.abc import Sequence
|
||||||
|
|
||||||
import sqlalchemy as sa
|
|
||||||
|
|
||||||
from alembic import op
|
from alembic import op
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
|
|
@ -77,4 +75,3 @@ def downgrade() -> None:
|
||||||
END$$;
|
END$$;
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -285,24 +285,28 @@ async def _analyze_gmail_messages_phase1(
|
||||||
if existing_document:
|
if existing_document:
|
||||||
if existing_document.content_hash == content_hash:
|
if existing_document.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status = DocumentStatus.ready()
|
||||||
documents_skipped += 1
|
documents_skipped += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Queue existing document for update (will be set to processing in Phase 2)
|
# Queue existing document for update (will be set to processing in Phase 2)
|
||||||
messages_to_process.append({
|
messages_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'markdown_content': markdown_content,
|
"is_new": False,
|
||||||
'content_hash': content_hash,
|
"markdown_content": markdown_content,
|
||||||
'message_id': message_id,
|
"content_hash": content_hash,
|
||||||
'thread_id': thread_id,
|
"message_id": message_id,
|
||||||
'subject': subject,
|
"thread_id": thread_id,
|
||||||
'sender': sender,
|
"subject": subject,
|
||||||
'date_str': date_str,
|
"sender": sender,
|
||||||
'label_ids': label_ids,
|
"date_str": date_str,
|
||||||
})
|
"label_ids": label_ids,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Document doesn't exist by unique_identifier_hash
|
# Document doesn't exist by unique_identifier_hash
|
||||||
|
|
@ -350,18 +354,20 @@ async def _analyze_gmail_messages_phase1(
|
||||||
)
|
)
|
||||||
session.add(document)
|
session.add(document)
|
||||||
|
|
||||||
messages_to_process.append({
|
messages_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'markdown_content': markdown_content,
|
"is_new": True,
|
||||||
'content_hash': content_hash,
|
"markdown_content": markdown_content,
|
||||||
'message_id': message_id,
|
"content_hash": content_hash,
|
||||||
'thread_id': thread_id,
|
"message_id": message_id,
|
||||||
'subject': subject,
|
"thread_id": thread_id,
|
||||||
'sender': sender,
|
"subject": subject,
|
||||||
'date_str': date_str,
|
"sender": sender,
|
||||||
'label_ids': label_ids,
|
"date_str": date_str,
|
||||||
})
|
"label_ids": label_ids,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True)
|
logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True)
|
||||||
|
|
@ -398,7 +404,7 @@ async def _process_gmail_messages_phase2(
|
||||||
await on_heartbeat_callback(documents_indexed)
|
await on_heartbeat_callback(documents_indexed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
|
|
@ -411,37 +417,35 @@ async def _process_gmail_messages_phase2(
|
||||||
|
|
||||||
if user_llm:
|
if user_llm:
|
||||||
document_metadata_for_summary = {
|
document_metadata_for_summary = {
|
||||||
"message_id": item['message_id'],
|
"message_id": item["message_id"],
|
||||||
"thread_id": item['thread_id'],
|
"thread_id": item["thread_id"],
|
||||||
"subject": item['subject'],
|
"subject": item["subject"],
|
||||||
"sender": item['sender'],
|
"sender": item["sender"],
|
||||||
"document_type": "Gmail Message (Composio)",
|
"document_type": "Gmail Message (Composio)",
|
||||||
}
|
}
|
||||||
summary_content, summary_embedding = await generate_document_summary(
|
summary_content, summary_embedding = await generate_document_summary(
|
||||||
item['markdown_content'], user_llm, document_metadata_for_summary
|
item["markdown_content"], user_llm, document_metadata_for_summary
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
summary_content = (
|
summary_content = f"Gmail: {item['subject']}\n\nFrom: {item['sender']}\nDate: {item['date_str']}"
|
||||||
f"Gmail: {item['subject']}\n\nFrom: {item['sender']}\nDate: {item['date_str']}"
|
|
||||||
)
|
|
||||||
summary_embedding = config.embedding_model_instance.embed(
|
summary_embedding = config.embedding_model_instance.embed(
|
||||||
summary_content
|
summary_content
|
||||||
)
|
)
|
||||||
|
|
||||||
chunks = await create_document_chunks(item['markdown_content'])
|
chunks = await create_document_chunks(item["markdown_content"])
|
||||||
|
|
||||||
# Update document to READY with actual content
|
# Update document to READY with actual content
|
||||||
document.title = item['subject']
|
document.title = item["subject"]
|
||||||
document.content = summary_content
|
document.content = summary_content
|
||||||
document.content_hash = item['content_hash']
|
document.content_hash = item["content_hash"]
|
||||||
document.embedding = summary_embedding
|
document.embedding = summary_embedding
|
||||||
document.document_metadata = {
|
document.document_metadata = {
|
||||||
"message_id": item['message_id'],
|
"message_id": item["message_id"],
|
||||||
"thread_id": item['thread_id'],
|
"thread_id": item["thread_id"],
|
||||||
"subject": item['subject'],
|
"subject": item["subject"],
|
||||||
"sender": item['sender'],
|
"sender": item["sender"],
|
||||||
"date": item['date_str'],
|
"date": item["date_str"],
|
||||||
"labels": item['label_ids'],
|
"labels": item["label_ids"],
|
||||||
"connector_id": connector_id,
|
"connector_id": connector_id,
|
||||||
"source": "composio",
|
"source": "composio",
|
||||||
}
|
}
|
||||||
|
|
@ -465,7 +469,9 @@ async def _process_gmail_messages_phase2(
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
documents_failed += 1
|
documents_failed += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -571,7 +577,9 @@ async def index_composio_gmail(
|
||||||
)
|
)
|
||||||
|
|
||||||
all_messages.extend(messages)
|
all_messages.extend(messages)
|
||||||
logger.info(f"Fetched {len(messages)} messages (total: {len(all_messages)})")
|
logger.info(
|
||||||
|
f"Fetched {len(messages)} messages (total: {len(all_messages)})"
|
||||||
|
)
|
||||||
|
|
||||||
if not next_token or len(messages) < current_batch_size:
|
if not next_token or len(messages) < current_batch_size:
|
||||||
break
|
break
|
||||||
|
|
@ -616,7 +624,7 @@ async def index_composio_gmail(
|
||||||
)
|
)
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
new_documents_count = len([m for m in messages_to_process if m['is_new']])
|
new_documents_count = len([m for m in messages_to_process if m["is_new"]])
|
||||||
if new_documents_count > 0:
|
if new_documents_count > 0:
|
||||||
logger.info(f"Phase 1: Committing {new_documents_count} pending documents")
|
logger.info(f"Phase 1: Committing {new_documents_count} pending documents")
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
@ -645,9 +653,7 @@ async def index_composio_gmail(
|
||||||
await update_connector_last_indexed(session, connector, update_last_indexed)
|
await update_connector_last_indexed(session, connector, update_last_indexed)
|
||||||
|
|
||||||
# Final commit to ensure all documents are persisted
|
# Final commit to ensure all documents are persisted
|
||||||
logger.info(
|
logger.info(f"Final commit: Total {documents_indexed} Gmail messages processed")
|
||||||
f"Final commit: Total {documents_indexed} Gmail messages processed"
|
|
||||||
)
|
|
||||||
try:
|
try:
|
||||||
await session.commit()
|
await session.commit()
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|
|
||||||
|
|
@ -268,7 +268,9 @@ async def index_composio_google_calendar(
|
||||||
documents_indexed = 0
|
documents_indexed = 0
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
documents_failed = 0 # Track events that failed processing
|
documents_failed = 0 # Track events that failed processing
|
||||||
duplicate_content_count = 0 # Track events skipped due to duplicate content_hash
|
duplicate_content_count = (
|
||||||
|
0 # Track events skipped due to duplicate content_hash
|
||||||
|
)
|
||||||
last_heartbeat_time = time.time()
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -317,23 +319,27 @@ async def index_composio_google_calendar(
|
||||||
if existing_document:
|
if existing_document:
|
||||||
if existing_document.content_hash == content_hash:
|
if existing_document.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status = DocumentStatus.ready()
|
||||||
documents_skipped += 1
|
documents_skipped += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Queue existing document for update (will be set to processing in Phase 2)
|
# Queue existing document for update (will be set to processing in Phase 2)
|
||||||
events_to_process.append({
|
events_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'markdown_content': markdown_content,
|
"is_new": False,
|
||||||
'content_hash': content_hash,
|
"markdown_content": markdown_content,
|
||||||
'event_id': event_id,
|
"content_hash": content_hash,
|
||||||
'summary': summary,
|
"event_id": event_id,
|
||||||
'start_time': start_time,
|
"summary": summary,
|
||||||
'end_time': end_time,
|
"start_time": start_time,
|
||||||
'location': location,
|
"end_time": end_time,
|
||||||
})
|
"location": location,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Document doesn't exist by unique_identifier_hash
|
# Document doesn't exist by unique_identifier_hash
|
||||||
|
|
@ -383,17 +389,19 @@ async def index_composio_google_calendar(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
events_to_process.append({
|
events_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'markdown_content': markdown_content,
|
"is_new": True,
|
||||||
'content_hash': content_hash,
|
"markdown_content": markdown_content,
|
||||||
'event_id': event_id,
|
"content_hash": content_hash,
|
||||||
'summary': summary,
|
"event_id": event_id,
|
||||||
'start_time': start_time,
|
"summary": summary,
|
||||||
'end_time': end_time,
|
"start_time": start_time,
|
||||||
'location': location,
|
"end_time": end_time,
|
||||||
})
|
"location": location,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
|
logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
|
||||||
|
|
@ -402,7 +410,9 @@ async def index_composio_google_calendar(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -419,7 +429,7 @@ async def index_composio_google_calendar(
|
||||||
await on_heartbeat_callback(documents_indexed)
|
await on_heartbeat_callback(documents_indexed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
|
|
@ -432,35 +442,40 @@ async def index_composio_google_calendar(
|
||||||
|
|
||||||
if user_llm:
|
if user_llm:
|
||||||
document_metadata_for_summary = {
|
document_metadata_for_summary = {
|
||||||
"event_id": item['event_id'],
|
"event_id": item["event_id"],
|
||||||
"summary": item['summary'],
|
"summary": item["summary"],
|
||||||
"start_time": item['start_time'],
|
"start_time": item["start_time"],
|
||||||
"document_type": "Google Calendar Event (Composio)",
|
"document_type": "Google Calendar Event (Composio)",
|
||||||
}
|
}
|
||||||
summary_content, summary_embedding = await generate_document_summary(
|
(
|
||||||
item['markdown_content'], user_llm, document_metadata_for_summary
|
summary_content,
|
||||||
|
summary_embedding,
|
||||||
|
) = await generate_document_summary(
|
||||||
|
item["markdown_content"],
|
||||||
|
user_llm,
|
||||||
|
document_metadata_for_summary,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
summary_content = f"Calendar: {item['summary']}\n\nStart: {item['start_time']}\nEnd: {item['end_time']}"
|
summary_content = f"Calendar: {item['summary']}\n\nStart: {item['start_time']}\nEnd: {item['end_time']}"
|
||||||
if item['location']:
|
if item["location"]:
|
||||||
summary_content += f"\nLocation: {item['location']}"
|
summary_content += f"\nLocation: {item['location']}"
|
||||||
summary_embedding = config.embedding_model_instance.embed(
|
summary_embedding = config.embedding_model_instance.embed(
|
||||||
summary_content
|
summary_content
|
||||||
)
|
)
|
||||||
|
|
||||||
chunks = await create_document_chunks(item['markdown_content'])
|
chunks = await create_document_chunks(item["markdown_content"])
|
||||||
|
|
||||||
# Update document to READY with actual content
|
# Update document to READY with actual content
|
||||||
document.title = item['summary']
|
document.title = item["summary"]
|
||||||
document.content = summary_content
|
document.content = summary_content
|
||||||
document.content_hash = item['content_hash']
|
document.content_hash = item["content_hash"]
|
||||||
document.embedding = summary_embedding
|
document.embedding = summary_embedding
|
||||||
document.document_metadata = {
|
document.document_metadata = {
|
||||||
"event_id": item['event_id'],
|
"event_id": item["event_id"],
|
||||||
"summary": item['summary'],
|
"summary": item["summary"],
|
||||||
"start_time": item['start_time'],
|
"start_time": item["start_time"],
|
||||||
"end_time": item['end_time'],
|
"end_time": item["end_time"],
|
||||||
"location": item['location'],
|
"location": item["location"],
|
||||||
"connector_id": connector_id,
|
"connector_id": connector_id,
|
||||||
"source": "composio",
|
"source": "composio",
|
||||||
}
|
}
|
||||||
|
|
@ -484,7 +499,9 @@ async def index_composio_google_calendar(
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
documents_failed += 1
|
documents_failed += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -938,13 +938,15 @@ async def _index_composio_drive_delta_sync(
|
||||||
|
|
||||||
if existing_document:
|
if existing_document:
|
||||||
# Queue existing document for update
|
# Queue existing document for update
|
||||||
files_to_process.append({
|
files_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'file_id': file_id,
|
"is_new": False,
|
||||||
'file_name': file_name,
|
"file_id": file_id,
|
||||||
'mime_type': mime_type,
|
"file_name": file_name,
|
||||||
})
|
"mime_type": mime_type,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Create new document with PENDING status
|
# Create new document with PENDING status
|
||||||
|
|
@ -974,13 +976,15 @@ async def _index_composio_drive_delta_sync(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
files_to_process.append({
|
files_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'file_id': file_id,
|
"is_new": True,
|
||||||
'file_name': file_name,
|
"file_id": file_id,
|
||||||
'mime_type': mime_type,
|
"file_name": file_name,
|
||||||
})
|
"mime_type": mime_type,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in Phase 1 for change: {e!s}", exc_info=True)
|
logger.error(f"Error in Phase 1 for change: {e!s}", exc_info=True)
|
||||||
|
|
@ -989,7 +993,9 @@ async def _index_composio_drive_delta_sync(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -1005,7 +1011,7 @@ async def _index_composio_drive_delta_sync(
|
||||||
await on_heartbeat_callback(documents_indexed)
|
await on_heartbeat_callback(documents_indexed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit
|
# Set to PROCESSING and commit
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
|
|
@ -1013,11 +1019,13 @@ async def _index_composio_drive_delta_sync(
|
||||||
|
|
||||||
# Get file content
|
# Get file content
|
||||||
content, content_error = await composio_connector.get_drive_file_content(
|
content, content_error = await composio_connector.get_drive_file_content(
|
||||||
item['file_id'], original_mime_type=item['mime_type']
|
item["file_id"], original_mime_type=item["mime_type"]
|
||||||
)
|
)
|
||||||
|
|
||||||
if content_error or not content:
|
if content_error or not content:
|
||||||
logger.warning(f"Could not get content for file {item['file_name']}: {content_error}")
|
logger.warning(
|
||||||
|
f"Could not get content for file {item['file_name']}: {content_error}"
|
||||||
|
)
|
||||||
markdown_content = f"# {item['file_name']}\n\n"
|
markdown_content = f"# {item['file_name']}\n\n"
|
||||||
markdown_content += f"**File ID:** {item['file_id']}\n"
|
markdown_content += f"**File ID:** {item['file_id']}\n"
|
||||||
markdown_content += f"**Type:** {item['mime_type']}\n"
|
markdown_content += f"**Type:** {item['mime_type']}\n"
|
||||||
|
|
@ -1031,9 +1039,9 @@ async def _index_composio_drive_delta_sync(
|
||||||
else:
|
else:
|
||||||
markdown_content = await _process_file_content(
|
markdown_content = await _process_file_content(
|
||||||
content=content,
|
content=content,
|
||||||
file_name=item['file_name'],
|
file_name=item["file_name"],
|
||||||
file_id=item['file_id'],
|
file_id=item["file_id"],
|
||||||
mime_type=item['mime_type'],
|
mime_type=item["mime_type"],
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
session=session,
|
session=session,
|
||||||
|
|
@ -1045,14 +1053,14 @@ async def _index_composio_drive_delta_sync(
|
||||||
content_hash = generate_content_hash(markdown_content, search_space_id)
|
content_hash = generate_content_hash(markdown_content, search_space_id)
|
||||||
|
|
||||||
# For existing documents, check if content changed
|
# For existing documents, check if content changed
|
||||||
if not item['is_new'] and document.content_hash == content_hash:
|
if not item["is_new"] and document.content_hash == content_hash:
|
||||||
if not DocumentStatus.is_state(document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(document.status, DocumentStatus.READY):
|
||||||
document.status = DocumentStatus.ready()
|
document.status = DocumentStatus.ready()
|
||||||
documents_skipped += 1
|
documents_skipped += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check for duplicate content hash (for new documents)
|
# Check for duplicate content hash (for new documents)
|
||||||
if item['is_new']:
|
if item["is_new"]:
|
||||||
with session.no_autoflush:
|
with session.no_autoflush:
|
||||||
duplicate_by_content = await check_duplicate_document_by_hash(
|
duplicate_by_content = await check_duplicate_document_by_hash(
|
||||||
session, content_hash
|
session, content_hash
|
||||||
|
|
@ -1067,13 +1075,15 @@ async def _index_composio_drive_delta_sync(
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Heavy processing (LLM, embeddings, chunks)
|
# Heavy processing (LLM, embeddings, chunks)
|
||||||
user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
|
user_llm = await get_user_long_context_llm(
|
||||||
|
session, user_id, search_space_id
|
||||||
|
)
|
||||||
|
|
||||||
if user_llm:
|
if user_llm:
|
||||||
document_metadata_for_summary = {
|
document_metadata_for_summary = {
|
||||||
"file_id": item['file_id'],
|
"file_id": item["file_id"],
|
||||||
"file_name": item['file_name'],
|
"file_name": item["file_name"],
|
||||||
"mime_type": item['mime_type'],
|
"mime_type": item["mime_type"],
|
||||||
"document_type": "Google Drive File (Composio)",
|
"document_type": "Google Drive File (Composio)",
|
||||||
}
|
}
|
||||||
summary_content, summary_embedding = await generate_document_summary(
|
summary_content, summary_embedding = await generate_document_summary(
|
||||||
|
|
@ -1081,20 +1091,22 @@ async def _index_composio_drive_delta_sync(
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}"
|
summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}"
|
||||||
summary_embedding = config.embedding_model_instance.embed(summary_content)
|
summary_embedding = config.embedding_model_instance.embed(
|
||||||
|
summary_content
|
||||||
|
)
|
||||||
|
|
||||||
chunks = await create_document_chunks(markdown_content)
|
chunks = await create_document_chunks(markdown_content)
|
||||||
|
|
||||||
# Update document to READY
|
# Update document to READY
|
||||||
document.title = item['file_name']
|
document.title = item["file_name"]
|
||||||
document.content = summary_content
|
document.content = summary_content
|
||||||
document.content_hash = content_hash
|
document.content_hash = content_hash
|
||||||
document.embedding = summary_embedding
|
document.embedding = summary_embedding
|
||||||
document.document_metadata = {
|
document.document_metadata = {
|
||||||
"file_id": item['file_id'],
|
"file_id": item["file_id"],
|
||||||
"file_name": item['file_name'],
|
"file_name": item["file_name"],
|
||||||
"FILE_NAME": item['file_name'],
|
"FILE_NAME": item["file_name"],
|
||||||
"mime_type": item['mime_type'],
|
"mime_type": item["mime_type"],
|
||||||
"connector_id": connector_id,
|
"connector_id": connector_id,
|
||||||
"source": "composio",
|
"source": "composio",
|
||||||
}
|
}
|
||||||
|
|
@ -1117,7 +1129,9 @@ async def _index_composio_drive_delta_sync(
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
documents_failed += 1
|
documents_failed += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -1329,13 +1343,15 @@ async def _index_composio_drive_full_scan(
|
||||||
|
|
||||||
if existing_document:
|
if existing_document:
|
||||||
# Queue existing document for update (will be set to processing in Phase 2)
|
# Queue existing document for update (will be set to processing in Phase 2)
|
||||||
files_to_process.append({
|
files_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'file_id': file_id,
|
"is_new": False,
|
||||||
'file_name': file_name,
|
"file_id": file_id,
|
||||||
'mime_type': mime_type,
|
"file_name": file_name,
|
||||||
})
|
"mime_type": mime_type,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Create new document with PENDING status (visible in UI immediately)
|
# Create new document with PENDING status (visible in UI immediately)
|
||||||
|
|
@ -1365,13 +1381,15 @@ async def _index_composio_drive_full_scan(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
files_to_process.append({
|
files_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'file_id': file_id,
|
"is_new": True,
|
||||||
'file_name': file_name,
|
"file_id": file_id,
|
||||||
'mime_type': mime_type,
|
"file_name": file_name,
|
||||||
})
|
"mime_type": mime_type,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in Phase 1 for file: {e!s}", exc_info=True)
|
logger.error(f"Error in Phase 1 for file: {e!s}", exc_info=True)
|
||||||
|
|
@ -1380,7 +1398,9 @@ async def _index_composio_drive_full_scan(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -1397,7 +1417,7 @@ async def _index_composio_drive_full_scan(
|
||||||
await on_heartbeat_callback(documents_indexed)
|
await on_heartbeat_callback(documents_indexed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
|
|
@ -1405,11 +1425,13 @@ async def _index_composio_drive_full_scan(
|
||||||
|
|
||||||
# Get file content (pass mime_type for Google Workspace export handling)
|
# Get file content (pass mime_type for Google Workspace export handling)
|
||||||
content, content_error = await composio_connector.get_drive_file_content(
|
content, content_error = await composio_connector.get_drive_file_content(
|
||||||
item['file_id'], original_mime_type=item['mime_type']
|
item["file_id"], original_mime_type=item["mime_type"]
|
||||||
)
|
)
|
||||||
|
|
||||||
if content_error or not content:
|
if content_error or not content:
|
||||||
logger.warning(f"Could not get content for file {item['file_name']}: {content_error}")
|
logger.warning(
|
||||||
|
f"Could not get content for file {item['file_name']}: {content_error}"
|
||||||
|
)
|
||||||
markdown_content = f"# {item['file_name']}\n\n"
|
markdown_content = f"# {item['file_name']}\n\n"
|
||||||
markdown_content += f"**File ID:** {item['file_id']}\n"
|
markdown_content += f"**File ID:** {item['file_id']}\n"
|
||||||
markdown_content += f"**Type:** {item['mime_type']}\n"
|
markdown_content += f"**Type:** {item['mime_type']}\n"
|
||||||
|
|
@ -1424,9 +1446,9 @@ async def _index_composio_drive_full_scan(
|
||||||
# Process content based on file type
|
# Process content based on file type
|
||||||
markdown_content = await _process_file_content(
|
markdown_content = await _process_file_content(
|
||||||
content=content,
|
content=content,
|
||||||
file_name=item['file_name'],
|
file_name=item["file_name"],
|
||||||
file_id=item['file_id'],
|
file_id=item["file_id"],
|
||||||
mime_type=item['mime_type'],
|
mime_type=item["mime_type"],
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
session=session,
|
session=session,
|
||||||
|
|
@ -1438,7 +1460,7 @@ async def _index_composio_drive_full_scan(
|
||||||
content_hash = generate_content_hash(markdown_content, search_space_id)
|
content_hash = generate_content_hash(markdown_content, search_space_id)
|
||||||
|
|
||||||
# For existing documents, check if content changed
|
# For existing documents, check if content changed
|
||||||
if not item['is_new'] and document.content_hash == content_hash:
|
if not item["is_new"] and document.content_hash == content_hash:
|
||||||
# Ensure status is ready
|
# Ensure status is ready
|
||||||
if not DocumentStatus.is_state(document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(document.status, DocumentStatus.READY):
|
||||||
document.status = DocumentStatus.ready()
|
document.status = DocumentStatus.ready()
|
||||||
|
|
@ -1446,7 +1468,7 @@ async def _index_composio_drive_full_scan(
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check for duplicate content hash (for new documents)
|
# Check for duplicate content hash (for new documents)
|
||||||
if item['is_new']:
|
if item["is_new"]:
|
||||||
with session.no_autoflush:
|
with session.no_autoflush:
|
||||||
duplicate_by_content = await check_duplicate_document_by_hash(
|
duplicate_by_content = await check_duplicate_document_by_hash(
|
||||||
session, content_hash
|
session, content_hash
|
||||||
|
|
@ -1462,13 +1484,15 @@ async def _index_composio_drive_full_scan(
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Heavy processing (LLM, embeddings, chunks)
|
# Heavy processing (LLM, embeddings, chunks)
|
||||||
user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
|
user_llm = await get_user_long_context_llm(
|
||||||
|
session, user_id, search_space_id
|
||||||
|
)
|
||||||
|
|
||||||
if user_llm:
|
if user_llm:
|
||||||
document_metadata_for_summary = {
|
document_metadata_for_summary = {
|
||||||
"file_id": item['file_id'],
|
"file_id": item["file_id"],
|
||||||
"file_name": item['file_name'],
|
"file_name": item["file_name"],
|
||||||
"mime_type": item['mime_type'],
|
"mime_type": item["mime_type"],
|
||||||
"document_type": "Google Drive File (Composio)",
|
"document_type": "Google Drive File (Composio)",
|
||||||
}
|
}
|
||||||
summary_content, summary_embedding = await generate_document_summary(
|
summary_content, summary_embedding = await generate_document_summary(
|
||||||
|
|
@ -1476,20 +1500,22 @@ async def _index_composio_drive_full_scan(
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}"
|
summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}"
|
||||||
summary_embedding = config.embedding_model_instance.embed(summary_content)
|
summary_embedding = config.embedding_model_instance.embed(
|
||||||
|
summary_content
|
||||||
|
)
|
||||||
|
|
||||||
chunks = await create_document_chunks(markdown_content)
|
chunks = await create_document_chunks(markdown_content)
|
||||||
|
|
||||||
# Update document to READY with actual content
|
# Update document to READY with actual content
|
||||||
document.title = item['file_name']
|
document.title = item["file_name"]
|
||||||
document.content = summary_content
|
document.content = summary_content
|
||||||
document.content_hash = content_hash
|
document.content_hash = content_hash
|
||||||
document.embedding = summary_embedding
|
document.embedding = summary_embedding
|
||||||
document.document_metadata = {
|
document.document_metadata = {
|
||||||
"file_id": item['file_id'],
|
"file_id": item["file_id"],
|
||||||
"file_name": item['file_name'],
|
"file_name": item["file_name"],
|
||||||
"FILE_NAME": item['file_name'],
|
"FILE_NAME": item["file_name"],
|
||||||
"mime_type": item['mime_type'],
|
"mime_type": item["mime_type"],
|
||||||
"connector_id": connector_id,
|
"connector_id": connector_id,
|
||||||
"source": "composio",
|
"source": "composio",
|
||||||
}
|
}
|
||||||
|
|
@ -1515,7 +1541,9 @@ async def _index_composio_drive_full_scan(
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
documents_failed += 1
|
documents_failed += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -103,67 +103,70 @@ class PodcastStatus(str, Enum):
|
||||||
class DocumentStatus:
|
class DocumentStatus:
|
||||||
"""
|
"""
|
||||||
Helper class for document processing status (stored as JSONB).
|
Helper class for document processing status (stored as JSONB).
|
||||||
|
|
||||||
Status values:
|
Status values:
|
||||||
- {"state": "ready"} - Document is fully processed and searchable
|
- {"state": "ready"} - Document is fully processed and searchable
|
||||||
- {"state": "pending"} - Document is queued, waiting to be processed
|
- {"state": "pending"} - Document is queued, waiting to be processed
|
||||||
- {"state": "processing"} - Document is currently being processed (only 1 at a time)
|
- {"state": "processing"} - Document is currently being processed (only 1 at a time)
|
||||||
- {"state": "failed", "reason": "..."} - Processing failed with reason
|
- {"state": "failed", "reason": "..."} - Processing failed with reason
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
document.status = DocumentStatus.pending()
|
document.status = DocumentStatus.pending()
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
document.status = DocumentStatus.ready()
|
document.status = DocumentStatus.ready()
|
||||||
document.status = DocumentStatus.failed("LLM rate limit exceeded")
|
document.status = DocumentStatus.failed("LLM rate limit exceeded")
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# State constants
|
# State constants
|
||||||
READY = "ready"
|
READY = "ready"
|
||||||
PENDING = "pending"
|
PENDING = "pending"
|
||||||
PROCESSING = "processing"
|
PROCESSING = "processing"
|
||||||
FAILED = "failed"
|
FAILED = "failed"
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def ready() -> dict:
|
def ready() -> dict:
|
||||||
"""Return status dict for a ready/searchable document."""
|
"""Return status dict for a ready/searchable document."""
|
||||||
return {"state": DocumentStatus.READY}
|
return {"state": DocumentStatus.READY}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def pending() -> dict:
|
def pending() -> dict:
|
||||||
"""Return status dict for a document waiting to be processed."""
|
"""Return status dict for a document waiting to be processed."""
|
||||||
return {"state": DocumentStatus.PENDING}
|
return {"state": DocumentStatus.PENDING}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def processing() -> dict:
|
def processing() -> dict:
|
||||||
"""Return status dict for a document being processed."""
|
"""Return status dict for a document being processed."""
|
||||||
return {"state": DocumentStatus.PROCESSING}
|
return {"state": DocumentStatus.PROCESSING}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def failed(reason: str, **extra_details) -> dict:
|
def failed(reason: str, **extra_details) -> dict:
|
||||||
"""
|
"""
|
||||||
Return status dict for a failed document.
|
Return status dict for a failed document.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
reason: Human-readable failure reason
|
reason: Human-readable failure reason
|
||||||
**extra_details: Optional additional details (duplicate_of, error_code, etc.)
|
**extra_details: Optional additional details (duplicate_of, error_code, etc.)
|
||||||
"""
|
"""
|
||||||
status = {"state": DocumentStatus.FAILED, "reason": reason[:500]} # Truncate long reasons
|
status = {
|
||||||
|
"state": DocumentStatus.FAILED,
|
||||||
|
"reason": reason[:500],
|
||||||
|
} # Truncate long reasons
|
||||||
if extra_details:
|
if extra_details:
|
||||||
status.update(extra_details)
|
status.update(extra_details)
|
||||||
return status
|
return status
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_state(status: dict | None) -> str | None:
|
def get_state(status: dict | None) -> str | None:
|
||||||
"""Extract state from status dict, returns None if invalid."""
|
"""Extract state from status dict, returns None if invalid."""
|
||||||
if status is None:
|
if status is None:
|
||||||
return None
|
return None
|
||||||
return status.get("state") if isinstance(status, dict) else None
|
return status.get("state") if isinstance(status, dict) else None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_state(status: dict | None, state: str) -> bool:
|
def is_state(status: dict | None, state: str) -> bool:
|
||||||
"""Check if status matches a given state."""
|
"""Check if status matches a given state."""
|
||||||
return DocumentStatus.get_state(status) == state
|
return DocumentStatus.get_state(status) == state
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_failure_reason(status: dict | None) -> str | None:
|
def get_failure_reason(status: dict | None) -> str | None:
|
||||||
"""Extract failure reason from status dict."""
|
"""Extract failure reason from status dict."""
|
||||||
|
|
@ -866,7 +869,7 @@ class Document(BaseModel, TimestampMixin):
|
||||||
JSONB,
|
JSONB,
|
||||||
nullable=False,
|
nullable=False,
|
||||||
default=DocumentStatus.ready,
|
default=DocumentStatus.ready,
|
||||||
server_default=text("'{\"state\": \"ready\"}'::jsonb"),
|
server_default=text('\'{"state": "ready"}\'::jsonb'),
|
||||||
index=True,
|
index=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -114,11 +114,11 @@ async def create_documents_file_upload(
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Upload files as documents with real-time status tracking.
|
Upload files as documents with real-time status tracking.
|
||||||
|
|
||||||
Implements 2-phase document status updates for real-time UI feedback:
|
Implements 2-phase document status updates for real-time UI feedback:
|
||||||
- Phase 1: Create all documents with 'pending' status (visible in UI immediately via ElectricSQL)
|
- Phase 1: Create all documents with 'pending' status (visible in UI immediately via ElectricSQL)
|
||||||
- Phase 2: Celery processes each file: pending → processing → ready/failed
|
- Phase 2: Celery processes each file: pending → processing → ready/failed
|
||||||
|
|
||||||
Requires DOCUMENTS_CREATE permission.
|
Requires DOCUMENTS_CREATE permission.
|
||||||
"""
|
"""
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
@ -144,7 +144,9 @@ async def create_documents_file_upload(
|
||||||
raise HTTPException(status_code=400, detail="No files provided")
|
raise HTTPException(status_code=400, detail="No files provided")
|
||||||
|
|
||||||
created_documents: list[Document] = []
|
created_documents: list[Document] = []
|
||||||
files_to_process: list[tuple[Document, str, str]] = [] # (document, temp_path, filename)
|
files_to_process: list[
|
||||||
|
tuple[Document, str, str]
|
||||||
|
] = [] # (document, temp_path, filename)
|
||||||
skipped_duplicates = 0
|
skipped_duplicates = 0
|
||||||
|
|
||||||
# ===== PHASE 1: Create pending documents for all files =====
|
# ===== PHASE 1: Create pending documents for all files =====
|
||||||
|
|
@ -201,7 +203,9 @@ async def create_documents_file_upload(
|
||||||
)
|
)
|
||||||
session.add(document)
|
session.add(document)
|
||||||
created_documents.append(document)
|
created_documents.append(document)
|
||||||
files_to_process.append((document, temp_path, file.filename or "unknown"))
|
files_to_process.append(
|
||||||
|
(document, temp_path, file.filename or "unknown")
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
|
|
@ -348,15 +352,15 @@ async def read_documents(
|
||||||
created_by_name = None
|
created_by_name = None
|
||||||
if doc.created_by:
|
if doc.created_by:
|
||||||
created_by_name = doc.created_by.display_name or doc.created_by.email
|
created_by_name = doc.created_by.display_name or doc.created_by.email
|
||||||
|
|
||||||
# Parse status from JSONB
|
# Parse status from JSONB
|
||||||
status_data = None
|
status_data = None
|
||||||
if hasattr(doc, 'status') and doc.status:
|
if hasattr(doc, "status") and doc.status:
|
||||||
status_data = DocumentStatusSchema(
|
status_data = DocumentStatusSchema(
|
||||||
state=doc.status.get("state", "ready"),
|
state=doc.status.get("state", "ready"),
|
||||||
reason=doc.status.get("reason"),
|
reason=doc.status.get("reason"),
|
||||||
)
|
)
|
||||||
|
|
||||||
api_documents.append(
|
api_documents.append(
|
||||||
DocumentRead(
|
DocumentRead(
|
||||||
id=doc.id,
|
id=doc.id,
|
||||||
|
|
@ -503,15 +507,15 @@ async def search_documents(
|
||||||
created_by_name = None
|
created_by_name = None
|
||||||
if doc.created_by:
|
if doc.created_by:
|
||||||
created_by_name = doc.created_by.display_name or doc.created_by.email
|
created_by_name = doc.created_by.display_name or doc.created_by.email
|
||||||
|
|
||||||
# Parse status from JSONB
|
# Parse status from JSONB
|
||||||
status_data = None
|
status_data = None
|
||||||
if hasattr(doc, 'status') and doc.status:
|
if hasattr(doc, "status") and doc.status:
|
||||||
status_data = DocumentStatusSchema(
|
status_data = DocumentStatusSchema(
|
||||||
state=doc.status.get("state", "ready"),
|
state=doc.status.get("state", "ready"),
|
||||||
reason=doc.status.get("reason"),
|
reason=doc.status.get("reason"),
|
||||||
)
|
)
|
||||||
|
|
||||||
api_documents.append(
|
api_documents.append(
|
||||||
DocumentRead(
|
DocumentRead(
|
||||||
id=doc.id,
|
id=doc.id,
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,7 @@ class DocumentUpdate(DocumentBase):
|
||||||
|
|
||||||
class DocumentStatusSchema(BaseModel):
|
class DocumentStatusSchema(BaseModel):
|
||||||
"""Document processing status."""
|
"""Document processing status."""
|
||||||
|
|
||||||
state: str # "ready", "processing", "failed"
|
state: str # "ready", "processing", "failed"
|
||||||
reason: str | None = None
|
reason: str | None = None
|
||||||
|
|
||||||
|
|
@ -59,8 +60,12 @@ class DocumentRead(BaseModel):
|
||||||
updated_at: datetime | None
|
updated_at: datetime | None
|
||||||
search_space_id: int
|
search_space_id: int
|
||||||
created_by_id: UUID | None = None # User who created/uploaded this document
|
created_by_id: UUID | None = None # User who created/uploaded this document
|
||||||
created_by_name: str | None = None # Display name or email of the user who created this document
|
created_by_name: str | None = (
|
||||||
status: DocumentStatusSchema | None = None # Processing status (ready, processing, failed)
|
None # Display name or email of the user who created this document
|
||||||
|
)
|
||||||
|
status: DocumentStatusSchema | None = (
|
||||||
|
None # Processing status (ready, processing, failed)
|
||||||
|
)
|
||||||
|
|
||||||
model_config = ConfigDict(from_attributes=True)
|
model_config = ConfigDict(from_attributes=True)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1465,11 +1465,7 @@ class ConnectorService:
|
||||||
issue_key = metadata.get("issue_key", "")
|
issue_key = metadata.get("issue_key", "")
|
||||||
issue_title = metadata.get("issue_title", "Untitled Issue")
|
issue_title = metadata.get("issue_title", "Untitled Issue")
|
||||||
status = metadata.get("status", "")
|
status = metadata.get("status", "")
|
||||||
title = (
|
title = f"{issue_key} - {issue_title}" if issue_key else issue_title
|
||||||
f"{issue_key} - {issue_title}"
|
|
||||||
if issue_key
|
|
||||||
else issue_title
|
|
||||||
)
|
|
||||||
if status:
|
if status:
|
||||||
title += f" ({status})"
|
title += f" ({status})"
|
||||||
return title
|
return title
|
||||||
|
|
@ -2387,11 +2383,7 @@ class ConnectorService:
|
||||||
def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
|
def _title_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
|
||||||
event_name = metadata.get("event_name", "Untitled Event")
|
event_name = metadata.get("event_name", "Untitled Event")
|
||||||
start_time = metadata.get("start_time", "")
|
start_time = metadata.get("start_time", "")
|
||||||
return (
|
return f"{event_name} ({start_time})" if start_time else event_name
|
||||||
f"{event_name} ({start_time})"
|
|
||||||
if start_time
|
|
||||||
else event_name
|
|
||||||
)
|
|
||||||
|
|
||||||
def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
|
def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
|
||||||
return metadata.get("event_url", "") or ""
|
return metadata.get("event_url", "") or ""
|
||||||
|
|
|
||||||
|
|
@ -548,11 +548,11 @@ def process_file_upload_with_document_task(
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Celery task to process uploaded file with existing pending document.
|
Celery task to process uploaded file with existing pending document.
|
||||||
|
|
||||||
This task is used by the 2-phase document upload flow:
|
This task is used by the 2-phase document upload flow:
|
||||||
- Phase 1 (API): Creates pending document (visible in UI immediately)
|
- Phase 1 (API): Creates pending document (visible in UI immediately)
|
||||||
- Phase 2 (this task): Updates document status: pending → processing → ready/failed
|
- Phase 2 (this task): Updates document status: pending → processing → ready/failed
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
document_id: ID of the pending document created in Phase 1
|
document_id: ID of the pending document created in Phase 1
|
||||||
temp_path: Path to the uploaded file
|
temp_path: Path to the uploaded file
|
||||||
|
|
@ -634,7 +634,7 @@ async def _process_file_with_document(
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Process file and update existing pending document status.
|
Process file and update existing pending document status.
|
||||||
|
|
||||||
This function implements Phase 2 of the 2-phase document upload:
|
This function implements Phase 2 of the 2-phase document upload:
|
||||||
- Sets document status to 'processing' (shows spinner in UI)
|
- Sets document status to 'processing' (shows spinner in UI)
|
||||||
- Processes the file (parsing, embedding, chunking)
|
- Processes the file (parsing, embedding, chunking)
|
||||||
|
|
@ -669,11 +669,15 @@ async def _process_file_with_document(
|
||||||
file_size = os.path.getsize(temp_path)
|
file_size = os.path.getsize(temp_path)
|
||||||
logger.info(f"[_process_file_with_document] File size: {file_size} bytes")
|
logger.info(f"[_process_file_with_document] File size: {file_size} bytes")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"[_process_file_with_document] Could not get file size: {e}")
|
logger.warning(
|
||||||
|
f"[_process_file_with_document] Could not get file size: {e}"
|
||||||
|
)
|
||||||
file_size = None
|
file_size = None
|
||||||
|
|
||||||
# Create notification for document processing
|
# Create notification for document processing
|
||||||
logger.info(f"[_process_file_with_document] Creating notification for: {filename}")
|
logger.info(
|
||||||
|
f"[_process_file_with_document] Creating notification for: {filename}"
|
||||||
|
)
|
||||||
notification = (
|
notification = (
|
||||||
await NotificationService.document_processing.notify_processing_started(
|
await NotificationService.document_processing.notify_processing_started(
|
||||||
session=session,
|
session=session,
|
||||||
|
|
@ -822,7 +826,9 @@ async def _process_file_with_document(
|
||||||
if os.path.exists(temp_path):
|
if os.path.exists(temp_path):
|
||||||
try:
|
try:
|
||||||
os.unlink(temp_path)
|
os.unlink(temp_path)
|
||||||
logger.info(f"[_process_file_with_document] Cleaned up temp file: {temp_path}")
|
logger.info(
|
||||||
|
f"[_process_file_with_document] Cleaned up temp file: {temp_path}"
|
||||||
|
)
|
||||||
except Exception as cleanup_error:
|
except Exception as cleanup_error:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"[_process_file_with_document] Failed to clean up temp file: {cleanup_error}"
|
f"[_process_file_with_document] Failed to clean up temp file: {cleanup_error}"
|
||||||
|
|
|
||||||
|
|
@ -154,9 +154,7 @@ async def _cleanup_stale_notifications():
|
||||||
f"Found {len(stale_notification_ids)} stale connector indexing notifications "
|
f"Found {len(stale_notification_ids)} stale connector indexing notifications "
|
||||||
f"(no Redis heartbeat key): {stale_notification_ids}"
|
f"(no Redis heartbeat key): {stale_notification_ids}"
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(f"Connector IDs for document cleanup: {stale_connector_ids}")
|
||||||
f"Connector IDs for document cleanup: {stale_connector_ids}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# O(1) Batch UPDATE notifications using JSONB || operator
|
# O(1) Batch UPDATE notifications using JSONB || operator
|
||||||
# This merges the update data into existing notification_metadata
|
# This merges the update data into existing notification_metadata
|
||||||
|
|
|
||||||
|
|
@ -140,7 +140,9 @@ async def index_airtable_records(
|
||||||
log_entry, success_msg, {"bases_count": 0}
|
log_entry, success_msg, {"bases_count": 0}
|
||||||
)
|
)
|
||||||
# CRITICAL: Update timestamp even when no bases found so Electric SQL syncs
|
# CRITICAL: Update timestamp even when no bases found so Electric SQL syncs
|
||||||
await update_connector_last_indexed(session, connector, update_last_indexed)
|
await update_connector_last_indexed(
|
||||||
|
session, connector, update_last_indexed
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
return 0, None # Return None (not error) when no items found
|
return 0, None # Return None (not error) when no items found
|
||||||
|
|
||||||
|
|
@ -277,22 +279,28 @@ async def index_airtable_records(
|
||||||
# Document exists - check if content has changed
|
# Document exists - check if content has changed
|
||||||
if existing_document.content_hash == content_hash:
|
if existing_document.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
|
existing_document.status = (
|
||||||
|
DocumentStatus.ready()
|
||||||
|
)
|
||||||
documents_skipped += 1
|
documents_skipped += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Queue existing document for update (will be set to processing in Phase 2)
|
# Queue existing document for update (will be set to processing in Phase 2)
|
||||||
records_to_process.append({
|
records_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'markdown_content': markdown_content,
|
"is_new": False,
|
||||||
'content_hash': content_hash,
|
"markdown_content": markdown_content,
|
||||||
'record_id': record_id,
|
"content_hash": content_hash,
|
||||||
'record': record,
|
"record_id": record_id,
|
||||||
'base_name': base_name,
|
"record": record,
|
||||||
'table_name': table_name,
|
"base_name": base_name,
|
||||||
})
|
"table_name": table_name,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Document doesn't exist by unique_identifier_hash
|
# Document doesn't exist by unique_identifier_hash
|
||||||
|
|
@ -339,25 +347,31 @@ async def index_airtable_records(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
records_to_process.append({
|
records_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'markdown_content': markdown_content,
|
"is_new": True,
|
||||||
'content_hash': content_hash,
|
"markdown_content": markdown_content,
|
||||||
'record_id': record_id,
|
"content_hash": content_hash,
|
||||||
'record': record,
|
"record_id": record_id,
|
||||||
'base_name': base_name,
|
"record": record,
|
||||||
'table_name': table_name,
|
"base_name": base_name,
|
||||||
})
|
"table_name": table_name,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in Phase 1 for record: {e!s}", exc_info=True)
|
logger.error(
|
||||||
|
f"Error in Phase 1 for record: {e!s}", exc_info=True
|
||||||
|
)
|
||||||
documents_failed += 1
|
documents_failed += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([r for r in records_to_process if r['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([r for r in records_to_process if r['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -374,7 +388,7 @@ async def index_airtable_records(
|
||||||
await on_heartbeat_callback(documents_indexed)
|
await on_heartbeat_callback(documents_indexed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
|
|
@ -387,13 +401,18 @@ async def index_airtable_records(
|
||||||
|
|
||||||
if user_llm:
|
if user_llm:
|
||||||
document_metadata_for_summary = {
|
document_metadata_for_summary = {
|
||||||
"record_id": item['record_id'],
|
"record_id": item["record_id"],
|
||||||
"created_time": item['record'].get("CREATED_TIME()", ""),
|
"created_time": item["record"].get("CREATED_TIME()", ""),
|
||||||
"document_type": "Airtable Record",
|
"document_type": "Airtable Record",
|
||||||
"connector_type": "Airtable",
|
"connector_type": "Airtable",
|
||||||
}
|
}
|
||||||
summary_content, summary_embedding = await generate_document_summary(
|
(
|
||||||
item['markdown_content'], user_llm, document_metadata_for_summary
|
summary_content,
|
||||||
|
summary_embedding,
|
||||||
|
) = await generate_document_summary(
|
||||||
|
item["markdown_content"],
|
||||||
|
user_llm,
|
||||||
|
document_metadata_for_summary,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Fallback to simple summary if no LLM configured
|
# Fallback to simple summary if no LLM configured
|
||||||
|
|
@ -402,18 +421,18 @@ async def index_airtable_records(
|
||||||
summary_content
|
summary_content
|
||||||
)
|
)
|
||||||
|
|
||||||
chunks = await create_document_chunks(item['markdown_content'])
|
chunks = await create_document_chunks(item["markdown_content"])
|
||||||
|
|
||||||
# Update document to READY with actual content
|
# Update document to READY with actual content
|
||||||
document.title = item['record_id']
|
document.title = item["record_id"]
|
||||||
document.content = summary_content
|
document.content = summary_content
|
||||||
document.content_hash = item['content_hash']
|
document.content_hash = item["content_hash"]
|
||||||
document.embedding = summary_embedding
|
document.embedding = summary_embedding
|
||||||
document.document_metadata = {
|
document.document_metadata = {
|
||||||
"record_id": item['record_id'],
|
"record_id": item["record_id"],
|
||||||
"created_time": item['record'].get("CREATED_TIME()", ""),
|
"created_time": item["record"].get("CREATED_TIME()", ""),
|
||||||
"base_name": item['base_name'],
|
"base_name": item["base_name"],
|
||||||
"table_name": item['table_name'],
|
"table_name": item["table_name"],
|
||||||
"connector_id": connector_id,
|
"connector_id": connector_id,
|
||||||
}
|
}
|
||||||
safe_set_chunks(document, chunks)
|
safe_set_chunks(document, chunks)
|
||||||
|
|
@ -430,13 +449,17 @@ async def index_airtable_records(
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error processing Airtable record: {e!s}", exc_info=True)
|
logger.error(
|
||||||
|
f"Error processing Airtable record: {e!s}", exc_info=True
|
||||||
|
)
|
||||||
# Mark document as failed with reason (visible in UI)
|
# Mark document as failed with reason (visible in UI)
|
||||||
try:
|
try:
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
documents_failed += 1
|
documents_failed += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -446,7 +469,9 @@ async def index_airtable_records(
|
||||||
total_processed = documents_indexed
|
total_processed = documents_indexed
|
||||||
|
|
||||||
# Final commit to ensure all documents are persisted (safety net)
|
# Final commit to ensure all documents are persisted (safety net)
|
||||||
logger.info(f"Final commit: Total {documents_indexed} Airtable records processed")
|
logger.info(
|
||||||
|
f"Final commit: Total {documents_indexed} Airtable records processed"
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
await session.commit()
|
await session.commit()
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|
|
||||||
|
|
@ -31,29 +31,30 @@ def get_current_timestamp() -> datetime:
|
||||||
def safe_set_chunks(document: Document, chunks: list) -> None:
|
def safe_set_chunks(document: Document, chunks: list) -> None:
|
||||||
"""
|
"""
|
||||||
Safely assign chunks to a document without triggering lazy loading.
|
Safely assign chunks to a document without triggering lazy loading.
|
||||||
|
|
||||||
ALWAYS use this instead of `document.chunks = chunks` to avoid
|
ALWAYS use this instead of `document.chunks = chunks` to avoid
|
||||||
SQLAlchemy async errors (MissingGreenlet / greenlet_spawn).
|
SQLAlchemy async errors (MissingGreenlet / greenlet_spawn).
|
||||||
|
|
||||||
Why this is needed:
|
Why this is needed:
|
||||||
- Direct assignment `document.chunks = chunks` triggers SQLAlchemy to
|
- Direct assignment `document.chunks = chunks` triggers SQLAlchemy to
|
||||||
load the OLD chunks first (for comparison/orphan detection)
|
load the OLD chunks first (for comparison/orphan detection)
|
||||||
- This lazy loading fails in async context with asyncpg driver
|
- This lazy loading fails in async context with asyncpg driver
|
||||||
- set_committed_value bypasses this by setting the value directly
|
- set_committed_value bypasses this by setting the value directly
|
||||||
|
|
||||||
This function is safe regardless of how the document was loaded
|
This function is safe regardless of how the document was loaded
|
||||||
(with or without selectinload).
|
(with or without selectinload).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
document: The Document object to update
|
document: The Document object to update
|
||||||
chunks: List of Chunk objects to assign
|
chunks: List of Chunk objects to assign
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
# Instead of: document.chunks = chunks (DANGEROUS!)
|
# Instead of: document.chunks = chunks (DANGEROUS!)
|
||||||
safe_set_chunks(document, chunks) # Always safe
|
safe_set_chunks(document, chunks) # Always safe
|
||||||
"""
|
"""
|
||||||
from sqlalchemy.orm.attributes import set_committed_value
|
from sqlalchemy.orm.attributes import set_committed_value
|
||||||
set_committed_value(document, 'chunks', chunks)
|
|
||||||
|
set_committed_value(document, "chunks", chunks)
|
||||||
|
|
||||||
|
|
||||||
async def check_duplicate_document_by_hash(
|
async def check_duplicate_document_by_hash(
|
||||||
|
|
|
||||||
|
|
@ -261,7 +261,9 @@ async def index_bookstack_pages(
|
||||||
# Document exists - check if content has changed
|
# Document exists - check if content has changed
|
||||||
if existing_document.content_hash == content_hash:
|
if existing_document.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status = DocumentStatus.ready()
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Document for BookStack page {page_name} unchanged. Skipping."
|
f"Document for BookStack page {page_name} unchanged. Skipping."
|
||||||
|
|
@ -270,20 +272,22 @@ async def index_bookstack_pages(
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Queue existing document for update (will be set to processing in Phase 2)
|
# Queue existing document for update (will be set to processing in Phase 2)
|
||||||
pages_to_process.append({
|
pages_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'page_id': page_id,
|
"is_new": False,
|
||||||
'page_name': page_name,
|
"page_id": page_id,
|
||||||
'page_slug': page_slug,
|
"page_name": page_name,
|
||||||
'book_id': book_id,
|
"page_slug": page_slug,
|
||||||
'book_slug': book_slug,
|
"book_id": book_id,
|
||||||
'chapter_id': chapter_id,
|
"book_slug": book_slug,
|
||||||
'page_url': page_url,
|
"chapter_id": chapter_id,
|
||||||
'page_content': page_content,
|
"page_url": page_url,
|
||||||
'full_content': full_content,
|
"page_content": page_content,
|
||||||
'content_hash': content_hash,
|
"full_content": full_content,
|
||||||
})
|
"content_hash": content_hash,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Document doesn't exist by unique_identifier_hash
|
# Document doesn't exist by unique_identifier_hash
|
||||||
|
|
@ -331,20 +335,22 @@ async def index_bookstack_pages(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
pages_to_process.append({
|
pages_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'page_id': page_id,
|
"is_new": True,
|
||||||
'page_name': page_name,
|
"page_id": page_id,
|
||||||
'page_slug': page_slug,
|
"page_name": page_name,
|
||||||
'book_id': book_id,
|
"page_slug": page_slug,
|
||||||
'book_slug': book_slug,
|
"book_id": book_id,
|
||||||
'chapter_id': chapter_id,
|
"book_slug": book_slug,
|
||||||
'page_url': page_url,
|
"chapter_id": chapter_id,
|
||||||
'page_content': page_content,
|
"page_url": page_url,
|
||||||
'full_content': full_content,
|
"page_content": page_content,
|
||||||
'content_hash': content_hash,
|
"full_content": full_content,
|
||||||
})
|
"content_hash": content_hash,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
|
logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
|
||||||
|
|
@ -353,7 +359,9 @@ async def index_bookstack_pages(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -370,7 +378,7 @@ async def index_bookstack_pages(
|
||||||
await on_heartbeat_callback(documents_indexed)
|
await on_heartbeat_callback(documents_indexed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
|
|
@ -383,23 +391,23 @@ async def index_bookstack_pages(
|
||||||
|
|
||||||
# Build document metadata
|
# Build document metadata
|
||||||
doc_metadata = {
|
doc_metadata = {
|
||||||
"page_id": item['page_id'],
|
"page_id": item["page_id"],
|
||||||
"page_name": item['page_name'],
|
"page_name": item["page_name"],
|
||||||
"page_slug": item['page_slug'],
|
"page_slug": item["page_slug"],
|
||||||
"book_id": item['book_id'],
|
"book_id": item["book_id"],
|
||||||
"book_slug": item['book_slug'],
|
"book_slug": item["book_slug"],
|
||||||
"chapter_id": item['chapter_id'],
|
"chapter_id": item["chapter_id"],
|
||||||
"base_url": bookstack_base_url,
|
"base_url": bookstack_base_url,
|
||||||
"page_url": item['page_url'],
|
"page_url": item["page_url"],
|
||||||
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||||
"connector_id": connector_id,
|
"connector_id": connector_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
if user_llm:
|
if user_llm:
|
||||||
summary_metadata = {
|
summary_metadata = {
|
||||||
"page_name": item['page_name'],
|
"page_name": item["page_name"],
|
||||||
"page_id": item['page_id'],
|
"page_id": item["page_id"],
|
||||||
"book_id": item['book_id'],
|
"book_id": item["book_id"],
|
||||||
"document_type": "BookStack Page",
|
"document_type": "BookStack Page",
|
||||||
"connector_type": "BookStack",
|
"connector_type": "BookStack",
|
||||||
}
|
}
|
||||||
|
|
@ -407,17 +415,15 @@ async def index_bookstack_pages(
|
||||||
summary_content,
|
summary_content,
|
||||||
summary_embedding,
|
summary_embedding,
|
||||||
) = await generate_document_summary(
|
) = await generate_document_summary(
|
||||||
item['full_content'], user_llm, summary_metadata
|
item["full_content"], user_llm, summary_metadata
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Fallback to simple summary if no LLM configured
|
# Fallback to simple summary if no LLM configured
|
||||||
summary_content = (
|
summary_content = f"BookStack Page: {item['page_name']}\n\nBook ID: {item['book_id']}\n\n"
|
||||||
f"BookStack Page: {item['page_name']}\n\nBook ID: {item['book_id']}\n\n"
|
if item["page_content"]:
|
||||||
)
|
|
||||||
if item['page_content']:
|
|
||||||
# Take first 1000 characters of content for summary
|
# Take first 1000 characters of content for summary
|
||||||
content_preview = item['page_content'][:1000]
|
content_preview = item["page_content"][:1000]
|
||||||
if len(item['page_content']) > 1000:
|
if len(item["page_content"]) > 1000:
|
||||||
content_preview += "..."
|
content_preview += "..."
|
||||||
summary_content += f"Content Preview: {content_preview}\n\n"
|
summary_content += f"Content Preview: {content_preview}\n\n"
|
||||||
summary_embedding = config.embedding_model_instance.embed(
|
summary_embedding = config.embedding_model_instance.embed(
|
||||||
|
|
@ -425,12 +431,12 @@ async def index_bookstack_pages(
|
||||||
)
|
)
|
||||||
|
|
||||||
# Process chunks - using the full page content
|
# Process chunks - using the full page content
|
||||||
chunks = await create_document_chunks(item['full_content'])
|
chunks = await create_document_chunks(item["full_content"])
|
||||||
|
|
||||||
# Update document to READY with actual content
|
# Update document to READY with actual content
|
||||||
document.title = item['page_name']
|
document.title = item["page_name"]
|
||||||
document.content = summary_content
|
document.content = summary_content
|
||||||
document.content_hash = item['content_hash']
|
document.content_hash = item["content_hash"]
|
||||||
document.embedding = summary_embedding
|
document.embedding = summary_embedding
|
||||||
document.document_metadata = doc_metadata
|
document.document_metadata = doc_metadata
|
||||||
safe_set_chunks(document, chunks)
|
safe_set_chunks(document, chunks)
|
||||||
|
|
@ -456,7 +462,9 @@ async def index_bookstack_pages(
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
skipped_pages.append(
|
skipped_pages.append(
|
||||||
f"{item.get('page_name', 'Unknown')} (processing error)"
|
f"{item.get('page_name', 'Unknown')} (processing error)"
|
||||||
)
|
)
|
||||||
|
|
@ -473,7 +481,9 @@ async def index_bookstack_pages(
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
await session.commit()
|
await session.commit()
|
||||||
logger.info("Successfully committed all BookStack document changes to database")
|
logger.info(
|
||||||
|
"Successfully committed all BookStack document changes to database"
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Handle any remaining integrity errors gracefully (race conditions, etc.)
|
# Handle any remaining integrity errors gracefully (race conditions, etc.)
|
||||||
if (
|
if (
|
||||||
|
|
|
||||||
|
|
@ -260,7 +260,9 @@ async def index_clickup_tasks(
|
||||||
# Document exists - check if content has changed
|
# Document exists - check if content has changed
|
||||||
if existing_document.content_hash == content_hash:
|
if existing_document.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status = DocumentStatus.ready()
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Document for ClickUp task {task_name} unchanged. Skipping."
|
f"Document for ClickUp task {task_name} unchanged. Skipping."
|
||||||
|
|
@ -272,22 +274,24 @@ async def index_clickup_tasks(
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Content changed for ClickUp task {task_name}. Queuing for update."
|
f"Content changed for ClickUp task {task_name}. Queuing for update."
|
||||||
)
|
)
|
||||||
tasks_to_process.append({
|
tasks_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'task_content': task_content,
|
"is_new": False,
|
||||||
'content_hash': content_hash,
|
"task_content": task_content,
|
||||||
'task_id': task_id,
|
"content_hash": content_hash,
|
||||||
'task_name': task_name,
|
"task_id": task_id,
|
||||||
'task_status': task_status,
|
"task_name": task_name,
|
||||||
'task_priority': task_priority,
|
"task_status": task_status,
|
||||||
'task_list_name': task_list_name,
|
"task_priority": task_priority,
|
||||||
'task_space_name': task_space_name,
|
"task_list_name": task_list_name,
|
||||||
'task_assignees': task_assignees,
|
"task_space_name": task_space_name,
|
||||||
'task_due_date': task_due_date,
|
"task_assignees": task_assignees,
|
||||||
'task_created': task_created,
|
"task_due_date": task_due_date,
|
||||||
'task_updated': task_updated,
|
"task_created": task_created,
|
||||||
})
|
"task_updated": task_updated,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Document doesn't exist by unique_identifier_hash
|
# Document doesn't exist by unique_identifier_hash
|
||||||
|
|
@ -335,22 +339,24 @@ async def index_clickup_tasks(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
tasks_to_process.append({
|
tasks_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'task_content': task_content,
|
"is_new": True,
|
||||||
'content_hash': content_hash,
|
"task_content": task_content,
|
||||||
'task_id': task_id,
|
"content_hash": content_hash,
|
||||||
'task_name': task_name,
|
"task_id": task_id,
|
||||||
'task_status': task_status,
|
"task_name": task_name,
|
||||||
'task_priority': task_priority,
|
"task_status": task_status,
|
||||||
'task_list_name': task_list_name,
|
"task_priority": task_priority,
|
||||||
'task_space_name': task_space_name,
|
"task_list_name": task_list_name,
|
||||||
'task_assignees': task_assignees,
|
"task_space_name": task_space_name,
|
||||||
'task_due_date': task_due_date,
|
"task_assignees": task_assignees,
|
||||||
'task_created': task_created,
|
"task_due_date": task_due_date,
|
||||||
'task_updated': task_updated,
|
"task_created": task_created,
|
||||||
})
|
"task_updated": task_updated,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(
|
logger.error(
|
||||||
|
|
@ -362,7 +368,9 @@ async def index_clickup_tasks(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([t for t in tasks_to_process if t['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([t for t in tasks_to_process if t['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -379,7 +387,7 @@ async def index_clickup_tasks(
|
||||||
await on_heartbeat_callback(documents_indexed)
|
await on_heartbeat_callback(documents_indexed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
|
|
@ -392,13 +400,13 @@ async def index_clickup_tasks(
|
||||||
|
|
||||||
if user_llm:
|
if user_llm:
|
||||||
document_metadata_for_summary = {
|
document_metadata_for_summary = {
|
||||||
"task_id": item['task_id'],
|
"task_id": item["task_id"],
|
||||||
"task_name": item['task_name'],
|
"task_name": item["task_name"],
|
||||||
"task_status": item['task_status'],
|
"task_status": item["task_status"],
|
||||||
"task_priority": item['task_priority'],
|
"task_priority": item["task_priority"],
|
||||||
"task_list": item['task_list_name'],
|
"task_list": item["task_list_name"],
|
||||||
"task_space": item['task_space_name'],
|
"task_space": item["task_space_name"],
|
||||||
"assignees": len(item['task_assignees']),
|
"assignees": len(item["task_assignees"]),
|
||||||
"document_type": "ClickUp Task",
|
"document_type": "ClickUp Task",
|
||||||
"connector_type": "ClickUp",
|
"connector_type": "ClickUp",
|
||||||
}
|
}
|
||||||
|
|
@ -406,30 +414,30 @@ async def index_clickup_tasks(
|
||||||
summary_content,
|
summary_content,
|
||||||
summary_embedding,
|
summary_embedding,
|
||||||
) = await generate_document_summary(
|
) = await generate_document_summary(
|
||||||
item['task_content'], user_llm, document_metadata_for_summary
|
item["task_content"], user_llm, document_metadata_for_summary
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
summary_content = item['task_content']
|
summary_content = item["task_content"]
|
||||||
summary_embedding = config.embedding_model_instance.embed(
|
summary_embedding = config.embedding_model_instance.embed(
|
||||||
item['task_content']
|
item["task_content"]
|
||||||
)
|
)
|
||||||
|
|
||||||
chunks = await create_document_chunks(item['task_content'])
|
chunks = await create_document_chunks(item["task_content"])
|
||||||
|
|
||||||
# Update document to READY with actual content
|
# Update document to READY with actual content
|
||||||
document.title = item['task_name']
|
document.title = item["task_name"]
|
||||||
document.content = summary_content
|
document.content = summary_content
|
||||||
document.content_hash = item['content_hash']
|
document.content_hash = item["content_hash"]
|
||||||
document.embedding = summary_embedding
|
document.embedding = summary_embedding
|
||||||
document.document_metadata = {
|
document.document_metadata = {
|
||||||
"task_id": item['task_id'],
|
"task_id": item["task_id"],
|
||||||
"task_name": item['task_name'],
|
"task_name": item["task_name"],
|
||||||
"task_status": item['task_status'],
|
"task_status": item["task_status"],
|
||||||
"task_priority": item['task_priority'],
|
"task_priority": item["task_priority"],
|
||||||
"task_assignees": item['task_assignees'],
|
"task_assignees": item["task_assignees"],
|
||||||
"task_due_date": item['task_due_date'],
|
"task_due_date": item["task_due_date"],
|
||||||
"task_created": item['task_created'],
|
"task_created": item["task_created"],
|
||||||
"task_updated": item['task_updated'],
|
"task_updated": item["task_updated"],
|
||||||
"connector_id": connector_id,
|
"connector_id": connector_id,
|
||||||
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||||
}
|
}
|
||||||
|
|
@ -456,7 +464,9 @@ async def index_clickup_tasks(
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
documents_failed += 1
|
documents_failed += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -262,23 +262,27 @@ async def index_confluence_pages(
|
||||||
# Document exists - check if content has changed
|
# Document exists - check if content has changed
|
||||||
if existing_document.content_hash == content_hash:
|
if existing_document.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status = DocumentStatus.ready()
|
||||||
documents_skipped += 1
|
documents_skipped += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Queue existing document for update (will be set to processing in Phase 2)
|
# Queue existing document for update (will be set to processing in Phase 2)
|
||||||
pages_to_process.append({
|
pages_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'full_content': full_content,
|
"is_new": False,
|
||||||
'page_content': page_content,
|
"full_content": full_content,
|
||||||
'content_hash': content_hash,
|
"page_content": page_content,
|
||||||
'page_id': page_id,
|
"content_hash": content_hash,
|
||||||
'page_title': page_title,
|
"page_id": page_id,
|
||||||
'space_id': space_id,
|
"page_title": page_title,
|
||||||
'comment_count': comment_count,
|
"space_id": space_id,
|
||||||
})
|
"comment_count": comment_count,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Document doesn't exist by unique_identifier_hash
|
# Document doesn't exist by unique_identifier_hash
|
||||||
|
|
@ -323,17 +327,19 @@ async def index_confluence_pages(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
pages_to_process.append({
|
pages_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'full_content': full_content,
|
"is_new": True,
|
||||||
'page_content': page_content,
|
"full_content": full_content,
|
||||||
'content_hash': content_hash,
|
"page_content": page_content,
|
||||||
'page_id': page_id,
|
"content_hash": content_hash,
|
||||||
'page_title': page_title,
|
"page_id": page_id,
|
||||||
'space_id': space_id,
|
"page_title": page_title,
|
||||||
'comment_count': comment_count,
|
"space_id": space_id,
|
||||||
})
|
"comment_count": comment_count,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
|
logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
|
||||||
|
|
@ -342,7 +348,9 @@ async def index_confluence_pages(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -359,7 +367,7 @@ async def index_confluence_pages(
|
||||||
await on_heartbeat_callback(documents_indexed)
|
await on_heartbeat_callback(documents_indexed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
|
|
@ -372,10 +380,10 @@ async def index_confluence_pages(
|
||||||
|
|
||||||
if user_llm:
|
if user_llm:
|
||||||
document_metadata = {
|
document_metadata = {
|
||||||
"page_title": item['page_title'],
|
"page_title": item["page_title"],
|
||||||
"page_id": item['page_id'],
|
"page_id": item["page_id"],
|
||||||
"space_id": item['space_id'],
|
"space_id": item["space_id"],
|
||||||
"comment_count": item['comment_count'],
|
"comment_count": item["comment_count"],
|
||||||
"document_type": "Confluence Page",
|
"document_type": "Confluence Page",
|
||||||
"connector_type": "Confluence",
|
"connector_type": "Confluence",
|
||||||
}
|
}
|
||||||
|
|
@ -383,17 +391,15 @@ async def index_confluence_pages(
|
||||||
summary_content,
|
summary_content,
|
||||||
summary_embedding,
|
summary_embedding,
|
||||||
) = await generate_document_summary(
|
) = await generate_document_summary(
|
||||||
item['full_content'], user_llm, document_metadata
|
item["full_content"], user_llm, document_metadata
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Fallback to simple summary if no LLM configured
|
# Fallback to simple summary if no LLM configured
|
||||||
summary_content = (
|
summary_content = f"Confluence Page: {item['page_title']}\n\nSpace ID: {item['space_id']}\n\n"
|
||||||
f"Confluence Page: {item['page_title']}\n\nSpace ID: {item['space_id']}\n\n"
|
if item["page_content"]:
|
||||||
)
|
|
||||||
if item['page_content']:
|
|
||||||
# Take first 1000 characters of content for summary
|
# Take first 1000 characters of content for summary
|
||||||
content_preview = item['page_content'][:1000]
|
content_preview = item["page_content"][:1000]
|
||||||
if len(item['page_content']) > 1000:
|
if len(item["page_content"]) > 1000:
|
||||||
content_preview += "..."
|
content_preview += "..."
|
||||||
summary_content += f"Content Preview: {content_preview}\n\n"
|
summary_content += f"Content Preview: {content_preview}\n\n"
|
||||||
summary_content += f"Comments: {item['comment_count']}"
|
summary_content += f"Comments: {item['comment_count']}"
|
||||||
|
|
@ -402,18 +408,18 @@ async def index_confluence_pages(
|
||||||
)
|
)
|
||||||
|
|
||||||
# Process chunks - using the full page content with comments
|
# Process chunks - using the full page content with comments
|
||||||
chunks = await create_document_chunks(item['full_content'])
|
chunks = await create_document_chunks(item["full_content"])
|
||||||
|
|
||||||
# Update document to READY with actual content
|
# Update document to READY with actual content
|
||||||
document.title = item['page_title']
|
document.title = item["page_title"]
|
||||||
document.content = summary_content
|
document.content = summary_content
|
||||||
document.content_hash = item['content_hash']
|
document.content_hash = item["content_hash"]
|
||||||
document.embedding = summary_embedding
|
document.embedding = summary_embedding
|
||||||
document.document_metadata = {
|
document.document_metadata = {
|
||||||
"page_id": item['page_id'],
|
"page_id": item["page_id"],
|
||||||
"page_title": item['page_title'],
|
"page_title": item["page_title"],
|
||||||
"space_id": item['space_id'],
|
"space_id": item["space_id"],
|
||||||
"comment_count": item['comment_count'],
|
"comment_count": item["comment_count"],
|
||||||
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||||
"connector_id": connector_id,
|
"connector_id": connector_id,
|
||||||
}
|
}
|
||||||
|
|
@ -440,7 +446,9 @@ async def index_confluence_pages(
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
documents_failed += 1
|
documents_failed += 1
|
||||||
continue # Skip this page and continue with others
|
continue # Skip this page and continue with others
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -352,9 +352,7 @@ async def index_discord_messages(
|
||||||
try:
|
try:
|
||||||
channels = await discord_client.get_text_channels(guild_id)
|
channels = await discord_client.get_text_channels(guild_id)
|
||||||
if not channels:
|
if not channels:
|
||||||
logger.info(
|
logger.info(f"No channels found in guild {guild_name}. Skipping.")
|
||||||
f"No channels found in guild {guild_name}. Skipping."
|
|
||||||
)
|
|
||||||
skipped_channels.append(f"{guild_name} (no channels)")
|
skipped_channels.append(f"{guild_name} (no channels)")
|
||||||
else:
|
else:
|
||||||
for channel in channels:
|
for channel in channels:
|
||||||
|
|
@ -456,25 +454,31 @@ async def index_discord_messages(
|
||||||
# Document exists - check if content has changed
|
# Document exists - check if content has changed
|
||||||
if existing_document.content_hash == content_hash:
|
if existing_document.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
|
existing_document.status = (
|
||||||
|
DocumentStatus.ready()
|
||||||
|
)
|
||||||
documents_skipped += 1
|
documents_skipped += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Queue existing document for update (will be set to processing in Phase 2)
|
# Queue existing document for update (will be set to processing in Phase 2)
|
||||||
messages_to_process.append({
|
messages_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'combined_document_string': combined_document_string,
|
"is_new": False,
|
||||||
'content_hash': content_hash,
|
"combined_document_string": combined_document_string,
|
||||||
'guild_name': guild_name,
|
"content_hash": content_hash,
|
||||||
'guild_id': guild_id,
|
"guild_name": guild_name,
|
||||||
'channel_name': channel_name,
|
"guild_id": guild_id,
|
||||||
'channel_id': channel_id,
|
"channel_name": channel_name,
|
||||||
'message_id': msg_id,
|
"channel_id": channel_id,
|
||||||
'message_timestamp': msg_timestamp,
|
"message_id": msg_id,
|
||||||
'message_user_name': msg_user_name,
|
"message_timestamp": msg_timestamp,
|
||||||
})
|
"message_user_name": msg_user_name,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Document doesn't exist by unique_identifier_hash
|
# Document doesn't exist by unique_identifier_hash
|
||||||
|
|
@ -522,19 +526,21 @@ async def index_discord_messages(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
messages_to_process.append({
|
messages_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'combined_document_string': combined_document_string,
|
"is_new": True,
|
||||||
'content_hash': content_hash,
|
"combined_document_string": combined_document_string,
|
||||||
'guild_name': guild_name,
|
"content_hash": content_hash,
|
||||||
'guild_id': guild_id,
|
"guild_name": guild_name,
|
||||||
'channel_name': channel_name,
|
"guild_id": guild_id,
|
||||||
'channel_id': channel_id,
|
"channel_name": channel_name,
|
||||||
'message_id': msg_id,
|
"channel_id": channel_id,
|
||||||
'message_timestamp': msg_timestamp,
|
"message_id": msg_id,
|
||||||
'message_user_name': msg_user_name,
|
"message_timestamp": msg_timestamp,
|
||||||
})
|
"message_user_name": msg_user_name,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(
|
logger.error(
|
||||||
|
|
@ -547,7 +553,9 @@ async def index_discord_messages(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -564,31 +572,31 @@ async def index_discord_messages(
|
||||||
await on_heartbeat_callback(documents_indexed)
|
await on_heartbeat_callback(documents_indexed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# Heavy processing (embeddings, chunks)
|
# Heavy processing (embeddings, chunks)
|
||||||
chunks = await create_document_chunks(item['combined_document_string'])
|
chunks = await create_document_chunks(item["combined_document_string"])
|
||||||
doc_embedding = config.embedding_model_instance.embed(
|
doc_embedding = config.embedding_model_instance.embed(
|
||||||
item['combined_document_string']
|
item["combined_document_string"]
|
||||||
)
|
)
|
||||||
|
|
||||||
# Update document to READY with actual content
|
# Update document to READY with actual content
|
||||||
document.title = f"{item['guild_name']}#{item['channel_name']}"
|
document.title = f"{item['guild_name']}#{item['channel_name']}"
|
||||||
document.content = item['combined_document_string']
|
document.content = item["combined_document_string"]
|
||||||
document.content_hash = item['content_hash']
|
document.content_hash = item["content_hash"]
|
||||||
document.embedding = doc_embedding
|
document.embedding = doc_embedding
|
||||||
document.document_metadata = {
|
document.document_metadata = {
|
||||||
"guild_name": item['guild_name'],
|
"guild_name": item["guild_name"],
|
||||||
"guild_id": item['guild_id'],
|
"guild_id": item["guild_id"],
|
||||||
"channel_name": item['channel_name'],
|
"channel_name": item["channel_name"],
|
||||||
"channel_id": item['channel_id'],
|
"channel_id": item["channel_id"],
|
||||||
"message_id": item['message_id'],
|
"message_id": item["message_id"],
|
||||||
"message_timestamp": item['message_timestamp'],
|
"message_timestamp": item["message_timestamp"],
|
||||||
"message_user_name": item['message_user_name'],
|
"message_user_name": item["message_user_name"],
|
||||||
"indexed_at": datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S"),
|
"indexed_at": datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S"),
|
||||||
"connector_id": connector_id,
|
"connector_id": connector_id,
|
||||||
}
|
}
|
||||||
|
|
@ -612,7 +620,9 @@ async def index_discord_messages(
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
documents_failed += 1
|
documents_failed += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -253,7 +253,9 @@ async def index_elasticsearch_documents(
|
||||||
# If content is unchanged, skip. Otherwise queue for update.
|
# If content is unchanged, skip. Otherwise queue for update.
|
||||||
if existing_doc.content_hash == content_hash:
|
if existing_doc.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_doc.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
|
existing_doc.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
existing_doc.status = DocumentStatus.ready()
|
existing_doc.status = DocumentStatus.ready()
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Skipping ES doc {doc_id} — already indexed (doc id {existing_doc.id})"
|
f"Skipping ES doc {doc_id} — already indexed (doc id {existing_doc.id})"
|
||||||
|
|
@ -262,17 +264,19 @@ async def index_elasticsearch_documents(
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Queue existing document for update (will be set to processing in Phase 2)
|
# Queue existing document for update (will be set to processing in Phase 2)
|
||||||
docs_to_process.append({
|
docs_to_process.append(
|
||||||
'document': existing_doc,
|
{
|
||||||
'is_new': False,
|
"document": existing_doc,
|
||||||
'doc_id': doc_id,
|
"is_new": False,
|
||||||
'title': title,
|
"doc_id": doc_id,
|
||||||
'content': content,
|
"title": title,
|
||||||
'content_hash': content_hash,
|
"content": content,
|
||||||
'unique_identifier_hash': unique_identifier_hash,
|
"content_hash": content_hash,
|
||||||
'hit': hit,
|
"unique_identifier_hash": unique_identifier_hash,
|
||||||
'source': source,
|
"hit": hit,
|
||||||
})
|
"source": source,
|
||||||
|
}
|
||||||
|
)
|
||||||
hits_collected += 1
|
hits_collected += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -310,17 +314,19 @@ async def index_elasticsearch_documents(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
docs_to_process.append({
|
docs_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'doc_id': doc_id,
|
"is_new": True,
|
||||||
'title': title,
|
"doc_id": doc_id,
|
||||||
'content': content,
|
"title": title,
|
||||||
'content_hash': content_hash,
|
"content": content,
|
||||||
'unique_identifier_hash': unique_identifier_hash,
|
"content_hash": content_hash,
|
||||||
'hit': hit,
|
"unique_identifier_hash": unique_identifier_hash,
|
||||||
'source': source,
|
"hit": hit,
|
||||||
})
|
"source": source,
|
||||||
|
}
|
||||||
|
)
|
||||||
hits_collected += 1
|
hits_collected += 1
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -330,7 +336,9 @@ async def index_elasticsearch_documents(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([d for d in docs_to_process if d['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([d for d in docs_to_process if d['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -347,7 +355,7 @@ async def index_elasticsearch_documents(
|
||||||
await on_heartbeat_callback(documents_processed)
|
await on_heartbeat_callback(documents_processed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
|
|
@ -355,9 +363,9 @@ async def index_elasticsearch_documents(
|
||||||
|
|
||||||
# Build metadata
|
# Build metadata
|
||||||
metadata = {
|
metadata = {
|
||||||
"elasticsearch_id": item['doc_id'],
|
"elasticsearch_id": item["doc_id"],
|
||||||
"elasticsearch_index": item['hit'].get("_index", index_name),
|
"elasticsearch_index": item["hit"].get("_index", index_name),
|
||||||
"elasticsearch_score": item['hit'].get("_score"),
|
"elasticsearch_score": item["hit"].get("_score"),
|
||||||
"indexed_at": datetime.now().isoformat(),
|
"indexed_at": datetime.now().isoformat(),
|
||||||
"source": "ELASTICSEARCH_CONNECTOR",
|
"source": "ELASTICSEARCH_CONNECTOR",
|
||||||
"connector_id": connector_id,
|
"connector_id": connector_id,
|
||||||
|
|
@ -366,17 +374,17 @@ async def index_elasticsearch_documents(
|
||||||
# Add any additional metadata fields specified in config
|
# Add any additional metadata fields specified in config
|
||||||
if "ELASTICSEARCH_METADATA_FIELDS" in config:
|
if "ELASTICSEARCH_METADATA_FIELDS" in config:
|
||||||
for field in config["ELASTICSEARCH_METADATA_FIELDS"]:
|
for field in config["ELASTICSEARCH_METADATA_FIELDS"]:
|
||||||
if field in item['source']:
|
if field in item["source"]:
|
||||||
metadata[f"es_{field}"] = item['source'][field]
|
metadata[f"es_{field}"] = item["source"][field]
|
||||||
|
|
||||||
# Create chunks
|
# Create chunks
|
||||||
chunks = await create_document_chunks(item['content'])
|
chunks = await create_document_chunks(item["content"])
|
||||||
|
|
||||||
# Update document to READY with actual content
|
# Update document to READY with actual content
|
||||||
document.title = item['title']
|
document.title = item["title"]
|
||||||
document.content = item['content']
|
document.content = item["content"]
|
||||||
document.content_hash = item['content_hash']
|
document.content_hash = item["content_hash"]
|
||||||
document.unique_identifier_hash = item['unique_identifier_hash']
|
document.unique_identifier_hash = item["unique_identifier_hash"]
|
||||||
document.document_metadata = metadata
|
document.document_metadata = metadata
|
||||||
safe_set_chunks(document, chunks)
|
safe_set_chunks(document, chunks)
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
|
|
@ -399,7 +407,9 @@ async def index_elasticsearch_documents(
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
documents_failed += 1
|
documents_failed += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -411,10 +421,14 @@ async def index_elasticsearch_documents(
|
||||||
)
|
)
|
||||||
|
|
||||||
# Final commit for any remaining documents not yet committed in batches
|
# Final commit for any remaining documents not yet committed in batches
|
||||||
logger.info(f"Final commit: Total {documents_processed} Elasticsearch documents processed")
|
logger.info(
|
||||||
|
f"Final commit: Total {documents_processed} Elasticsearch documents processed"
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
await session.commit()
|
await session.commit()
|
||||||
logger.info("Successfully committed all Elasticsearch document changes to database")
|
logger.info(
|
||||||
|
"Successfully committed all Elasticsearch document changes to database"
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Handle any remaining integrity errors gracefully (race conditions, etc.)
|
# Handle any remaining integrity errors gracefully (race conditions, etc.)
|
||||||
if (
|
if (
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@ from sqlalchemy.exc import SQLAlchemyError
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.config import config
|
from app.config import config
|
||||||
from app.connectors.github_connector import GitHubConnector, RepositoryDigest
|
from app.connectors.github_connector import GitHubConnector
|
||||||
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
|
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
|
||||||
from app.services.llm_service import get_user_long_context_llm
|
from app.services.llm_service import get_user_long_context_llm
|
||||||
from app.services.task_logging_service import TaskLoggingService
|
from app.services.task_logging_service import TaskLoggingService
|
||||||
|
|
@ -237,7 +237,9 @@ async def index_github_repos(
|
||||||
# Document exists - check if content has changed
|
# Document exists - check if content has changed
|
||||||
if existing_document.content_hash == content_hash:
|
if existing_document.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status = DocumentStatus.ready()
|
||||||
logger.info(f"Repository {repo_full_name} unchanged. Skipping.")
|
logger.info(f"Repository {repo_full_name} unchanged. Skipping.")
|
||||||
documents_skipped += 1
|
documents_skipped += 1
|
||||||
|
|
@ -247,14 +249,16 @@ async def index_github_repos(
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Content changed for repository {repo_full_name}. Queuing for update."
|
f"Content changed for repository {repo_full_name}. Queuing for update."
|
||||||
)
|
)
|
||||||
repos_to_process.append({
|
repos_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'digest': digest,
|
"is_new": False,
|
||||||
'content_hash': content_hash,
|
"digest": digest,
|
||||||
'repo_full_name': repo_full_name,
|
"content_hash": content_hash,
|
||||||
'unique_identifier_hash': unique_identifier_hash,
|
"repo_full_name": repo_full_name,
|
||||||
})
|
"unique_identifier_hash": unique_identifier_hash,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Document doesn't exist by unique_identifier_hash
|
# Document doesn't exist by unique_identifier_hash
|
||||||
|
|
@ -298,14 +302,16 @@ async def index_github_repos(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
repos_to_process.append({
|
repos_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'digest': digest,
|
"is_new": True,
|
||||||
'content_hash': content_hash,
|
"digest": digest,
|
||||||
'repo_full_name': repo_full_name,
|
"content_hash": content_hash,
|
||||||
'unique_identifier_hash': unique_identifier_hash,
|
"repo_full_name": repo_full_name,
|
||||||
})
|
"unique_identifier_hash": unique_identifier_hash,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as repo_err:
|
except Exception as repo_err:
|
||||||
logger.error(
|
logger.error(
|
||||||
|
|
@ -317,7 +323,9 @@ async def index_github_repos(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([r for r in repos_to_process if r['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([r for r in repos_to_process if r['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -334,9 +342,9 @@ async def index_github_repos(
|
||||||
await on_heartbeat_callback(documents_indexed)
|
await on_heartbeat_callback(documents_indexed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
digest = item['digest']
|
digest = item["digest"]
|
||||||
repo_full_name = item['repo_full_name']
|
repo_full_name = item["repo_full_name"]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
|
|
@ -353,7 +361,9 @@ async def index_github_repos(
|
||||||
"document_type": "GitHub Repository",
|
"document_type": "GitHub Repository",
|
||||||
"connector_type": "GitHub",
|
"connector_type": "GitHub",
|
||||||
"ingestion_method": "gitingest",
|
"ingestion_method": "gitingest",
|
||||||
"file_tree": digest.tree[:2000] if len(digest.tree) > 2000 else digest.tree,
|
"file_tree": digest.tree[:2000]
|
||||||
|
if len(digest.tree) > 2000
|
||||||
|
else digest.tree,
|
||||||
"estimated_tokens": digest.estimated_tokens,
|
"estimated_tokens": digest.estimated_tokens,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -377,13 +387,17 @@ async def index_github_repos(
|
||||||
f"## Summary\n{digest.summary}\n\n"
|
f"## Summary\n{digest.summary}\n\n"
|
||||||
f"## File Structure\n{digest.tree[:3000]}"
|
f"## File Structure\n{digest.tree[:3000]}"
|
||||||
)
|
)
|
||||||
summary_embedding = config.embedding_model_instance.embed(summary_text)
|
summary_embedding = config.embedding_model_instance.embed(
|
||||||
|
summary_text
|
||||||
|
)
|
||||||
|
|
||||||
# Chunk the full digest content for granular search
|
# Chunk the full digest content for granular search
|
||||||
try:
|
try:
|
||||||
chunks_data = await create_document_chunks(digest.content)
|
chunks_data = await create_document_chunks(digest.content)
|
||||||
except Exception as chunk_err:
|
except Exception as chunk_err:
|
||||||
logger.error(f"Failed to chunk repository {repo_full_name}: {chunk_err}")
|
logger.error(
|
||||||
|
f"Failed to chunk repository {repo_full_name}: {chunk_err}"
|
||||||
|
)
|
||||||
chunks_data = await _simple_chunk_content(digest.content)
|
chunks_data = await _simple_chunk_content(digest.content)
|
||||||
|
|
||||||
# Update document to READY with actual content
|
# Update document to READY with actual content
|
||||||
|
|
@ -401,7 +415,7 @@ async def index_github_repos(
|
||||||
|
|
||||||
document.title = repo_full_name
|
document.title = repo_full_name
|
||||||
document.content = summary_text
|
document.content = summary_text
|
||||||
document.content_hash = item['content_hash']
|
document.content_hash = item["content_hash"]
|
||||||
document.embedding = summary_embedding
|
document.embedding = summary_embedding
|
||||||
document.document_metadata = doc_metadata
|
document.document_metadata = doc_metadata
|
||||||
safe_set_chunks(document, chunks_data)
|
safe_set_chunks(document, chunks_data)
|
||||||
|
|
@ -433,7 +447,9 @@ async def index_github_repos(
|
||||||
document.status = DocumentStatus.failed(str(repo_err))
|
document.status = DocumentStatus.failed(str(repo_err))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
errors.append(f"Failed processing {repo_full_name}: {repo_err}")
|
errors.append(f"Failed processing {repo_full_name}: {repo_err}")
|
||||||
documents_failed += 1
|
documents_failed += 1
|
||||||
continue
|
continue
|
||||||
|
|
@ -442,7 +458,9 @@ async def index_github_repos(
|
||||||
await update_connector_last_indexed(session, connector, update_last_indexed)
|
await update_connector_last_indexed(session, connector, update_last_indexed)
|
||||||
|
|
||||||
# Final commit
|
# Final commit
|
||||||
logger.info(f"Final commit: Total {documents_processed} GitHub repositories processed")
|
logger.info(
|
||||||
|
f"Final commit: Total {documents_processed} GitHub repositories processed"
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
await session.commit()
|
await session.commit()
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|
|
||||||
|
|
@ -345,25 +345,29 @@ async def index_google_calendar_events(
|
||||||
# Document exists - check if content has changed
|
# Document exists - check if content has changed
|
||||||
if existing_document.content_hash == content_hash:
|
if existing_document.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status = DocumentStatus.ready()
|
||||||
documents_skipped += 1
|
documents_skipped += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Queue existing document for update (will be set to processing in Phase 2)
|
# Queue existing document for update (will be set to processing in Phase 2)
|
||||||
events_to_process.append({
|
events_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'event_markdown': event_markdown,
|
"is_new": False,
|
||||||
'content_hash': content_hash,
|
"event_markdown": event_markdown,
|
||||||
'event_id': event_id,
|
"content_hash": content_hash,
|
||||||
'event_summary': event_summary,
|
"event_id": event_id,
|
||||||
'calendar_id': calendar_id,
|
"event_summary": event_summary,
|
||||||
'start_time': start_time,
|
"calendar_id": calendar_id,
|
||||||
'end_time': end_time,
|
"start_time": start_time,
|
||||||
'location': location,
|
"end_time": end_time,
|
||||||
'description': description,
|
"location": location,
|
||||||
})
|
"description": description,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Document doesn't exist by unique_identifier_hash
|
# Document doesn't exist by unique_identifier_hash
|
||||||
|
|
@ -411,19 +415,21 @@ async def index_google_calendar_events(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
events_to_process.append({
|
events_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'event_markdown': event_markdown,
|
"is_new": True,
|
||||||
'content_hash': content_hash,
|
"event_markdown": event_markdown,
|
||||||
'event_id': event_id,
|
"content_hash": content_hash,
|
||||||
'event_summary': event_summary,
|
"event_id": event_id,
|
||||||
'calendar_id': calendar_id,
|
"event_summary": event_summary,
|
||||||
'start_time': start_time,
|
"calendar_id": calendar_id,
|
||||||
'end_time': end_time,
|
"start_time": start_time,
|
||||||
'location': location,
|
"end_time": end_time,
|
||||||
'description': description,
|
"location": location,
|
||||||
})
|
"description": description,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
|
logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
|
||||||
|
|
@ -432,7 +438,9 @@ async def index_google_calendar_events(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -449,7 +457,7 @@ async def index_google_calendar_events(
|
||||||
await on_heartbeat_callback(documents_indexed)
|
await on_heartbeat_callback(documents_indexed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
|
|
@ -462,48 +470,53 @@ async def index_google_calendar_events(
|
||||||
|
|
||||||
if user_llm:
|
if user_llm:
|
||||||
document_metadata_for_summary = {
|
document_metadata_for_summary = {
|
||||||
"event_id": item['event_id'],
|
"event_id": item["event_id"],
|
||||||
"event_summary": item['event_summary'],
|
"event_summary": item["event_summary"],
|
||||||
"calendar_id": item['calendar_id'],
|
"calendar_id": item["calendar_id"],
|
||||||
"start_time": item['start_time'],
|
"start_time": item["start_time"],
|
||||||
"end_time": item['end_time'],
|
"end_time": item["end_time"],
|
||||||
"location": item['location'] or "No location",
|
"location": item["location"] or "No location",
|
||||||
"document_type": "Google Calendar Event",
|
"document_type": "Google Calendar Event",
|
||||||
"connector_type": "Google Calendar",
|
"connector_type": "Google Calendar",
|
||||||
}
|
}
|
||||||
summary_content, summary_embedding = await generate_document_summary(
|
(
|
||||||
item['event_markdown'], user_llm, document_metadata_for_summary
|
summary_content,
|
||||||
|
summary_embedding,
|
||||||
|
) = await generate_document_summary(
|
||||||
|
item["event_markdown"], user_llm, document_metadata_for_summary
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
summary_content = f"Google Calendar Event: {item['event_summary']}\n\n"
|
summary_content = (
|
||||||
|
f"Google Calendar Event: {item['event_summary']}\n\n"
|
||||||
|
)
|
||||||
summary_content += f"Calendar: {item['calendar_id']}\n"
|
summary_content += f"Calendar: {item['calendar_id']}\n"
|
||||||
summary_content += f"Start: {item['start_time']}\n"
|
summary_content += f"Start: {item['start_time']}\n"
|
||||||
summary_content += f"End: {item['end_time']}\n"
|
summary_content += f"End: {item['end_time']}\n"
|
||||||
if item['location']:
|
if item["location"]:
|
||||||
summary_content += f"Location: {item['location']}\n"
|
summary_content += f"Location: {item['location']}\n"
|
||||||
if item['description']:
|
if item["description"]:
|
||||||
desc_preview = item['description'][:1000]
|
desc_preview = item["description"][:1000]
|
||||||
if len(item['description']) > 1000:
|
if len(item["description"]) > 1000:
|
||||||
desc_preview += "..."
|
desc_preview += "..."
|
||||||
summary_content += f"Description: {desc_preview}\n"
|
summary_content += f"Description: {desc_preview}\n"
|
||||||
summary_embedding = config.embedding_model_instance.embed(
|
summary_embedding = config.embedding_model_instance.embed(
|
||||||
summary_content
|
summary_content
|
||||||
)
|
)
|
||||||
|
|
||||||
chunks = await create_document_chunks(item['event_markdown'])
|
chunks = await create_document_chunks(item["event_markdown"])
|
||||||
|
|
||||||
# Update document to READY with actual content
|
# Update document to READY with actual content
|
||||||
document.title = item['event_summary']
|
document.title = item["event_summary"]
|
||||||
document.content = summary_content
|
document.content = summary_content
|
||||||
document.content_hash = item['content_hash']
|
document.content_hash = item["content_hash"]
|
||||||
document.embedding = summary_embedding
|
document.embedding = summary_embedding
|
||||||
document.document_metadata = {
|
document.document_metadata = {
|
||||||
"event_id": item['event_id'],
|
"event_id": item["event_id"],
|
||||||
"event_summary": item['event_summary'],
|
"event_summary": item["event_summary"],
|
||||||
"calendar_id": item['calendar_id'],
|
"calendar_id": item["calendar_id"],
|
||||||
"start_time": item['start_time'],
|
"start_time": item["start_time"],
|
||||||
"end_time": item['end_time'],
|
"end_time": item["end_time"],
|
||||||
"location": item['location'],
|
"location": item["location"],
|
||||||
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||||
"connector_id": connector_id,
|
"connector_id": connector_id,
|
||||||
}
|
}
|
||||||
|
|
@ -527,7 +540,9 @@ async def index_google_calendar_events(
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
documents_failed += 1
|
documents_failed += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -435,7 +435,7 @@ async def _index_full_scan(
|
||||||
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, int]:
|
) -> tuple[int, int]:
|
||||||
"""Perform full scan indexing of a folder.
|
"""Perform full scan indexing of a folder.
|
||||||
|
|
||||||
Implements 2-phase document status updates for real-time UI feedback:
|
Implements 2-phase document status updates for real-time UI feedback:
|
||||||
- Phase 1: Collect all files and create pending documents (visible in UI immediately)
|
- Phase 1: Collect all files and create pending documents (visible in UI immediately)
|
||||||
- Phase 2: Process each file: pending → processing → ready/failed
|
- Phase 2: Process each file: pending → processing → ready/failed
|
||||||
|
|
@ -533,7 +533,9 @@ async def _index_full_scan(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f[1] and f[1].id is None])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([f for f in files_to_process if f[1] and f[1].id is None])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -568,9 +570,7 @@ async def _index_full_scan(
|
||||||
|
|
||||||
if documents_indexed % 10 == 0 and documents_indexed > 0:
|
if documents_indexed % 10 == 0 and documents_indexed > 0:
|
||||||
await session.commit()
|
await session.commit()
|
||||||
logger.info(
|
logger.info(f"Committed batch: {documents_indexed} files indexed so far")
|
||||||
f"Committed batch: {documents_indexed} files indexed so far"
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped, {documents_failed} failed"
|
f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped, {documents_failed} failed"
|
||||||
|
|
@ -597,7 +597,7 @@ async def _index_with_delta_sync(
|
||||||
|
|
||||||
Note: include_subfolders is accepted for API consistency but delta sync
|
Note: include_subfolders is accepted for API consistency but delta sync
|
||||||
automatically tracks changes across all folders including subfolders.
|
automatically tracks changes across all folders including subfolders.
|
||||||
|
|
||||||
Implements 2-phase document status updates for real-time UI feedback:
|
Implements 2-phase document status updates for real-time UI feedback:
|
||||||
- Phase 1: Collect all changes and create pending documents (visible in UI immediately)
|
- Phase 1: Collect all changes and create pending documents (visible in UI immediately)
|
||||||
- Phase 2: Process each file: pending → processing → ready/failed
|
- Phase 2: Process each file: pending → processing → ready/failed
|
||||||
|
|
@ -676,7 +676,7 @@ async def _index_with_delta_sync(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing pending documents")
|
logger.info("Phase 1: Committing pending documents")
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -685,7 +685,7 @@ async def _index_with_delta_sync(
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
logger.info(f"Phase 2: Processing {len(changes_to_process)} changes")
|
logger.info(f"Phase 2: Processing {len(changes_to_process)} changes")
|
||||||
|
|
||||||
for change, file, pending_doc in changes_to_process:
|
for _, file, pending_doc in changes_to_process:
|
||||||
# Check if it's time for a heartbeat update
|
# Check if it's time for a heartbeat update
|
||||||
if on_heartbeat_callback:
|
if on_heartbeat_callback:
|
||||||
current_time = time.time()
|
current_time = time.time()
|
||||||
|
|
@ -728,17 +728,17 @@ async def _create_pending_document_for_file(
|
||||||
) -> tuple[Document | None, bool]:
|
) -> tuple[Document | None, bool]:
|
||||||
"""
|
"""
|
||||||
Create a pending document for a Google Drive file if it doesn't exist.
|
Create a pending document for a Google Drive file if it doesn't exist.
|
||||||
|
|
||||||
This is Phase 1 of the 2-phase document status update pattern.
|
This is Phase 1 of the 2-phase document status update pattern.
|
||||||
Creates documents with 'pending' status so they appear in UI immediately.
|
Creates documents with 'pending' status so they appear in UI immediately.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
session: Database session
|
session: Database session
|
||||||
file: File metadata from Google Drive API
|
file: File metadata from Google Drive API
|
||||||
connector_id: ID of the Drive connector
|
connector_id: ID of the Drive connector
|
||||||
search_space_id: ID of the search space
|
search_space_id: ID of the search space
|
||||||
user_id: ID of the user
|
user_id: ID of the user
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (document, should_skip):
|
Tuple of (document, should_skip):
|
||||||
- (existing_doc, False): Existing document that needs update
|
- (existing_doc, False): Existing document that needs update
|
||||||
|
|
@ -746,28 +746,28 @@ async def _create_pending_document_for_file(
|
||||||
- (None, True): File should be skipped (unchanged, rename-only, or folder)
|
- (None, True): File should be skipped (unchanged, rename-only, or folder)
|
||||||
"""
|
"""
|
||||||
from app.connectors.google_drive.file_types import should_skip_file
|
from app.connectors.google_drive.file_types import should_skip_file
|
||||||
|
|
||||||
file_id = file.get("id")
|
file_id = file.get("id")
|
||||||
file_name = file.get("name", "Unknown")
|
file_name = file.get("name", "Unknown")
|
||||||
mime_type = file.get("mimeType", "")
|
mime_type = file.get("mimeType", "")
|
||||||
|
|
||||||
# Skip folders and shortcuts
|
# Skip folders and shortcuts
|
||||||
if should_skip_file(mime_type):
|
if should_skip_file(mime_type):
|
||||||
return None, True
|
return None, True
|
||||||
|
|
||||||
if not file_id:
|
if not file_id:
|
||||||
return None, True
|
return None, True
|
||||||
|
|
||||||
# Generate unique identifier hash for this file
|
# Generate unique identifier hash for this file
|
||||||
unique_identifier_hash = generate_unique_identifier_hash(
|
unique_identifier_hash = generate_unique_identifier_hash(
|
||||||
DocumentType.GOOGLE_DRIVE_FILE, file_id, search_space_id
|
DocumentType.GOOGLE_DRIVE_FILE, file_id, search_space_id
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check if document exists
|
# Check if document exists
|
||||||
existing_document = await check_document_by_unique_identifier(
|
existing_document = await check_document_by_unique_identifier(
|
||||||
session, unique_identifier_hash
|
session, unique_identifier_hash
|
||||||
)
|
)
|
||||||
|
|
||||||
if existing_document:
|
if existing_document:
|
||||||
# Check if this is a rename-only update (content unchanged)
|
# Check if this is a rename-only update (content unchanged)
|
||||||
incoming_md5 = file.get("md5Checksum")
|
incoming_md5 = file.get("md5Checksum")
|
||||||
|
|
@ -775,7 +775,7 @@ async def _create_pending_document_for_file(
|
||||||
doc_metadata = existing_document.document_metadata or {}
|
doc_metadata = existing_document.document_metadata or {}
|
||||||
stored_md5 = doc_metadata.get("md5_checksum")
|
stored_md5 = doc_metadata.get("md5_checksum")
|
||||||
stored_modified_time = doc_metadata.get("modified_time")
|
stored_modified_time = doc_metadata.get("modified_time")
|
||||||
|
|
||||||
# Determine if content changed
|
# Determine if content changed
|
||||||
content_unchanged = False
|
content_unchanged = False
|
||||||
if incoming_md5 and stored_md5:
|
if incoming_md5 and stored_md5:
|
||||||
|
|
@ -783,16 +783,18 @@ async def _create_pending_document_for_file(
|
||||||
elif not incoming_md5 and incoming_modified_time and stored_modified_time:
|
elif not incoming_md5 and incoming_modified_time and stored_modified_time:
|
||||||
# Google Workspace file - use modifiedTime as fallback
|
# Google Workspace file - use modifiedTime as fallback
|
||||||
content_unchanged = incoming_modified_time == stored_modified_time
|
content_unchanged = incoming_modified_time == stored_modified_time
|
||||||
|
|
||||||
if content_unchanged:
|
if content_unchanged:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status = DocumentStatus.ready()
|
||||||
return None, True
|
return None, True
|
||||||
|
|
||||||
# Content changed - return existing document for update
|
# Content changed - return existing document for update
|
||||||
return existing_document, False
|
return existing_document, False
|
||||||
|
|
||||||
# Create new pending document
|
# Create new pending document
|
||||||
document = Document(
|
document = Document(
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
|
|
@ -815,7 +817,7 @@ async def _create_pending_document_for_file(
|
||||||
connector_id=connector_id,
|
connector_id=connector_id,
|
||||||
)
|
)
|
||||||
session.add(document)
|
session.add(document)
|
||||||
|
|
||||||
return document, False
|
return document, False
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -958,7 +960,7 @@ async def _process_single_file(
|
||||||
) -> tuple[int, int, int]:
|
) -> tuple[int, int, int]:
|
||||||
"""
|
"""
|
||||||
Process a single file by downloading and using Surfsense's file processor.
|
Process a single file by downloading and using Surfsense's file processor.
|
||||||
|
|
||||||
Implements Phase 2 of the 2-phase document status update pattern.
|
Implements Phase 2 of the 2-phase document status update pattern.
|
||||||
Updates document status: pending → processing → ready/failed
|
Updates document status: pending → processing → ready/failed
|
||||||
|
|
||||||
|
|
@ -1042,12 +1044,13 @@ async def _process_single_file(
|
||||||
processed_doc = await check_document_by_unique_identifier(
|
processed_doc = await check_document_by_unique_identifier(
|
||||||
session, unique_identifier_hash
|
session, unique_identifier_hash
|
||||||
)
|
)
|
||||||
if processed_doc:
|
# Ensure status is READY
|
||||||
# Ensure status is READY
|
if processed_doc and not DocumentStatus.is_state(
|
||||||
if not DocumentStatus.is_state(processed_doc.status, DocumentStatus.READY):
|
processed_doc.status, DocumentStatus.READY
|
||||||
processed_doc.status = DocumentStatus.ready()
|
):
|
||||||
processed_doc.updated_at = get_current_timestamp()
|
processed_doc.status = DocumentStatus.ready()
|
||||||
await session.commit()
|
processed_doc.updated_at = get_current_timestamp()
|
||||||
|
await session.commit()
|
||||||
|
|
||||||
logger.info(f"Successfully indexed Google Drive file: {file_name}")
|
logger.info(f"Successfully indexed Google Drive file: {file_name}")
|
||||||
return 1, 0, 0
|
return 1, 0, 0
|
||||||
|
|
@ -1061,7 +1064,9 @@ async def _process_single_file(
|
||||||
pending_document.updated_at = get_current_timestamp()
|
pending_document.updated_at = get_current_timestamp()
|
||||||
await session.commit()
|
await session.commit()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
return 0, 0, 1
|
return 0, 0, 1
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -228,7 +228,9 @@ async def index_google_gmail_messages(
|
||||||
documents_indexed = 0
|
documents_indexed = 0
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
documents_failed = 0 # Track messages that failed processing
|
documents_failed = 0 # Track messages that failed processing
|
||||||
duplicate_content_count = 0 # Track messages skipped due to duplicate content_hash
|
duplicate_content_count = (
|
||||||
|
0 # Track messages skipped due to duplicate content_hash
|
||||||
|
)
|
||||||
|
|
||||||
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
last_heartbeat_time = time.time()
|
last_heartbeat_time = time.time()
|
||||||
|
|
@ -294,23 +296,27 @@ async def index_google_gmail_messages(
|
||||||
# Document exists - check if content has changed
|
# Document exists - check if content has changed
|
||||||
if existing_document.content_hash == content_hash:
|
if existing_document.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status = DocumentStatus.ready()
|
||||||
documents_skipped += 1
|
documents_skipped += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Queue existing document for update (will be set to processing in Phase 2)
|
# Queue existing document for update (will be set to processing in Phase 2)
|
||||||
messages_to_process.append({
|
messages_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'markdown_content': markdown_content,
|
"is_new": False,
|
||||||
'content_hash': content_hash,
|
"markdown_content": markdown_content,
|
||||||
'message_id': message_id,
|
"content_hash": content_hash,
|
||||||
'thread_id': thread_id,
|
"message_id": message_id,
|
||||||
'subject': subject,
|
"thread_id": thread_id,
|
||||||
'sender': sender,
|
"subject": subject,
|
||||||
'date_str': date_str,
|
"sender": sender,
|
||||||
})
|
"date_str": date_str,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Document doesn't exist by unique_identifier_hash
|
# Document doesn't exist by unique_identifier_hash
|
||||||
|
|
@ -356,17 +362,19 @@ async def index_google_gmail_messages(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
messages_to_process.append({
|
messages_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'markdown_content': markdown_content,
|
"is_new": True,
|
||||||
'content_hash': content_hash,
|
"markdown_content": markdown_content,
|
||||||
'message_id': message_id,
|
"content_hash": content_hash,
|
||||||
'thread_id': thread_id,
|
"message_id": message_id,
|
||||||
'subject': subject,
|
"thread_id": thread_id,
|
||||||
'sender': sender,
|
"subject": subject,
|
||||||
'date_str': date_str,
|
"sender": sender,
|
||||||
})
|
"date_str": date_str,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True)
|
logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True)
|
||||||
|
|
@ -375,7 +383,9 @@ async def index_google_gmail_messages(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -392,7 +402,7 @@ async def index_google_gmail_messages(
|
||||||
await on_heartbeat_callback(documents_indexed)
|
await on_heartbeat_callback(documents_indexed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
|
|
@ -405,16 +415,21 @@ async def index_google_gmail_messages(
|
||||||
|
|
||||||
if user_llm:
|
if user_llm:
|
||||||
document_metadata_for_summary = {
|
document_metadata_for_summary = {
|
||||||
"message_id": item['message_id'],
|
"message_id": item["message_id"],
|
||||||
"thread_id": item['thread_id'],
|
"thread_id": item["thread_id"],
|
||||||
"subject": item['subject'],
|
"subject": item["subject"],
|
||||||
"sender": item['sender'],
|
"sender": item["sender"],
|
||||||
"date": item['date_str'],
|
"date": item["date_str"],
|
||||||
"document_type": "Gmail Message",
|
"document_type": "Gmail Message",
|
||||||
"connector_type": "Google Gmail",
|
"connector_type": "Google Gmail",
|
||||||
}
|
}
|
||||||
summary_content, summary_embedding = await generate_document_summary(
|
(
|
||||||
item['markdown_content'], user_llm, document_metadata_for_summary
|
summary_content,
|
||||||
|
summary_embedding,
|
||||||
|
) = await generate_document_summary(
|
||||||
|
item["markdown_content"],
|
||||||
|
user_llm,
|
||||||
|
document_metadata_for_summary,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
summary_content = f"Google Gmail Message: {item['subject']}\n\n"
|
summary_content = f"Google Gmail Message: {item['subject']}\n\n"
|
||||||
|
|
@ -424,19 +439,19 @@ async def index_google_gmail_messages(
|
||||||
summary_content
|
summary_content
|
||||||
)
|
)
|
||||||
|
|
||||||
chunks = await create_document_chunks(item['markdown_content'])
|
chunks = await create_document_chunks(item["markdown_content"])
|
||||||
|
|
||||||
# Update document to READY with actual content
|
# Update document to READY with actual content
|
||||||
document.title = item['subject']
|
document.title = item["subject"]
|
||||||
document.content = summary_content
|
document.content = summary_content
|
||||||
document.content_hash = item['content_hash']
|
document.content_hash = item["content_hash"]
|
||||||
document.embedding = summary_embedding
|
document.embedding = summary_embedding
|
||||||
document.document_metadata = {
|
document.document_metadata = {
|
||||||
"message_id": item['message_id'],
|
"message_id": item["message_id"],
|
||||||
"thread_id": item['thread_id'],
|
"thread_id": item["thread_id"],
|
||||||
"subject": item['subject'],
|
"subject": item["subject"],
|
||||||
"sender": item['sender'],
|
"sender": item["sender"],
|
||||||
"date": item['date_str'],
|
"date": item["date_str"],
|
||||||
"connector_id": connector_id,
|
"connector_id": connector_id,
|
||||||
}
|
}
|
||||||
safe_set_chunks(document, chunks)
|
safe_set_chunks(document, chunks)
|
||||||
|
|
@ -459,7 +474,9 @@ async def index_google_gmail_messages(
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
documents_failed += 1
|
documents_failed += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -239,23 +239,27 @@ async def index_jira_issues(
|
||||||
# Document exists - check if content has changed
|
# Document exists - check if content has changed
|
||||||
if existing_document.content_hash == content_hash:
|
if existing_document.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status = DocumentStatus.ready()
|
||||||
documents_skipped += 1
|
documents_skipped += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Queue existing document for update (will be set to processing in Phase 2)
|
# Queue existing document for update (will be set to processing in Phase 2)
|
||||||
issues_to_process.append({
|
issues_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'issue_content': issue_content,
|
"is_new": False,
|
||||||
'content_hash': content_hash,
|
"issue_content": issue_content,
|
||||||
'issue_id': issue_id,
|
"content_hash": content_hash,
|
||||||
'issue_identifier': issue_identifier,
|
"issue_id": issue_id,
|
||||||
'issue_title': issue_title,
|
"issue_identifier": issue_identifier,
|
||||||
'formatted_issue': formatted_issue,
|
"issue_title": issue_title,
|
||||||
'comment_count': comment_count,
|
"formatted_issue": formatted_issue,
|
||||||
})
|
"comment_count": comment_count,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Document doesn't exist by unique_identifier_hash
|
# Document doesn't exist by unique_identifier_hash
|
||||||
|
|
@ -301,17 +305,19 @@ async def index_jira_issues(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
issues_to_process.append({
|
issues_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'issue_content': issue_content,
|
"is_new": True,
|
||||||
'content_hash': content_hash,
|
"issue_content": issue_content,
|
||||||
'issue_id': issue_id,
|
"content_hash": content_hash,
|
||||||
'issue_identifier': issue_identifier,
|
"issue_id": issue_id,
|
||||||
'issue_title': issue_title,
|
"issue_identifier": issue_identifier,
|
||||||
'formatted_issue': formatted_issue,
|
"issue_title": issue_title,
|
||||||
'comment_count': comment_count,
|
"formatted_issue": formatted_issue,
|
||||||
})
|
"comment_count": comment_count,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in Phase 1 for issue: {e!s}", exc_info=True)
|
logger.error(f"Error in Phase 1 for issue: {e!s}", exc_info=True)
|
||||||
|
|
@ -320,7 +326,9 @@ async def index_jira_issues(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -337,7 +345,7 @@ async def index_jira_issues(
|
||||||
await on_heartbeat_callback(documents_indexed)
|
await on_heartbeat_callback(documents_indexed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
|
|
@ -350,11 +358,11 @@ async def index_jira_issues(
|
||||||
|
|
||||||
if user_llm:
|
if user_llm:
|
||||||
document_metadata = {
|
document_metadata = {
|
||||||
"issue_key": item['issue_identifier'],
|
"issue_key": item["issue_identifier"],
|
||||||
"issue_title": item['issue_title'],
|
"issue_title": item["issue_title"],
|
||||||
"status": item['formatted_issue'].get("status", "Unknown"),
|
"status": item["formatted_issue"].get("status", "Unknown"),
|
||||||
"priority": item['formatted_issue'].get("priority", "Unknown"),
|
"priority": item["formatted_issue"].get("priority", "Unknown"),
|
||||||
"comment_count": item['comment_count'],
|
"comment_count": item["comment_count"],
|
||||||
"document_type": "Jira Issue",
|
"document_type": "Jira Issue",
|
||||||
"connector_type": "Jira",
|
"connector_type": "Jira",
|
||||||
}
|
}
|
||||||
|
|
@ -362,34 +370,32 @@ async def index_jira_issues(
|
||||||
summary_content,
|
summary_content,
|
||||||
summary_embedding,
|
summary_embedding,
|
||||||
) = await generate_document_summary(
|
) = await generate_document_summary(
|
||||||
item['issue_content'], user_llm, document_metadata
|
item["issue_content"], user_llm, document_metadata
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Fallback to simple summary if no LLM configured
|
# Fallback to simple summary if no LLM configured
|
||||||
summary_content = f"Jira Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['formatted_issue'].get('status', 'Unknown')}\n\n"
|
summary_content = f"Jira Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['formatted_issue'].get('status', 'Unknown')}\n\n"
|
||||||
if item['formatted_issue'].get("description"):
|
if item["formatted_issue"].get("description"):
|
||||||
summary_content += (
|
summary_content += f"Description: {item['formatted_issue'].get('description')}\n\n"
|
||||||
f"Description: {item['formatted_issue'].get('description')}\n\n"
|
|
||||||
)
|
|
||||||
summary_content += f"Comments: {item['comment_count']}"
|
summary_content += f"Comments: {item['comment_count']}"
|
||||||
summary_embedding = config.embedding_model_instance.embed(
|
summary_embedding = config.embedding_model_instance.embed(
|
||||||
summary_content
|
summary_content
|
||||||
)
|
)
|
||||||
|
|
||||||
# Process chunks - using the full issue content with comments
|
# Process chunks - using the full issue content with comments
|
||||||
chunks = await create_document_chunks(item['issue_content'])
|
chunks = await create_document_chunks(item["issue_content"])
|
||||||
|
|
||||||
# Update document to READY with actual content
|
# Update document to READY with actual content
|
||||||
document.title = f"{item['issue_identifier']}: {item['issue_title']}"
|
document.title = f"{item['issue_identifier']}: {item['issue_title']}"
|
||||||
document.content = summary_content
|
document.content = summary_content
|
||||||
document.content_hash = item['content_hash']
|
document.content_hash = item["content_hash"]
|
||||||
document.embedding = summary_embedding
|
document.embedding = summary_embedding
|
||||||
document.document_metadata = {
|
document.document_metadata = {
|
||||||
"issue_id": item['issue_id'],
|
"issue_id": item["issue_id"],
|
||||||
"issue_identifier": item['issue_identifier'],
|
"issue_identifier": item["issue_identifier"],
|
||||||
"issue_title": item['issue_title'],
|
"issue_title": item["issue_title"],
|
||||||
"state": item['formatted_issue'].get("status", "Unknown"),
|
"state": item["formatted_issue"].get("status", "Unknown"),
|
||||||
"comment_count": item['comment_count'],
|
"comment_count": item["comment_count"],
|
||||||
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||||
"connector_id": connector_id,
|
"connector_id": connector_id,
|
||||||
}
|
}
|
||||||
|
|
@ -416,7 +422,9 @@ async def index_jira_issues(
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
documents_failed += 1
|
documents_failed += 1
|
||||||
continue # Skip this issue and continue with others
|
continue # Skip this issue and continue with others
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -272,7 +272,9 @@ async def index_linear_issues(
|
||||||
# Document exists - check if content has changed
|
# Document exists - check if content has changed
|
||||||
if existing_document.content_hash == content_hash:
|
if existing_document.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status = DocumentStatus.ready()
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Document for Linear issue {issue_identifier} unchanged. Skipping."
|
f"Document for Linear issue {issue_identifier} unchanged. Skipping."
|
||||||
|
|
@ -281,19 +283,21 @@ async def index_linear_issues(
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Queue existing document for update (will be set to processing in Phase 2)
|
# Queue existing document for update (will be set to processing in Phase 2)
|
||||||
issues_to_process.append({
|
issues_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'issue_content': issue_content,
|
"is_new": False,
|
||||||
'content_hash': content_hash,
|
"issue_content": issue_content,
|
||||||
'issue_id': issue_id,
|
"content_hash": content_hash,
|
||||||
'issue_identifier': issue_identifier,
|
"issue_id": issue_id,
|
||||||
'issue_title': issue_title,
|
"issue_identifier": issue_identifier,
|
||||||
'state': state,
|
"issue_title": issue_title,
|
||||||
'description': description,
|
"state": state,
|
||||||
'comment_count': comment_count,
|
"description": description,
|
||||||
'priority': priority,
|
"comment_count": comment_count,
|
||||||
})
|
"priority": priority,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Document doesn't exist by unique_identifier_hash
|
# Document doesn't exist by unique_identifier_hash
|
||||||
|
|
@ -338,19 +342,21 @@ async def index_linear_issues(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
issues_to_process.append({
|
issues_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'issue_content': issue_content,
|
"is_new": True,
|
||||||
'content_hash': content_hash,
|
"issue_content": issue_content,
|
||||||
'issue_id': issue_id,
|
"content_hash": content_hash,
|
||||||
'issue_identifier': issue_identifier,
|
"issue_id": issue_id,
|
||||||
'issue_title': issue_title,
|
"issue_identifier": issue_identifier,
|
||||||
'state': state,
|
"issue_title": issue_title,
|
||||||
'description': description,
|
"state": state,
|
||||||
'comment_count': comment_count,
|
"description": description,
|
||||||
'priority': priority,
|
"comment_count": comment_count,
|
||||||
})
|
"priority": priority,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in Phase 1 for issue: {e!s}", exc_info=True)
|
logger.error(f"Error in Phase 1 for issue: {e!s}", exc_info=True)
|
||||||
|
|
@ -359,7 +365,9 @@ async def index_linear_issues(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([i for i in issues_to_process if i['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -376,7 +384,7 @@ async def index_linear_issues(
|
||||||
await on_heartbeat_callback(documents_indexed)
|
await on_heartbeat_callback(documents_indexed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
|
|
@ -389,20 +397,23 @@ async def index_linear_issues(
|
||||||
|
|
||||||
if user_llm:
|
if user_llm:
|
||||||
document_metadata_for_summary = {
|
document_metadata_for_summary = {
|
||||||
"issue_id": item['issue_identifier'],
|
"issue_id": item["issue_identifier"],
|
||||||
"issue_title": item['issue_title'],
|
"issue_title": item["issue_title"],
|
||||||
"state": item['state'],
|
"state": item["state"],
|
||||||
"priority": item['priority'],
|
"priority": item["priority"],
|
||||||
"comment_count": item['comment_count'],
|
"comment_count": item["comment_count"],
|
||||||
"document_type": "Linear Issue",
|
"document_type": "Linear Issue",
|
||||||
"connector_type": "Linear",
|
"connector_type": "Linear",
|
||||||
}
|
}
|
||||||
summary_content, summary_embedding = await generate_document_summary(
|
(
|
||||||
item['issue_content'], user_llm, document_metadata_for_summary
|
summary_content,
|
||||||
|
summary_embedding,
|
||||||
|
) = await generate_document_summary(
|
||||||
|
item["issue_content"], user_llm, document_metadata_for_summary
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Fallback to simple summary if no LLM configured
|
# Fallback to simple summary if no LLM configured
|
||||||
description = item['description']
|
description = item["description"]
|
||||||
if description and len(description) > 1000:
|
if description and len(description) > 1000:
|
||||||
description = description[:997] + "..."
|
description = description[:997] + "..."
|
||||||
summary_content = f"Linear Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['state']}\n\n"
|
summary_content = f"Linear Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['state']}\n\n"
|
||||||
|
|
@ -413,19 +424,19 @@ async def index_linear_issues(
|
||||||
summary_content
|
summary_content
|
||||||
)
|
)
|
||||||
|
|
||||||
chunks = await create_document_chunks(item['issue_content'])
|
chunks = await create_document_chunks(item["issue_content"])
|
||||||
|
|
||||||
# Update document to READY with actual content
|
# Update document to READY with actual content
|
||||||
document.title = f"{item['issue_identifier']}: {item['issue_title']}"
|
document.title = f"{item['issue_identifier']}: {item['issue_title']}"
|
||||||
document.content = summary_content
|
document.content = summary_content
|
||||||
document.content_hash = item['content_hash']
|
document.content_hash = item["content_hash"]
|
||||||
document.embedding = summary_embedding
|
document.embedding = summary_embedding
|
||||||
document.document_metadata = {
|
document.document_metadata = {
|
||||||
"issue_id": item['issue_id'],
|
"issue_id": item["issue_id"],
|
||||||
"issue_identifier": item['issue_identifier'],
|
"issue_identifier": item["issue_identifier"],
|
||||||
"issue_title": item['issue_title'],
|
"issue_title": item["issue_title"],
|
||||||
"state": item['state'],
|
"state": item["state"],
|
||||||
"comment_count": item['comment_count'],
|
"comment_count": item["comment_count"],
|
||||||
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||||
"connector_id": connector_id,
|
"connector_id": connector_id,
|
||||||
}
|
}
|
||||||
|
|
@ -452,7 +463,9 @@ async def index_linear_issues(
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
skipped_issues.append(
|
skipped_issues.append(
|
||||||
f"{item.get('issue_identifier', 'Unknown')} (processing error)"
|
f"{item.get('issue_identifier', 'Unknown')} (processing error)"
|
||||||
)
|
)
|
||||||
|
|
@ -466,7 +479,9 @@ async def index_linear_issues(
|
||||||
logger.info(f"Final commit: Total {documents_indexed} Linear issues processed")
|
logger.info(f"Final commit: Total {documents_indexed} Linear issues processed")
|
||||||
try:
|
try:
|
||||||
await session.commit()
|
await session.commit()
|
||||||
logger.info("Successfully committed all Linear document changes to database")
|
logger.info(
|
||||||
|
"Successfully committed all Linear document changes to database"
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Handle any remaining integrity errors gracefully (race conditions, etc.)
|
# Handle any remaining integrity errors gracefully (race conditions, etc.)
|
||||||
if (
|
if (
|
||||||
|
|
|
||||||
|
|
@ -305,7 +305,9 @@ async def index_luma_events(
|
||||||
# Document exists - check if content has changed
|
# Document exists - check if content has changed
|
||||||
if existing_document.content_hash == content_hash:
|
if existing_document.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status = DocumentStatus.ready()
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Document for Luma event {event_name} unchanged. Skipping."
|
f"Document for Luma event {event_name} unchanged. Skipping."
|
||||||
|
|
@ -314,23 +316,25 @@ async def index_luma_events(
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Queue existing document for update (will be set to processing in Phase 2)
|
# Queue existing document for update (will be set to processing in Phase 2)
|
||||||
events_to_process.append({
|
events_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'event_id': event_id,
|
"is_new": False,
|
||||||
'event_name': event_name,
|
"event_id": event_id,
|
||||||
'event_url': event_url,
|
"event_name": event_name,
|
||||||
'event_markdown': event_markdown,
|
"event_url": event_url,
|
||||||
'content_hash': content_hash,
|
"event_markdown": event_markdown,
|
||||||
'start_at': start_at,
|
"content_hash": content_hash,
|
||||||
'end_at': end_at,
|
"start_at": start_at,
|
||||||
'timezone': timezone,
|
"end_at": end_at,
|
||||||
'location': location,
|
"timezone": timezone,
|
||||||
'city': city,
|
"location": location,
|
||||||
'host_names': host_names,
|
"city": city,
|
||||||
'description': description,
|
"host_names": host_names,
|
||||||
'cover_url': cover_url,
|
"description": description,
|
||||||
})
|
"cover_url": cover_url,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Document doesn't exist by unique_identifier_hash
|
# Document doesn't exist by unique_identifier_hash
|
||||||
|
|
@ -380,23 +384,25 @@ async def index_luma_events(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
events_to_process.append({
|
events_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'event_id': event_id,
|
"is_new": True,
|
||||||
'event_name': event_name,
|
"event_id": event_id,
|
||||||
'event_url': event_url,
|
"event_name": event_name,
|
||||||
'event_markdown': event_markdown,
|
"event_url": event_url,
|
||||||
'content_hash': content_hash,
|
"event_markdown": event_markdown,
|
||||||
'start_at': start_at,
|
"content_hash": content_hash,
|
||||||
'end_at': end_at,
|
"start_at": start_at,
|
||||||
'timezone': timezone,
|
"end_at": end_at,
|
||||||
'location': location,
|
"timezone": timezone,
|
||||||
'city': city,
|
"location": location,
|
||||||
'host_names': host_names,
|
"city": city,
|
||||||
'description': description,
|
"host_names": host_names,
|
||||||
'cover_url': cover_url,
|
"description": description,
|
||||||
})
|
"cover_url": cover_url,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
|
logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
|
||||||
|
|
@ -405,7 +411,9 @@ async def index_luma_events(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -422,7 +430,7 @@ async def index_luma_events(
|
||||||
await on_heartbeat_callback(documents_indexed)
|
await on_heartbeat_callback(documents_indexed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
|
|
@ -435,15 +443,15 @@ async def index_luma_events(
|
||||||
|
|
||||||
if user_llm:
|
if user_llm:
|
||||||
document_metadata_for_summary = {
|
document_metadata_for_summary = {
|
||||||
"event_id": item['event_id'],
|
"event_id": item["event_id"],
|
||||||
"event_name": item['event_name'],
|
"event_name": item["event_name"],
|
||||||
"event_url": item['event_url'],
|
"event_url": item["event_url"],
|
||||||
"start_at": item['start_at'],
|
"start_at": item["start_at"],
|
||||||
"end_at": item['end_at'],
|
"end_at": item["end_at"],
|
||||||
"timezone": item['timezone'],
|
"timezone": item["timezone"],
|
||||||
"location": item['location'] or "No location",
|
"location": item["location"] or "No location",
|
||||||
"city": item['city'],
|
"city": item["city"],
|
||||||
"hosts": item['host_names'],
|
"hosts": item["host_names"],
|
||||||
"document_type": "Luma Event",
|
"document_type": "Luma Event",
|
||||||
"connector_type": "Luma",
|
"connector_type": "Luma",
|
||||||
}
|
}
|
||||||
|
|
@ -451,26 +459,26 @@ async def index_luma_events(
|
||||||
summary_content,
|
summary_content,
|
||||||
summary_embedding,
|
summary_embedding,
|
||||||
) = await generate_document_summary(
|
) = await generate_document_summary(
|
||||||
item['event_markdown'], user_llm, document_metadata_for_summary
|
item["event_markdown"], user_llm, document_metadata_for_summary
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Fallback to simple summary if no LLM configured
|
# Fallback to simple summary if no LLM configured
|
||||||
summary_content = f"Luma Event: {item['event_name']}\n\n"
|
summary_content = f"Luma Event: {item['event_name']}\n\n"
|
||||||
if item['event_url']:
|
if item["event_url"]:
|
||||||
summary_content += f"URL: {item['event_url']}\n"
|
summary_content += f"URL: {item['event_url']}\n"
|
||||||
summary_content += f"Start: {item['start_at']}\n"
|
summary_content += f"Start: {item['start_at']}\n"
|
||||||
summary_content += f"End: {item['end_at']}\n"
|
summary_content += f"End: {item['end_at']}\n"
|
||||||
if item['timezone']:
|
if item["timezone"]:
|
||||||
summary_content += f"Timezone: {item['timezone']}\n"
|
summary_content += f"Timezone: {item['timezone']}\n"
|
||||||
if item['location']:
|
if item["location"]:
|
||||||
summary_content += f"Location: {item['location']}\n"
|
summary_content += f"Location: {item['location']}\n"
|
||||||
if item['city']:
|
if item["city"]:
|
||||||
summary_content += f"City: {item['city']}\n"
|
summary_content += f"City: {item['city']}\n"
|
||||||
if item['host_names']:
|
if item["host_names"]:
|
||||||
summary_content += f"Hosts: {item['host_names']}\n"
|
summary_content += f"Hosts: {item['host_names']}\n"
|
||||||
if item['description']:
|
if item["description"]:
|
||||||
desc_preview = item['description'][:1000]
|
desc_preview = item["description"][:1000]
|
||||||
if len(item['description']) > 1000:
|
if len(item["description"]) > 1000:
|
||||||
desc_preview += "..."
|
desc_preview += "..."
|
||||||
summary_content += f"Description: {desc_preview}\n"
|
summary_content += f"Description: {desc_preview}\n"
|
||||||
|
|
||||||
|
|
@ -478,24 +486,24 @@ async def index_luma_events(
|
||||||
summary_content
|
summary_content
|
||||||
)
|
)
|
||||||
|
|
||||||
chunks = await create_document_chunks(item['event_markdown'])
|
chunks = await create_document_chunks(item["event_markdown"])
|
||||||
|
|
||||||
# Update document to READY with actual content
|
# Update document to READY with actual content
|
||||||
document.title = item['event_name']
|
document.title = item["event_name"]
|
||||||
document.content = summary_content
|
document.content = summary_content
|
||||||
document.content_hash = item['content_hash']
|
document.content_hash = item["content_hash"]
|
||||||
document.embedding = summary_embedding
|
document.embedding = summary_embedding
|
||||||
document.document_metadata = {
|
document.document_metadata = {
|
||||||
"event_id": item['event_id'],
|
"event_id": item["event_id"],
|
||||||
"event_name": item['event_name'],
|
"event_name": item["event_name"],
|
||||||
"event_url": item['event_url'],
|
"event_url": item["event_url"],
|
||||||
"start_at": item['start_at'],
|
"start_at": item["start_at"],
|
||||||
"end_at": item['end_at'],
|
"end_at": item["end_at"],
|
||||||
"timezone": item['timezone'],
|
"timezone": item["timezone"],
|
||||||
"location": item['location'],
|
"location": item["location"],
|
||||||
"city": item['city'],
|
"city": item["city"],
|
||||||
"hosts": item['host_names'],
|
"hosts": item["host_names"],
|
||||||
"cover_url": item['cover_url'],
|
"cover_url": item["cover_url"],
|
||||||
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||||
"connector_id": connector_id,
|
"connector_id": connector_id,
|
||||||
}
|
}
|
||||||
|
|
@ -522,7 +530,9 @@ async def index_luma_events(
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
skipped_events.append(
|
skipped_events.append(
|
||||||
f"{item.get('event_name', 'Unknown')} (processing error)"
|
f"{item.get('event_name', 'Unknown')} (processing error)"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -354,20 +354,24 @@ async def index_notion_pages(
|
||||||
# Document exists - check if content has changed
|
# Document exists - check if content has changed
|
||||||
if existing_document.content_hash == content_hash:
|
if existing_document.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status = DocumentStatus.ready()
|
||||||
documents_skipped += 1
|
documents_skipped += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Queue existing document for update (will be set to processing in Phase 2)
|
# Queue existing document for update (will be set to processing in Phase 2)
|
||||||
pages_to_process.append({
|
pages_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'markdown_content': markdown_content,
|
"is_new": False,
|
||||||
'content_hash': content_hash,
|
"markdown_content": markdown_content,
|
||||||
'page_id': page_id,
|
"content_hash": content_hash,
|
||||||
'page_title': page_title,
|
"page_id": page_id,
|
||||||
})
|
"page_title": page_title,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Document doesn't exist by unique_identifier_hash
|
# Document doesn't exist by unique_identifier_hash
|
||||||
|
|
@ -410,14 +414,16 @@ async def index_notion_pages(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
pages_to_process.append({
|
pages_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'markdown_content': markdown_content,
|
"is_new": True,
|
||||||
'content_hash': content_hash,
|
"markdown_content": markdown_content,
|
||||||
'page_id': page_id,
|
"content_hash": content_hash,
|
||||||
'page_title': page_title,
|
"page_id": page_id,
|
||||||
})
|
"page_title": page_title,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
|
logger.error(f"Error in Phase 1 for page: {e!s}", exc_info=True)
|
||||||
|
|
@ -426,7 +432,9 @@ async def index_notion_pages(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([p for p in pages_to_process if p['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -443,7 +451,7 @@ async def index_notion_pages(
|
||||||
await on_heartbeat_callback(documents_indexed)
|
await on_heartbeat_callback(documents_indexed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
|
|
@ -456,13 +464,18 @@ async def index_notion_pages(
|
||||||
|
|
||||||
if user_llm:
|
if user_llm:
|
||||||
document_metadata_for_summary = {
|
document_metadata_for_summary = {
|
||||||
"page_title": item['page_title'],
|
"page_title": item["page_title"],
|
||||||
"page_id": item['page_id'],
|
"page_id": item["page_id"],
|
||||||
"document_type": "Notion Page",
|
"document_type": "Notion Page",
|
||||||
"connector_type": "Notion",
|
"connector_type": "Notion",
|
||||||
}
|
}
|
||||||
summary_content, summary_embedding = await generate_document_summary(
|
(
|
||||||
item['markdown_content'], user_llm, document_metadata_for_summary
|
summary_content,
|
||||||
|
summary_embedding,
|
||||||
|
) = await generate_document_summary(
|
||||||
|
item["markdown_content"],
|
||||||
|
user_llm,
|
||||||
|
document_metadata_for_summary,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Fallback to simple summary if no LLM configured
|
# Fallback to simple summary if no LLM configured
|
||||||
|
|
@ -471,16 +484,16 @@ async def index_notion_pages(
|
||||||
summary_content
|
summary_content
|
||||||
)
|
)
|
||||||
|
|
||||||
chunks = await create_document_chunks(item['markdown_content'])
|
chunks = await create_document_chunks(item["markdown_content"])
|
||||||
|
|
||||||
# Update document to READY with actual content
|
# Update document to READY with actual content
|
||||||
document.title = item['page_title']
|
document.title = item["page_title"]
|
||||||
document.content = summary_content
|
document.content = summary_content
|
||||||
document.content_hash = item['content_hash']
|
document.content_hash = item["content_hash"]
|
||||||
document.embedding = summary_embedding
|
document.embedding = summary_embedding
|
||||||
document.document_metadata = {
|
document.document_metadata = {
|
||||||
"page_title": item['page_title'],
|
"page_title": item["page_title"],
|
||||||
"page_id": item['page_id'],
|
"page_id": item["page_id"],
|
||||||
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||||
"connector_id": connector_id,
|
"connector_id": connector_id,
|
||||||
}
|
}
|
||||||
|
|
@ -504,7 +517,9 @@ async def index_notion_pages(
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
skipped_pages.append(f"{item['page_title']} (processing error)")
|
skipped_pages.append(f"{item['page_title']} (processing error)")
|
||||||
documents_failed += 1
|
documents_failed += 1
|
||||||
continue
|
continue
|
||||||
|
|
|
||||||
|
|
@ -382,27 +382,31 @@ async def index_obsidian_vault(
|
||||||
# Document exists - check if content has changed
|
# Document exists - check if content has changed
|
||||||
if existing_document.content_hash == content_hash:
|
if existing_document.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status = DocumentStatus.ready()
|
||||||
logger.debug(f"Note {title} unchanged, skipping")
|
logger.debug(f"Note {title} unchanged, skipping")
|
||||||
skipped_count += 1
|
skipped_count += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Queue existing document for update (will be set to processing in Phase 2)
|
# Queue existing document for update (will be set to processing in Phase 2)
|
||||||
files_to_process.append({
|
files_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'file_info': file_info,
|
"is_new": False,
|
||||||
'content': content,
|
"file_info": file_info,
|
||||||
'body_content': body_content,
|
"content": content,
|
||||||
'frontmatter': frontmatter,
|
"body_content": body_content,
|
||||||
'wiki_links': wiki_links,
|
"frontmatter": frontmatter,
|
||||||
'tags': tags,
|
"wiki_links": wiki_links,
|
||||||
'title': title,
|
"tags": tags,
|
||||||
'relative_path': relative_path,
|
"title": title,
|
||||||
'content_hash': content_hash,
|
"relative_path": relative_path,
|
||||||
'unique_identifier_hash': unique_identifier_hash,
|
"content_hash": content_hash,
|
||||||
})
|
"unique_identifier_hash": unique_identifier_hash,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Document doesn't exist by unique_identifier_hash
|
# Document doesn't exist by unique_identifier_hash
|
||||||
|
|
@ -445,20 +449,22 @@ async def index_obsidian_vault(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
files_to_process.append({
|
files_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'file_info': file_info,
|
"is_new": True,
|
||||||
'content': content,
|
"file_info": file_info,
|
||||||
'body_content': body_content,
|
"content": content,
|
||||||
'frontmatter': frontmatter,
|
"body_content": body_content,
|
||||||
'wiki_links': wiki_links,
|
"frontmatter": frontmatter,
|
||||||
'tags': tags,
|
"wiki_links": wiki_links,
|
||||||
'title': title,
|
"tags": tags,
|
||||||
'relative_path': relative_path,
|
"title": title,
|
||||||
'content_hash': content_hash,
|
"relative_path": relative_path,
|
||||||
'unique_identifier_hash': unique_identifier_hash,
|
"content_hash": content_hash,
|
||||||
})
|
"unique_identifier_hash": unique_identifier_hash,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception(
|
logger.exception(
|
||||||
|
|
@ -469,7 +475,9 @@ async def index_obsidian_vault(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -491,22 +499,22 @@ async def index_obsidian_vault(
|
||||||
await on_heartbeat_callback(indexed_count)
|
await on_heartbeat_callback(indexed_count)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# Extract data from item
|
# Extract data from item
|
||||||
title = item['title']
|
title = item["title"]
|
||||||
relative_path = item['relative_path']
|
relative_path = item["relative_path"]
|
||||||
content = item['content']
|
content = item["content"]
|
||||||
body_content = item['body_content']
|
body_content = item["body_content"]
|
||||||
frontmatter = item['frontmatter']
|
frontmatter = item["frontmatter"]
|
||||||
wiki_links = item['wiki_links']
|
wiki_links = item["wiki_links"]
|
||||||
tags = item['tags']
|
tags = item["tags"]
|
||||||
content_hash = item['content_hash']
|
content_hash = item["content_hash"]
|
||||||
file_info = item['file_info']
|
file_info = item["file_info"]
|
||||||
|
|
||||||
# Build metadata
|
# Build metadata
|
||||||
document_metadata = {
|
document_metadata = {
|
||||||
|
|
@ -584,7 +592,9 @@ async def index_obsidian_vault(
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
failed_count += 1
|
failed_count += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -592,9 +602,7 @@ async def index_obsidian_vault(
|
||||||
await update_connector_last_indexed(session, connector, update_last_indexed)
|
await update_connector_last_indexed(session, connector, update_last_indexed)
|
||||||
|
|
||||||
# Final commit for any remaining documents not yet committed in batches
|
# Final commit for any remaining documents not yet committed in batches
|
||||||
logger.info(
|
logger.info(f"Final commit: Total {indexed_count} Obsidian notes processed")
|
||||||
f"Final commit: Total {indexed_count} Obsidian notes processed"
|
|
||||||
)
|
|
||||||
try:
|
try:
|
||||||
await session.commit()
|
await session.commit()
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|
|
||||||
|
|
@ -314,7 +314,9 @@ async def index_slack_messages(
|
||||||
# Document exists - check if content has changed
|
# Document exists - check if content has changed
|
||||||
if existing_document.content_hash == content_hash:
|
if existing_document.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status = DocumentStatus.ready()
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Document for Slack message {msg_ts} in channel {channel_name} unchanged. Skipping."
|
f"Document for Slack message {msg_ts} in channel {channel_name} unchanged. Skipping."
|
||||||
|
|
@ -323,18 +325,20 @@ async def index_slack_messages(
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Queue existing document for update (will be set to processing in Phase 2)
|
# Queue existing document for update (will be set to processing in Phase 2)
|
||||||
messages_to_process.append({
|
messages_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'combined_document_string': combined_document_string,
|
"is_new": False,
|
||||||
'content_hash': content_hash,
|
"combined_document_string": combined_document_string,
|
||||||
'channel_name': channel_name,
|
"content_hash": content_hash,
|
||||||
'channel_id': channel_id,
|
"channel_name": channel_name,
|
||||||
'msg_ts': msg_ts,
|
"channel_id": channel_id,
|
||||||
'start_date': start_date_str,
|
"msg_ts": msg_ts,
|
||||||
'end_date': end_date_str,
|
"start_date": start_date_str,
|
||||||
'message_count': len(formatted_messages),
|
"end_date": end_date_str,
|
||||||
})
|
"message_count": len(formatted_messages),
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Document doesn't exist by unique_identifier_hash
|
# Document doesn't exist by unique_identifier_hash
|
||||||
|
|
@ -377,18 +381,20 @@ async def index_slack_messages(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
messages_to_process.append({
|
messages_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'combined_document_string': combined_document_string,
|
"is_new": True,
|
||||||
'content_hash': content_hash,
|
"combined_document_string": combined_document_string,
|
||||||
'channel_name': channel_name,
|
"content_hash": content_hash,
|
||||||
'channel_id': channel_id,
|
"channel_name": channel_name,
|
||||||
'msg_ts': msg_ts,
|
"channel_id": channel_id,
|
||||||
'start_date': start_date_str,
|
"msg_ts": msg_ts,
|
||||||
'end_date': end_date_str,
|
"start_date": start_date_str,
|
||||||
'message_count': len(formatted_messages),
|
"end_date": end_date_str,
|
||||||
})
|
"message_count": len(formatted_messages),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Phase 1: Collected {len(formatted_messages)} messages from channel {channel_name}"
|
f"Phase 1: Collected {len(formatted_messages)} messages from channel {channel_name}"
|
||||||
|
|
@ -409,7 +415,9 @@ async def index_slack_messages(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -426,29 +434,29 @@ async def index_slack_messages(
|
||||||
await on_heartbeat_callback(documents_indexed)
|
await on_heartbeat_callback(documents_indexed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# Heavy processing (embeddings, chunks)
|
# Heavy processing (embeddings, chunks)
|
||||||
chunks = await create_document_chunks(item['combined_document_string'])
|
chunks = await create_document_chunks(item["combined_document_string"])
|
||||||
doc_embedding = config.embedding_model_instance.embed(
|
doc_embedding = config.embedding_model_instance.embed(
|
||||||
item['combined_document_string']
|
item["combined_document_string"]
|
||||||
)
|
)
|
||||||
|
|
||||||
# Update document to READY with actual content
|
# Update document to READY with actual content
|
||||||
document.title = item['channel_name']
|
document.title = item["channel_name"]
|
||||||
document.content = item['combined_document_string']
|
document.content = item["combined_document_string"]
|
||||||
document.content_hash = item['content_hash']
|
document.content_hash = item["content_hash"]
|
||||||
document.embedding = doc_embedding
|
document.embedding = doc_embedding
|
||||||
document.document_metadata = {
|
document.document_metadata = {
|
||||||
"channel_name": item['channel_name'],
|
"channel_name": item["channel_name"],
|
||||||
"channel_id": item['channel_id'],
|
"channel_id": item["channel_id"],
|
||||||
"start_date": item['start_date'],
|
"start_date": item["start_date"],
|
||||||
"end_date": item['end_date'],
|
"end_date": item["end_date"],
|
||||||
"message_count": item['message_count'],
|
"message_count": item["message_count"],
|
||||||
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||||
"connector_id": connector_id,
|
"connector_id": connector_id,
|
||||||
}
|
}
|
||||||
|
|
@ -475,7 +483,9 @@ async def index_slack_messages(
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
documents_failed += 1
|
documents_failed += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -332,25 +332,31 @@ async def index_teams_messages(
|
||||||
# Document exists - check if content has changed
|
# Document exists - check if content has changed
|
||||||
if existing_document.content_hash == content_hash:
|
if existing_document.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
|
existing_document.status = (
|
||||||
|
DocumentStatus.ready()
|
||||||
|
)
|
||||||
documents_skipped += 1
|
documents_skipped += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Queue existing document for update (will be set to processing in Phase 2)
|
# Queue existing document for update (will be set to processing in Phase 2)
|
||||||
messages_to_process.append({
|
messages_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'combined_document_string': combined_document_string,
|
"is_new": False,
|
||||||
'content_hash': content_hash,
|
"combined_document_string": combined_document_string,
|
||||||
'team_name': team_name,
|
"content_hash": content_hash,
|
||||||
'team_id': team_id,
|
"team_name": team_name,
|
||||||
'channel_name': channel_name,
|
"team_id": team_id,
|
||||||
'channel_id': channel_id,
|
"channel_name": channel_name,
|
||||||
'message_id': message_id,
|
"channel_id": channel_id,
|
||||||
'start_date': start_date_str,
|
"message_id": message_id,
|
||||||
'end_date': end_date_str,
|
"start_date": start_date_str,
|
||||||
})
|
"end_date": end_date_str,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Document doesn't exist by unique_identifier_hash
|
# Document doesn't exist by unique_identifier_hash
|
||||||
|
|
@ -400,19 +406,21 @@ async def index_teams_messages(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
messages_to_process.append({
|
messages_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'combined_document_string': combined_document_string,
|
"is_new": True,
|
||||||
'content_hash': content_hash,
|
"combined_document_string": combined_document_string,
|
||||||
'team_name': team_name,
|
"content_hash": content_hash,
|
||||||
'team_id': team_id,
|
"team_name": team_name,
|
||||||
'channel_name': channel_name,
|
"team_id": team_id,
|
||||||
'channel_id': channel_id,
|
"channel_name": channel_name,
|
||||||
'message_id': message_id,
|
"channel_id": channel_id,
|
||||||
'start_date': start_date_str,
|
"message_id": message_id,
|
||||||
'end_date': end_date_str,
|
"start_date": start_date_str,
|
||||||
})
|
"end_date": end_date_str,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(
|
logger.error(
|
||||||
|
|
@ -432,7 +440,9 @@ async def index_teams_messages(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -449,30 +459,30 @@ async def index_teams_messages(
|
||||||
await on_heartbeat_callback(documents_indexed)
|
await on_heartbeat_callback(documents_indexed)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
document.status = DocumentStatus.processing()
|
document.status = DocumentStatus.processing()
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# Heavy processing (embeddings, chunks)
|
# Heavy processing (embeddings, chunks)
|
||||||
chunks = await create_document_chunks(item['combined_document_string'])
|
chunks = await create_document_chunks(item["combined_document_string"])
|
||||||
doc_embedding = config.embedding_model_instance.embed(
|
doc_embedding = config.embedding_model_instance.embed(
|
||||||
item['combined_document_string']
|
item["combined_document_string"]
|
||||||
)
|
)
|
||||||
|
|
||||||
# Update document to READY with actual content
|
# Update document to READY with actual content
|
||||||
document.title = f"{item['team_name']} - {item['channel_name']}"
|
document.title = f"{item['team_name']} - {item['channel_name']}"
|
||||||
document.content = item['combined_document_string']
|
document.content = item["combined_document_string"]
|
||||||
document.content_hash = item['content_hash']
|
document.content_hash = item["content_hash"]
|
||||||
document.embedding = doc_embedding
|
document.embedding = doc_embedding
|
||||||
document.document_metadata = {
|
document.document_metadata = {
|
||||||
"team_name": item['team_name'],
|
"team_name": item["team_name"],
|
||||||
"team_id": item['team_id'],
|
"team_id": item["team_id"],
|
||||||
"channel_name": item['channel_name'],
|
"channel_name": item["channel_name"],
|
||||||
"channel_id": item['channel_id'],
|
"channel_id": item["channel_id"],
|
||||||
"start_date": item['start_date'],
|
"start_date": item["start_date"],
|
||||||
"end_date": item['end_date'],
|
"end_date": item["end_date"],
|
||||||
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||||
"connector_id": connector_id,
|
"connector_id": connector_id,
|
||||||
}
|
}
|
||||||
|
|
@ -497,7 +507,9 @@ async def index_teams_messages(
|
||||||
document.status = DocumentStatus.failed(str(e))
|
document.status = DocumentStatus.failed(str(e))
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
documents_failed += 1
|
documents_failed += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -510,9 +522,7 @@ async def index_teams_messages(
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
await session.commit()
|
await session.commit()
|
||||||
logger.info(
|
logger.info("Successfully committed all Teams document changes to database")
|
||||||
"Successfully committed all Teams document changes to database"
|
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Handle any remaining integrity errors gracefully (race conditions, etc.)
|
# Handle any remaining integrity errors gracefully (race conditions, etc.)
|
||||||
if (
|
if (
|
||||||
|
|
|
||||||
|
|
@ -184,22 +184,28 @@ async def index_crawled_urls(
|
||||||
|
|
||||||
if existing_document:
|
if existing_document:
|
||||||
# Document exists - check if it's already being processed
|
# Document exists - check if it's already being processed
|
||||||
if DocumentStatus.is_state(existing_document.status, DocumentStatus.PENDING):
|
if DocumentStatus.is_state(
|
||||||
|
existing_document.status, DocumentStatus.PENDING
|
||||||
|
):
|
||||||
logger.info(f"URL {url} already pending. Skipping.")
|
logger.info(f"URL {url} already pending. Skipping.")
|
||||||
documents_skipped += 1
|
documents_skipped += 1
|
||||||
continue
|
continue
|
||||||
if DocumentStatus.is_state(existing_document.status, DocumentStatus.PROCESSING):
|
if DocumentStatus.is_state(
|
||||||
|
existing_document.status, DocumentStatus.PROCESSING
|
||||||
|
):
|
||||||
logger.info(f"URL {url} already processing. Skipping.")
|
logger.info(f"URL {url} already processing. Skipping.")
|
||||||
documents_skipped += 1
|
documents_skipped += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Queue existing document for potential update check
|
# Queue existing document for potential update check
|
||||||
urls_to_process.append({
|
urls_to_process.append(
|
||||||
'document': existing_document,
|
{
|
||||||
'is_new': False,
|
"document": existing_document,
|
||||||
'url': url,
|
"is_new": False,
|
||||||
'unique_identifier_hash': unique_identifier_hash,
|
"url": url,
|
||||||
})
|
"unique_identifier_hash": unique_identifier_hash,
|
||||||
|
}
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Create new document with PENDING status (visible in UI immediately)
|
# Create new document with PENDING status (visible in UI immediately)
|
||||||
|
|
@ -224,12 +230,14 @@ async def index_crawled_urls(
|
||||||
session.add(document)
|
session.add(document)
|
||||||
new_documents_created = True
|
new_documents_created = True
|
||||||
|
|
||||||
urls_to_process.append({
|
urls_to_process.append(
|
||||||
'document': document,
|
{
|
||||||
'is_new': True,
|
"document": document,
|
||||||
'url': url,
|
"is_new": True,
|
||||||
'unique_identifier_hash': unique_identifier_hash,
|
"url": url,
|
||||||
})
|
"unique_identifier_hash": unique_identifier_hash,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in Phase 1 for URL {url}: {e!s}", exc_info=True)
|
logger.error(f"Error in Phase 1 for URL {url}: {e!s}", exc_info=True)
|
||||||
|
|
@ -238,7 +246,9 @@ async def index_crawled_urls(
|
||||||
|
|
||||||
# Commit all pending documents - they all appear in UI now
|
# Commit all pending documents - they all appear in UI now
|
||||||
if new_documents_created:
|
if new_documents_created:
|
||||||
logger.info(f"Phase 1: Committing {len([u for u in urls_to_process if u['is_new']])} pending documents")
|
logger.info(
|
||||||
|
f"Phase 1: Committing {len([u for u in urls_to_process if u['is_new']])} pending documents"
|
||||||
|
)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
@ -255,9 +265,9 @@ async def index_crawled_urls(
|
||||||
await on_heartbeat_callback(documents_indexed + documents_updated)
|
await on_heartbeat_callback(documents_indexed + documents_updated)
|
||||||
last_heartbeat_time = current_time
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
document = item['document']
|
document = item["document"]
|
||||||
url = item['url']
|
url = item["url"]
|
||||||
is_new = item['is_new']
|
is_new = item["is_new"]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
|
||||||
|
|
@ -298,7 +308,9 @@ async def index_crawled_urls(
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Format content as structured document for summary generation
|
# Format content as structured document for summary generation
|
||||||
structured_document = crawler.format_to_structured_document(crawl_result)
|
structured_document = crawler.format_to_structured_document(
|
||||||
|
crawl_result
|
||||||
|
)
|
||||||
|
|
||||||
# Generate content hash using a version WITHOUT metadata
|
# Generate content hash using a version WITHOUT metadata
|
||||||
structured_document_for_hash = crawler.format_to_structured_document(
|
structured_document_for_hash = crawler.format_to_structured_document(
|
||||||
|
|
@ -339,7 +351,9 @@ async def index_crawled_urls(
|
||||||
f"(existing document ID: {duplicate_by_content.id}). "
|
f"(existing document ID: {duplicate_by_content.id}). "
|
||||||
f"Marking as failed."
|
f"Marking as failed."
|
||||||
)
|
)
|
||||||
document.status = DocumentStatus.failed("Duplicate content exists")
|
document.status = DocumentStatus.failed(
|
||||||
|
"Duplicate content exists"
|
||||||
|
)
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
await session.commit()
|
await session.commit()
|
||||||
duplicate_content_count += 1
|
duplicate_content_count += 1
|
||||||
|
|
@ -360,7 +374,10 @@ async def index_crawled_urls(
|
||||||
"document_type": "Crawled URL",
|
"document_type": "Crawled URL",
|
||||||
"crawler_type": crawler_type,
|
"crawler_type": crawler_type,
|
||||||
}
|
}
|
||||||
summary_content, summary_embedding = await generate_document_summary(
|
(
|
||||||
|
summary_content,
|
||||||
|
summary_embedding,
|
||||||
|
) = await generate_document_summary(
|
||||||
structured_document, user_llm, document_metadata_for_summary
|
structured_document, user_llm, document_metadata_for_summary
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
|
@ -423,7 +440,9 @@ async def index_crawled_urls(
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
await session.commit()
|
await session.commit()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
documents_failed += 1
|
documents_failed += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -438,7 +457,9 @@ async def index_crawled_urls(
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
await session.commit()
|
await session.commit()
|
||||||
logger.info("Successfully committed all webcrawler document changes to database")
|
logger.info(
|
||||||
|
"Successfully committed all webcrawler document changes to database"
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Handle any remaining integrity errors gracefully
|
# Handle any remaining integrity errors gracefully
|
||||||
if "duplicate key value violates unique constraint" in str(e).lower():
|
if "duplicate key value violates unique constraint" in str(e).lower():
|
||||||
|
|
|
||||||
|
|
@ -17,29 +17,30 @@ md = MarkdownifyTransformer()
|
||||||
def safe_set_chunks(document: Document, chunks: list) -> None:
|
def safe_set_chunks(document: Document, chunks: list) -> None:
|
||||||
"""
|
"""
|
||||||
Safely assign chunks to a document without triggering lazy loading.
|
Safely assign chunks to a document without triggering lazy loading.
|
||||||
|
|
||||||
ALWAYS use this instead of `document.chunks = chunks` to avoid
|
ALWAYS use this instead of `document.chunks = chunks` to avoid
|
||||||
SQLAlchemy async errors (MissingGreenlet / greenlet_spawn).
|
SQLAlchemy async errors (MissingGreenlet / greenlet_spawn).
|
||||||
|
|
||||||
Why this is needed:
|
Why this is needed:
|
||||||
- Direct assignment `document.chunks = chunks` triggers SQLAlchemy to
|
- Direct assignment `document.chunks = chunks` triggers SQLAlchemy to
|
||||||
load the OLD chunks first (for comparison/orphan detection)
|
load the OLD chunks first (for comparison/orphan detection)
|
||||||
- This lazy loading fails in async context with asyncpg driver
|
- This lazy loading fails in async context with asyncpg driver
|
||||||
- set_committed_value bypasses this by setting the value directly
|
- set_committed_value bypasses this by setting the value directly
|
||||||
|
|
||||||
This function is safe regardless of how the document was loaded
|
This function is safe regardless of how the document was loaded
|
||||||
(with or without selectinload).
|
(with or without selectinload).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
document: The Document object to update
|
document: The Document object to update
|
||||||
chunks: List of Chunk objects to assign
|
chunks: List of Chunk objects to assign
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
# Instead of: document.chunks = chunks (DANGEROUS!)
|
# Instead of: document.chunks = chunks (DANGEROUS!)
|
||||||
safe_set_chunks(document, chunks) # Always safe
|
safe_set_chunks(document, chunks) # Always safe
|
||||||
"""
|
"""
|
||||||
from sqlalchemy.orm.attributes import set_committed_value
|
from sqlalchemy.orm.attributes import set_committed_value
|
||||||
set_committed_value(document, 'chunks', chunks)
|
|
||||||
|
set_committed_value(document, "chunks", chunks)
|
||||||
|
|
||||||
|
|
||||||
def get_current_timestamp() -> datetime:
|
def get_current_timestamp() -> datetime:
|
||||||
|
|
|
||||||
|
|
@ -91,7 +91,9 @@ async def add_circleback_meeting_document(
|
||||||
# Document exists - check if content has changed
|
# Document exists - check if content has changed
|
||||||
if existing_document.content_hash == content_hash:
|
if existing_document.content_hash == content_hash:
|
||||||
# Ensure status is ready (might have been stuck in processing/pending)
|
# Ensure status is ready (might have been stuck in processing/pending)
|
||||||
if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(
|
||||||
|
existing_document.status, DocumentStatus.READY
|
||||||
|
):
|
||||||
existing_document.status = DocumentStatus.ready()
|
existing_document.status = DocumentStatus.ready()
|
||||||
await session.commit()
|
await session.commit()
|
||||||
logger.info(f"Circleback meeting {meeting_id} unchanged. Skipping.")
|
logger.info(f"Circleback meeting {meeting_id} unchanged. Skipping.")
|
||||||
|
|
@ -110,7 +112,7 @@ async def add_circleback_meeting_document(
|
||||||
# PHASE 1: Create document with PENDING status
|
# PHASE 1: Create document with PENDING status
|
||||||
# This makes the document visible in the UI immediately
|
# This makes the document visible in the UI immediately
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
||||||
# Fetch the user who set up the Circleback connector (preferred)
|
# Fetch the user who set up the Circleback connector (preferred)
|
||||||
# or fall back to search space owner if no connector found
|
# or fall back to search space owner if no connector found
|
||||||
created_by_user_id = None
|
created_by_user_id = None
|
||||||
|
|
@ -173,7 +175,7 @@ async def add_circleback_meeting_document(
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
# PHASE 3: Process the document content
|
# PHASE 3: Process the document content
|
||||||
# =======================================================================
|
# =======================================================================
|
||||||
|
|
||||||
# Get LLM for generating summary
|
# Get LLM for generating summary
|
||||||
llm = await get_document_summary_llm(session, search_space_id)
|
llm = await get_document_summary_llm(session, search_space_id)
|
||||||
if not llm:
|
if not llm:
|
||||||
|
|
@ -243,7 +245,7 @@ async def add_circleback_meeting_document(
|
||||||
|
|
||||||
await session.commit()
|
await session.commit()
|
||||||
await session.refresh(document)
|
await session.refresh(document)
|
||||||
|
|
||||||
if existing_document:
|
if existing_document:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Updated Circleback meeting document {meeting_id} in search space {search_space_id}"
|
f"Updated Circleback meeting document {meeting_id} in search space {search_space_id}"
|
||||||
|
|
@ -267,7 +269,9 @@ async def add_circleback_meeting_document(
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
await session.commit()
|
await session.commit()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
raise db_error
|
raise db_error
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
await session.rollback()
|
await session.rollback()
|
||||||
|
|
@ -279,5 +283,7 @@ async def add_circleback_meeting_document(
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
await session.commit()
|
await session.commit()
|
||||||
except Exception as status_error:
|
except Exception as status_error:
|
||||||
logger.error(f"Failed to update document status to failed: {status_error}")
|
logger.error(
|
||||||
|
f"Failed to update document status to failed: {status_error}"
|
||||||
|
)
|
||||||
raise RuntimeError(f"Failed to process Circleback meeting: {e!s}") from e
|
raise RuntimeError(f"Failed to process Circleback meeting: {e!s}") from e
|
||||||
|
|
|
||||||
|
|
@ -1629,16 +1629,16 @@ async def process_file_in_background_with_document(
|
||||||
) -> Document | None:
|
) -> Document | None:
|
||||||
"""
|
"""
|
||||||
Process file and update existing pending document (2-phase pattern).
|
Process file and update existing pending document (2-phase pattern).
|
||||||
|
|
||||||
This function is Phase 2 of the real-time document status updates:
|
This function is Phase 2 of the real-time document status updates:
|
||||||
- Phase 1 (API): Created document with pending status
|
- Phase 1 (API): Created document with pending status
|
||||||
- Phase 2 (this): Process file and update document to ready/failed
|
- Phase 2 (this): Process file and update document to ready/failed
|
||||||
|
|
||||||
The document already exists with pending status. This function:
|
The document already exists with pending status. This function:
|
||||||
1. Parses the file content (markdown, audio, or ETL services)
|
1. Parses the file content (markdown, audio, or ETL services)
|
||||||
2. Updates the document with content, embeddings, and chunks
|
2. Updates the document with content, embeddings, and chunks
|
||||||
3. Sets status to 'ready' on success
|
3. Sets status to 'ready' on success
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
document: Existing document with pending status
|
document: Existing document with pending status
|
||||||
file_path: Path to the uploaded file
|
file_path: Path to the uploaded file
|
||||||
|
|
@ -1650,7 +1650,7 @@ async def process_file_in_background_with_document(
|
||||||
log_entry: Log entry for this task
|
log_entry: Log entry for this task
|
||||||
connector: Optional connector info for Google Drive files
|
connector: Optional connector info for Google Drive files
|
||||||
notification: Optional notification for progress updates
|
notification: Optional notification for progress updates
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Updated Document object if successful, None if duplicate content detected
|
Updated Document object if successful, None if duplicate content detected
|
||||||
"""
|
"""
|
||||||
|
|
@ -1665,13 +1665,18 @@ async def process_file_in_background_with_document(
|
||||||
etl_service = None
|
etl_service = None
|
||||||
|
|
||||||
# ===== STEP 1: Parse file content based on type =====
|
# ===== STEP 1: Parse file content based on type =====
|
||||||
|
|
||||||
# Check if the file is a markdown or text file
|
# Check if the file is a markdown or text file
|
||||||
if filename.lower().endswith((".md", ".markdown", ".txt")):
|
if filename.lower().endswith((".md", ".markdown", ".txt")):
|
||||||
# Update notification: parsing stage
|
# Update notification: parsing stage
|
||||||
if notification:
|
if notification:
|
||||||
await NotificationService.document_processing.notify_processing_progress(
|
await (
|
||||||
session, notification, stage="parsing", stage_message="Reading file"
|
NotificationService.document_processing.notify_processing_progress(
|
||||||
|
session,
|
||||||
|
notification,
|
||||||
|
stage="parsing",
|
||||||
|
stage_message="Reading file",
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
await task_logger.log_task_progress(
|
await task_logger.log_task_progress(
|
||||||
|
|
@ -1695,8 +1700,13 @@ async def process_file_in_background_with_document(
|
||||||
):
|
):
|
||||||
# Update notification: parsing stage (transcription)
|
# Update notification: parsing stage (transcription)
|
||||||
if notification:
|
if notification:
|
||||||
await NotificationService.document_processing.notify_processing_progress(
|
await (
|
||||||
session, notification, stage="parsing", stage_message="Transcribing audio"
|
NotificationService.document_processing.notify_processing_progress(
|
||||||
|
session,
|
||||||
|
notification,
|
||||||
|
stage="parsing",
|
||||||
|
stage_message="Transcribing audio",
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
await task_logger.log_task_progress(
|
await task_logger.log_task_progress(
|
||||||
|
|
@ -1708,7 +1718,8 @@ async def process_file_in_background_with_document(
|
||||||
# Transcribe audio
|
# Transcribe audio
|
||||||
stt_service_type = (
|
stt_service_type = (
|
||||||
"local"
|
"local"
|
||||||
if app_config.STT_SERVICE and app_config.STT_SERVICE.startswith("local/")
|
if app_config.STT_SERVICE
|
||||||
|
and app_config.STT_SERVICE.startswith("local/")
|
||||||
else "external"
|
else "external"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -1719,7 +1730,9 @@ async def process_file_in_background_with_document(
|
||||||
transcribed_text = result.get("text", "")
|
transcribed_text = result.get("text", "")
|
||||||
if not transcribed_text:
|
if not transcribed_text:
|
||||||
raise ValueError("Transcription returned empty text")
|
raise ValueError("Transcription returned empty text")
|
||||||
markdown_content = f"# Transcription of {filename}\n\n{transcribed_text}"
|
markdown_content = (
|
||||||
|
f"# Transcription of {filename}\n\n{transcribed_text}"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
with open(file_path, "rb") as audio_file:
|
with open(file_path, "rb") as audio_file:
|
||||||
transcription_kwargs = {
|
transcription_kwargs = {
|
||||||
|
|
@ -1728,12 +1741,18 @@ async def process_file_in_background_with_document(
|
||||||
"api_key": app_config.STT_SERVICE_API_KEY,
|
"api_key": app_config.STT_SERVICE_API_KEY,
|
||||||
}
|
}
|
||||||
if app_config.STT_SERVICE_API_BASE:
|
if app_config.STT_SERVICE_API_BASE:
|
||||||
transcription_kwargs["api_base"] = app_config.STT_SERVICE_API_BASE
|
transcription_kwargs["api_base"] = (
|
||||||
transcription_response = await atranscription(**transcription_kwargs)
|
app_config.STT_SERVICE_API_BASE
|
||||||
|
)
|
||||||
|
transcription_response = await atranscription(
|
||||||
|
**transcription_kwargs
|
||||||
|
)
|
||||||
transcribed_text = transcription_response.get("text", "")
|
transcribed_text = transcription_response.get("text", "")
|
||||||
if not transcribed_text:
|
if not transcribed_text:
|
||||||
raise ValueError("Transcription returned empty text")
|
raise ValueError("Transcription returned empty text")
|
||||||
markdown_content = f"# Transcription of {filename}\n\n{transcribed_text}"
|
markdown_content = (
|
||||||
|
f"# Transcription of {filename}\n\n{transcribed_text}"
|
||||||
|
)
|
||||||
|
|
||||||
etl_service = "AUDIO_TRANSCRIPTION"
|
etl_service = "AUDIO_TRANSCRIPTION"
|
||||||
# Clean up temp file
|
# Clean up temp file
|
||||||
|
|
@ -1742,13 +1761,18 @@ async def process_file_in_background_with_document(
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# Document files - use ETL service
|
# Document files - use ETL service
|
||||||
from app.services.page_limit_service import PageLimitExceededError, PageLimitService
|
from app.services.page_limit_service import (
|
||||||
|
PageLimitExceededError,
|
||||||
|
PageLimitService,
|
||||||
|
)
|
||||||
|
|
||||||
page_limit_service = PageLimitService(session)
|
page_limit_service = PageLimitService(session)
|
||||||
|
|
||||||
# Estimate page count
|
# Estimate page count
|
||||||
try:
|
try:
|
||||||
estimated_pages = page_limit_service.estimate_pages_before_processing(file_path)
|
estimated_pages = page_limit_service.estimate_pages_before_processing(
|
||||||
|
file_path
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
file_size = os.path.getsize(file_path)
|
file_size = os.path.getsize(file_path)
|
||||||
estimated_pages = max(1, file_size // (80 * 1024))
|
estimated_pages = max(1, file_size // (80 * 1024))
|
||||||
|
|
@ -1759,14 +1783,22 @@ async def process_file_in_background_with_document(
|
||||||
if app_config.ETL_SERVICE == "UNSTRUCTURED":
|
if app_config.ETL_SERVICE == "UNSTRUCTURED":
|
||||||
if notification:
|
if notification:
|
||||||
await NotificationService.document_processing.notify_processing_progress(
|
await NotificationService.document_processing.notify_processing_progress(
|
||||||
session, notification, stage="parsing", stage_message="Extracting content"
|
session,
|
||||||
|
notification,
|
||||||
|
stage="parsing",
|
||||||
|
stage_message="Extracting content",
|
||||||
)
|
)
|
||||||
|
|
||||||
from langchain_unstructured import UnstructuredLoader
|
from langchain_unstructured import UnstructuredLoader
|
||||||
|
|
||||||
loader = UnstructuredLoader(
|
loader = UnstructuredLoader(
|
||||||
file_path, mode="elements", post_processors=[], languages=["eng"],
|
file_path,
|
||||||
include_orig_elements=False, include_metadata=False, strategy="auto"
|
mode="elements",
|
||||||
|
post_processors=[],
|
||||||
|
languages=["eng"],
|
||||||
|
include_orig_elements=False,
|
||||||
|
include_metadata=False,
|
||||||
|
strategy="auto",
|
||||||
)
|
)
|
||||||
docs = await loader.aload()
|
docs = await loader.aload()
|
||||||
markdown_content = await convert_document_to_markdown(docs)
|
markdown_content = await convert_document_to_markdown(docs)
|
||||||
|
|
@ -1775,37 +1807,55 @@ async def process_file_in_background_with_document(
|
||||||
etl_service = "UNSTRUCTURED"
|
etl_service = "UNSTRUCTURED"
|
||||||
|
|
||||||
# Update page usage
|
# Update page usage
|
||||||
await page_limit_service.update_page_usage(user_id, final_page_count, allow_exceed=True)
|
await page_limit_service.update_page_usage(
|
||||||
|
user_id, final_page_count, allow_exceed=True
|
||||||
|
)
|
||||||
|
|
||||||
elif app_config.ETL_SERVICE == "LLAMACLOUD":
|
elif app_config.ETL_SERVICE == "LLAMACLOUD":
|
||||||
if notification:
|
if notification:
|
||||||
await NotificationService.document_processing.notify_processing_progress(
|
await NotificationService.document_processing.notify_processing_progress(
|
||||||
session, notification, stage="parsing", stage_message="Extracting content"
|
session,
|
||||||
|
notification,
|
||||||
|
stage="parsing",
|
||||||
|
stage_message="Extracting content",
|
||||||
)
|
)
|
||||||
|
|
||||||
result = await parse_with_llamacloud_retry(
|
result = await parse_with_llamacloud_retry(
|
||||||
file_path=file_path, estimated_pages=estimated_pages,
|
file_path=file_path,
|
||||||
task_logger=task_logger, log_entry=log_entry
|
estimated_pages=estimated_pages,
|
||||||
|
task_logger=task_logger,
|
||||||
|
log_entry=log_entry,
|
||||||
|
)
|
||||||
|
markdown_documents = await result.aget_markdown_documents(
|
||||||
|
split_by_page=False
|
||||||
)
|
)
|
||||||
markdown_documents = await result.aget_markdown_documents(split_by_page=False)
|
|
||||||
if not markdown_documents:
|
if not markdown_documents:
|
||||||
raise RuntimeError(f"LlamaCloud parsing returned no documents: {filename}")
|
raise RuntimeError(
|
||||||
|
f"LlamaCloud parsing returned no documents: {filename}"
|
||||||
|
)
|
||||||
markdown_content = markdown_documents[0].text
|
markdown_content = markdown_documents[0].text
|
||||||
etl_service = "LLAMACLOUD"
|
etl_service = "LLAMACLOUD"
|
||||||
|
|
||||||
# Update page usage
|
# Update page usage
|
||||||
await page_limit_service.update_page_usage(user_id, estimated_pages, allow_exceed=True)
|
await page_limit_service.update_page_usage(
|
||||||
|
user_id, estimated_pages, allow_exceed=True
|
||||||
|
)
|
||||||
|
|
||||||
elif app_config.ETL_SERVICE == "DOCLING":
|
elif app_config.ETL_SERVICE == "DOCLING":
|
||||||
if notification:
|
if notification:
|
||||||
await NotificationService.document_processing.notify_processing_progress(
|
await NotificationService.document_processing.notify_processing_progress(
|
||||||
session, notification, stage="parsing", stage_message="Extracting content"
|
session,
|
||||||
|
notification,
|
||||||
|
stage="parsing",
|
||||||
|
stage_message="Extracting content",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Suppress logging during Docling import
|
# Suppress logging during Docling import
|
||||||
getLogger("docling.pipeline.base_pipeline").setLevel(ERROR)
|
getLogger("docling.pipeline.base_pipeline").setLevel(ERROR)
|
||||||
getLogger("docling.document_converter").setLevel(ERROR)
|
getLogger("docling.document_converter").setLevel(ERROR)
|
||||||
getLogger("docling_core.transforms.chunker.hierarchical_chunker").setLevel(ERROR)
|
getLogger(
|
||||||
|
"docling_core.transforms.chunker.hierarchical_chunker"
|
||||||
|
).setLevel(ERROR)
|
||||||
|
|
||||||
from docling.document_converter import DocumentConverter
|
from docling.document_converter import DocumentConverter
|
||||||
|
|
||||||
|
|
@ -1815,7 +1865,9 @@ async def process_file_in_background_with_document(
|
||||||
etl_service = "DOCLING"
|
etl_service = "DOCLING"
|
||||||
|
|
||||||
# Update page usage
|
# Update page usage
|
||||||
await page_limit_service.update_page_usage(user_id, estimated_pages, allow_exceed=True)
|
await page_limit_service.update_page_usage(
|
||||||
|
user_id, estimated_pages, allow_exceed=True
|
||||||
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise RuntimeError(f"Unknown ETL_SERVICE: {app_config.ETL_SERVICE}")
|
raise RuntimeError(f"Unknown ETL_SERVICE: {app_config.ETL_SERVICE}")
|
||||||
|
|
@ -1829,7 +1881,7 @@ async def process_file_in_background_with_document(
|
||||||
|
|
||||||
# ===== STEP 2: Check for duplicate content =====
|
# ===== STEP 2: Check for duplicate content =====
|
||||||
content_hash = generate_content_hash(markdown_content, search_space_id)
|
content_hash = generate_content_hash(markdown_content, search_space_id)
|
||||||
|
|
||||||
existing_by_content = await check_duplicate_document(session, content_hash)
|
existing_by_content = await check_duplicate_document(session, content_hash)
|
||||||
if existing_by_content and existing_by_content.id != document.id:
|
if existing_by_content and existing_by_content.id != document.id:
|
||||||
# Duplicate content found - mark this document as failed
|
# Duplicate content found - mark this document as failed
|
||||||
|
|
@ -1846,7 +1898,7 @@ async def process_file_in_background_with_document(
|
||||||
)
|
)
|
||||||
|
|
||||||
user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
|
user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
|
||||||
|
|
||||||
if user_llm:
|
if user_llm:
|
||||||
document_metadata = {
|
document_metadata = {
|
||||||
"file_name": filename,
|
"file_name": filename,
|
||||||
|
|
@ -1881,10 +1933,10 @@ async def process_file_in_background_with_document(
|
||||||
**(document.document_metadata or {}),
|
**(document.document_metadata or {}),
|
||||||
}
|
}
|
||||||
flag_modified(document, "document_metadata")
|
flag_modified(document, "document_metadata")
|
||||||
|
|
||||||
# Use safe_set_chunks to avoid async issues
|
# Use safe_set_chunks to avoid async issues
|
||||||
safe_set_chunks(document, chunks)
|
safe_set_chunks(document, chunks)
|
||||||
|
|
||||||
document.blocknote_document = blocknote_json
|
document.blocknote_document = blocknote_json
|
||||||
document.content_needs_reindexing = False
|
document.content_needs_reindexing = False
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
|
|
@ -1922,7 +1974,11 @@ async def process_file_in_background_with_document(
|
||||||
log_entry,
|
log_entry,
|
||||||
error_message,
|
error_message,
|
||||||
str(e),
|
str(e),
|
||||||
{"error_type": type(e).__name__, "filename": filename, "document_id": document.id},
|
{
|
||||||
|
"error_type": type(e).__name__,
|
||||||
|
"filename": filename,
|
||||||
|
"document_id": document.id,
|
||||||
|
},
|
||||||
)
|
)
|
||||||
logging.error(f"Error processing file with document: {error_message}")
|
logging.error(f"Error processing file with document: {error_message}")
|
||||||
raise
|
raise
|
||||||
|
|
|
||||||
|
|
@ -136,11 +136,19 @@ async def add_youtube_video_document(
|
||||||
document = existing_document
|
document = existing_document
|
||||||
is_new_document = False
|
is_new_document = False
|
||||||
# Check if already being processed
|
# Check if already being processed
|
||||||
if DocumentStatus.is_state(existing_document.status, DocumentStatus.PENDING):
|
if DocumentStatus.is_state(
|
||||||
logging.info(f"YouTube video {video_id} already pending. Returning existing.")
|
existing_document.status, DocumentStatus.PENDING
|
||||||
|
):
|
||||||
|
logging.info(
|
||||||
|
f"YouTube video {video_id} already pending. Returning existing."
|
||||||
|
)
|
||||||
return existing_document
|
return existing_document
|
||||||
if DocumentStatus.is_state(existing_document.status, DocumentStatus.PROCESSING):
|
if DocumentStatus.is_state(
|
||||||
logging.info(f"YouTube video {video_id} already processing. Returning existing.")
|
existing_document.status, DocumentStatus.PROCESSING
|
||||||
|
):
|
||||||
|
logging.info(
|
||||||
|
f"YouTube video {video_id} already processing. Returning existing."
|
||||||
|
)
|
||||||
return existing_document
|
return existing_document
|
||||||
else:
|
else:
|
||||||
# Create new document with PENDING status (visible in UI immediately)
|
# Create new document with PENDING status (visible in UI immediately)
|
||||||
|
|
@ -300,7 +308,9 @@ async def add_youtube_video_document(
|
||||||
"video_id": video_id,
|
"video_id": video_id,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
logging.info(f"Document for YouTube video {video_id} unchanged. Marking as ready.")
|
logging.info(
|
||||||
|
f"Document for YouTube video {video_id} unchanged. Marking as ready."
|
||||||
|
)
|
||||||
document.status = DocumentStatus.ready()
|
document.status = DocumentStatus.ready()
|
||||||
await session.commit()
|
await session.commit()
|
||||||
return document
|
return document
|
||||||
|
|
@ -408,7 +418,9 @@ async def add_youtube_video_document(
|
||||||
# Mark document as failed if it exists
|
# Mark document as failed if it exists
|
||||||
if document:
|
if document:
|
||||||
try:
|
try:
|
||||||
document.status = DocumentStatus.failed(f"Database error: {str(db_error)[:150]}")
|
document.status = DocumentStatus.failed(
|
||||||
|
f"Database error: {str(db_error)[:150]}"
|
||||||
|
)
|
||||||
document.updated_at = get_current_timestamp()
|
document.updated_at = get_current_timestamp()
|
||||||
await session.commit()
|
await session.commit()
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,9 @@ export function DocumentTypeChip({ type, className }: { type: string; className?
|
||||||
className={`inline-flex items-center gap-1.5 rounded bg-muted/40 px-2 py-1 text-xs text-muted-foreground max-w-full overflow-hidden ${className ?? ""}`}
|
className={`inline-flex items-center gap-1.5 rounded bg-muted/40 px-2 py-1 text-xs text-muted-foreground max-w-full overflow-hidden ${className ?? ""}`}
|
||||||
>
|
>
|
||||||
<span className="opacity-80 flex-shrink-0">{icon}</span>
|
<span className="opacity-80 flex-shrink-0">{icon}</span>
|
||||||
<span ref={textRef} className="truncate min-w-0">{fullLabel}</span>
|
<span ref={textRef} className="truncate min-w-0">
|
||||||
|
{fullLabel}
|
||||||
|
</span>
|
||||||
</span>
|
</span>
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -68,9 +68,7 @@ export function DocumentsFilters({
|
||||||
const filteredTypes = useMemo(() => {
|
const filteredTypes = useMemo(() => {
|
||||||
if (!typeSearchQuery.trim()) return uniqueTypes;
|
if (!typeSearchQuery.trim()) return uniqueTypes;
|
||||||
const query = typeSearchQuery.toLowerCase();
|
const query = typeSearchQuery.toLowerCase();
|
||||||
return uniqueTypes.filter((type) =>
|
return uniqueTypes.filter((type) => getDocumentTypeLabel(type).toLowerCase().includes(query));
|
||||||
getDocumentTypeLabel(type).toLowerCase().includes(query)
|
|
||||||
);
|
|
||||||
}, [uniqueTypes, typeSearchQuery]);
|
}, [uniqueTypes, typeSearchQuery]);
|
||||||
|
|
||||||
const typeCounts = useMemo(() => {
|
const typeCounts = useMemo(() => {
|
||||||
|
|
@ -156,94 +154,95 @@ export function DocumentsFilters({
|
||||||
|
|
||||||
{/* Filter Buttons Group */}
|
{/* Filter Buttons Group */}
|
||||||
<div className="flex items-center gap-2 flex-wrap">
|
<div className="flex items-center gap-2 flex-wrap">
|
||||||
{/* Type Filter */}
|
{/* Type Filter */}
|
||||||
<Popover>
|
<Popover>
|
||||||
<PopoverTrigger asChild>
|
<PopoverTrigger asChild>
|
||||||
<Button
|
<Button
|
||||||
variant="outline"
|
variant="outline"
|
||||||
size="sm"
|
size="sm"
|
||||||
className="h-9 gap-2 border-dashed border-border/60 text-muted-foreground hover:text-foreground hover:border-border"
|
className="h-9 gap-2 border-dashed border-border/60 text-muted-foreground hover:text-foreground hover:border-border"
|
||||||
>
|
>
|
||||||
<FileType size={14} className="text-muted-foreground" />
|
<FileType size={14} className="text-muted-foreground" />
|
||||||
<span className="hidden sm:inline">Type</span>
|
<span className="hidden sm:inline">Type</span>
|
||||||
{activeTypes.length > 0 && (
|
{activeTypes.length > 0 && (
|
||||||
<span className="flex h-5 w-5 items-center justify-center rounded-full bg-primary text-[10px] font-medium text-primary-foreground">
|
<span className="flex h-5 w-5 items-center justify-center rounded-full bg-primary text-[10px] font-medium text-primary-foreground">
|
||||||
{activeTypes.length}
|
{activeTypes.length}
|
||||||
</span>
|
</span>
|
||||||
)}
|
)}
|
||||||
</Button>
|
</Button>
|
||||||
</PopoverTrigger>
|
</PopoverTrigger>
|
||||||
<PopoverContent className="w-64 !p-0 overflow-hidden" align="end">
|
<PopoverContent className="w-64 !p-0 overflow-hidden" align="end">
|
||||||
<div>
|
<div>
|
||||||
{/* Search input */}
|
{/* Search input */}
|
||||||
<div className="p-2 border-b border-border/50">
|
<div className="p-2 border-b border-border/50">
|
||||||
<div className="relative">
|
<div className="relative">
|
||||||
<Search className="absolute left-0.5 top-1/2 -translate-y-1/2 h-4 w-4 text-muted-foreground" />
|
<Search className="absolute left-0.5 top-1/2 -translate-y-1/2 h-4 w-4 text-muted-foreground" />
|
||||||
<Input
|
<Input
|
||||||
placeholder="Search types..."
|
placeholder="Search types..."
|
||||||
value={typeSearchQuery}
|
value={typeSearchQuery}
|
||||||
onChange={(e) => setTypeSearchQuery(e.target.value)}
|
onChange={(e) => setTypeSearchQuery(e.target.value)}
|
||||||
className="h-6 pl-6 text-sm bg-transparent border-0 focus-visible:ring-0"
|
className="h-6 pl-6 text-sm bg-transparent border-0 focus-visible:ring-0"
|
||||||
/>
|
/>
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div className="max-h-[300px] overflow-y-auto overflow-x-hidden py-1.5 px-1.5">
|
|
||||||
{filteredTypes.length === 0 ? (
|
|
||||||
<div className="py-6 text-center text-sm text-muted-foreground">
|
|
||||||
No types found
|
|
||||||
</div>
|
</div>
|
||||||
) : (
|
</div>
|
||||||
filteredTypes.map((value: DocumentTypeEnum, i) => (
|
|
||||||
<button
|
<div className="max-h-[300px] overflow-y-auto overflow-x-hidden py-1.5 px-1.5">
|
||||||
key={value}
|
{filteredTypes.length === 0 ? (
|
||||||
type="button"
|
<div className="py-6 text-center text-sm text-muted-foreground">
|
||||||
className="flex w-full items-center gap-2.5 py-2 px-3 rounded-md hover:bg-muted/50 transition-colors cursor-pointer text-left"
|
No types found
|
||||||
onClick={() => onToggleType(value, !activeTypes.includes(value))}
|
</div>
|
||||||
|
) : (
|
||||||
|
filteredTypes.map((value: DocumentTypeEnum, i) => (
|
||||||
|
<button
|
||||||
|
key={value}
|
||||||
|
type="button"
|
||||||
|
className="flex w-full items-center gap-2.5 py-2 px-3 rounded-md hover:bg-muted/50 transition-colors cursor-pointer text-left"
|
||||||
|
onClick={() => onToggleType(value, !activeTypes.includes(value))}
|
||||||
|
>
|
||||||
|
{/* Icon */}
|
||||||
|
<div className="flex h-7 w-7 shrink-0 items-center justify-center rounded-md bg-muted/50 text-foreground/80">
|
||||||
|
{getDocumentTypeIcon(value, "h-4 w-4")}
|
||||||
|
</div>
|
||||||
|
{/* Text content */}
|
||||||
|
<div className="flex flex-col min-w-0 flex-1 gap-0.5">
|
||||||
|
<span className="text-[13px] font-medium text-foreground truncate leading-tight">
|
||||||
|
{getDocumentTypeLabel(value)}
|
||||||
|
</span>
|
||||||
|
<span className="text-[11px] text-muted-foreground leading-tight">
|
||||||
|
{typeCounts.get(value)} document
|
||||||
|
{(typeCounts.get(value) ?? 0) !== 1 ? "s" : ""}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
{/* Checkbox */}
|
||||||
|
<Checkbox
|
||||||
|
id={`${id}-${i}`}
|
||||||
|
checked={activeTypes.includes(value)}
|
||||||
|
onCheckedChange={(checked: boolean) => onToggleType(value, !!checked)}
|
||||||
|
className="h-4 w-4 shrink-0 rounded border-muted-foreground/30 data-[state=checked]:bg-primary data-[state=checked]:border-primary"
|
||||||
|
/>
|
||||||
|
</button>
|
||||||
|
))
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
{activeTypes.length > 0 && (
|
||||||
|
<div className="px-3 pt-1.5 pb-1.5 border-t border-border/50">
|
||||||
|
<Button
|
||||||
|
variant="ghost"
|
||||||
|
size="sm"
|
||||||
|
className="w-full h-7 text-[11px] text-muted-foreground hover:text-foreground"
|
||||||
|
onClick={() => {
|
||||||
|
activeTypes.forEach((t) => {
|
||||||
|
onToggleType(t, false);
|
||||||
|
});
|
||||||
|
}}
|
||||||
>
|
>
|
||||||
{/* Icon */}
|
Clear filters
|
||||||
<div className="flex h-7 w-7 shrink-0 items-center justify-center rounded-md bg-muted/50 text-foreground/80">
|
</Button>
|
||||||
{getDocumentTypeIcon(value, "h-4 w-4")}
|
</div>
|
||||||
</div>
|
|
||||||
{/* Text content */}
|
|
||||||
<div className="flex flex-col min-w-0 flex-1 gap-0.5">
|
|
||||||
<span className="text-[13px] font-medium text-foreground truncate leading-tight">
|
|
||||||
{getDocumentTypeLabel(value)}
|
|
||||||
</span>
|
|
||||||
<span className="text-[11px] text-muted-foreground leading-tight">
|
|
||||||
{typeCounts.get(value)} document{(typeCounts.get(value) ?? 0) !== 1 ? "s" : ""}
|
|
||||||
</span>
|
|
||||||
</div>
|
|
||||||
{/* Checkbox */}
|
|
||||||
<Checkbox
|
|
||||||
id={`${id}-${i}`}
|
|
||||||
checked={activeTypes.includes(value)}
|
|
||||||
onCheckedChange={(checked: boolean) => onToggleType(value, !!checked)}
|
|
||||||
className="h-4 w-4 shrink-0 rounded border-muted-foreground/30 data-[state=checked]:bg-primary data-[state=checked]:border-primary"
|
|
||||||
/>
|
|
||||||
</button>
|
|
||||||
))
|
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
{activeTypes.length > 0 && (
|
</PopoverContent>
|
||||||
<div className="px-3 pt-1.5 pb-1.5 border-t border-border/50">
|
</Popover>
|
||||||
<Button
|
|
||||||
variant="ghost"
|
|
||||||
size="sm"
|
|
||||||
className="w-full h-7 text-[11px] text-muted-foreground hover:text-foreground"
|
|
||||||
onClick={() => {
|
|
||||||
activeTypes.forEach((t) => {
|
|
||||||
onToggleType(t, false);
|
|
||||||
});
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
Clear filters
|
|
||||||
</Button>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
</PopoverContent>
|
|
||||||
</Popover>
|
|
||||||
|
|
||||||
{/* Bulk Delete Button */}
|
{/* Bulk Delete Button */}
|
||||||
{selectedIds.size > 0 && (
|
{selectedIds.size > 0 && (
|
||||||
|
|
@ -255,22 +254,14 @@ export function DocumentsFilters({
|
||||||
exit={{ opacity: 0, scale: 0.9 }}
|
exit={{ opacity: 0, scale: 0.9 }}
|
||||||
>
|
>
|
||||||
{/* Mobile: icon with count */}
|
{/* Mobile: icon with count */}
|
||||||
<Button
|
<Button variant="destructive" size="sm" className="h-9 gap-1.5 px-2.5 md:hidden">
|
||||||
variant="destructive"
|
|
||||||
size="sm"
|
|
||||||
className="h-9 gap-1.5 px-2.5 md:hidden"
|
|
||||||
>
|
|
||||||
<Trash size={14} />
|
<Trash size={14} />
|
||||||
<span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium">
|
<span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium">
|
||||||
{selectedIds.size}
|
{selectedIds.size}
|
||||||
</span>
|
</span>
|
||||||
</Button>
|
</Button>
|
||||||
{/* Desktop: full button */}
|
{/* Desktop: full button */}
|
||||||
<Button
|
<Button variant="destructive" size="sm" className="h-9 gap-2 hidden md:flex">
|
||||||
variant="destructive"
|
|
||||||
size="sm"
|
|
||||||
className="h-9 gap-2 hidden md:flex"
|
|
||||||
>
|
|
||||||
<Trash size={14} />
|
<Trash size={14} />
|
||||||
Delete
|
Delete
|
||||||
<span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium">
|
<span className="flex h-5 w-5 items-center justify-center rounded-full bg-destructive-foreground/20 text-[10px] font-medium">
|
||||||
|
|
@ -288,9 +279,12 @@ export function DocumentsFilters({
|
||||||
<CircleAlert size={18} strokeWidth={2} />
|
<CircleAlert size={18} strokeWidth={2} />
|
||||||
</div>
|
</div>
|
||||||
<AlertDialogHeader className="flex-1">
|
<AlertDialogHeader className="flex-1">
|
||||||
<AlertDialogTitle>Delete {selectedIds.size} document{selectedIds.size !== 1 ? "s" : ""}?</AlertDialogTitle>
|
<AlertDialogTitle>
|
||||||
|
Delete {selectedIds.size} document{selectedIds.size !== 1 ? "s" : ""}?
|
||||||
|
</AlertDialogTitle>
|
||||||
<AlertDialogDescription>
|
<AlertDialogDescription>
|
||||||
This action cannot be undone. This will permanently delete the selected {selectedIds.size === 1 ? "document" : "documents"} from your search space.
|
This action cannot be undone. This will permanently delete the selected{" "}
|
||||||
|
{selectedIds.size === 1 ? "document" : "documents"} from your search space.
|
||||||
</AlertDialogDescription>
|
</AlertDialogDescription>
|
||||||
</AlertDialogHeader>
|
</AlertDialogHeader>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,20 @@
|
||||||
"use client";
|
"use client";
|
||||||
|
|
||||||
import { formatDistanceToNow } from "date-fns";
|
import { formatDistanceToNow } from "date-fns";
|
||||||
import { AlertCircle, Calendar, CheckCircle2, ChevronDown, ChevronUp, Clock, FileText, FileX, Loader2, Network, Plus, User } from "lucide-react";
|
import {
|
||||||
|
AlertCircle,
|
||||||
|
Calendar,
|
||||||
|
CheckCircle2,
|
||||||
|
ChevronDown,
|
||||||
|
ChevronUp,
|
||||||
|
Clock,
|
||||||
|
FileText,
|
||||||
|
FileX,
|
||||||
|
Loader2,
|
||||||
|
Network,
|
||||||
|
Plus,
|
||||||
|
User,
|
||||||
|
} from "lucide-react";
|
||||||
import { motion } from "motion/react";
|
import { motion } from "motion/react";
|
||||||
import { useTranslations } from "next-intl";
|
import { useTranslations } from "next-intl";
|
||||||
import React, { useRef, useState, useEffect, useCallback } from "react";
|
import React, { useRef, useState, useEffect, useCallback } from "react";
|
||||||
|
|
@ -10,12 +23,7 @@ import { JsonMetadataViewer } from "@/components/json-metadata-viewer";
|
||||||
import { MarkdownViewer } from "@/components/markdown-viewer";
|
import { MarkdownViewer } from "@/components/markdown-viewer";
|
||||||
import { Button } from "@/components/ui/button";
|
import { Button } from "@/components/ui/button";
|
||||||
import { Checkbox } from "@/components/ui/checkbox";
|
import { Checkbox } from "@/components/ui/checkbox";
|
||||||
import {
|
import { Dialog, DialogContent, DialogHeader, DialogTitle } from "@/components/ui/dialog";
|
||||||
Dialog,
|
|
||||||
DialogContent,
|
|
||||||
DialogHeader,
|
|
||||||
DialogTitle,
|
|
||||||
} from "@/components/ui/dialog";
|
|
||||||
import { Skeleton } from "@/components/ui/skeleton";
|
import { Skeleton } from "@/components/ui/skeleton";
|
||||||
import { Spinner } from "@/components/ui/spinner";
|
import { Spinner } from "@/components/ui/spinner";
|
||||||
import {
|
import {
|
||||||
|
|
@ -35,7 +43,7 @@ import type { ColumnVisibility, Document, DocumentStatus } from "./types";
|
||||||
// Status indicator component for document processing status
|
// Status indicator component for document processing status
|
||||||
function StatusIndicator({ status }: { status?: DocumentStatus }) {
|
function StatusIndicator({ status }: { status?: DocumentStatus }) {
|
||||||
const state = status?.state ?? "ready";
|
const state = status?.state ?? "ready";
|
||||||
|
|
||||||
switch (state) {
|
switch (state) {
|
||||||
case "pending":
|
case "pending":
|
||||||
return (
|
return (
|
||||||
|
|
@ -176,12 +184,10 @@ function SortableHeader({
|
||||||
>
|
>
|
||||||
{icon && <span className="opacity-60">{icon}</span>}
|
{icon && <span className="opacity-60">{icon}</span>}
|
||||||
{children}
|
{children}
|
||||||
<span className={`transition-opacity ${isActive ? "opacity-100" : "opacity-0 group-hover:opacity-50"}`}>
|
<span
|
||||||
{isActive && sortDesc ? (
|
className={`transition-opacity ${isActive ? "opacity-100" : "opacity-0 group-hover:opacity-50"}`}
|
||||||
<ChevronDown size={14} />
|
>
|
||||||
) : (
|
{isActive && sortDesc ? <ChevronDown size={14} /> : <ChevronUp size={14} />}
|
||||||
<ChevronUp size={14} />
|
|
||||||
)}
|
|
||||||
</span>
|
</span>
|
||||||
</button>
|
</button>
|
||||||
);
|
);
|
||||||
|
|
@ -300,8 +306,10 @@ export function DocumentsTableShell({
|
||||||
|
|
||||||
// Only consider selectable documents for "select all" logic
|
// Only consider selectable documents for "select all" logic
|
||||||
const selectableDocs = sorted.filter(isSelectable);
|
const selectableDocs = sorted.filter(isSelectable);
|
||||||
const allSelectedOnPage = selectableDocs.length > 0 && selectableDocs.every((d) => selectedIds.has(d.id));
|
const allSelectedOnPage =
|
||||||
const someSelectedOnPage = selectableDocs.some((d) => selectedIds.has(d.id)) && !allSelectedOnPage;
|
selectableDocs.length > 0 && selectableDocs.every((d) => selectedIds.has(d.id));
|
||||||
|
const someSelectedOnPage =
|
||||||
|
selectableDocs.some((d) => selectedIds.has(d.id)) && !allSelectedOnPage;
|
||||||
|
|
||||||
const toggleAll = (checked: boolean) => {
|
const toggleAll = (checked: boolean) => {
|
||||||
const next = new Set(selectedIds);
|
const next = new Set(selectedIds);
|
||||||
|
|
@ -388,10 +396,7 @@ export function DocumentsTableShell({
|
||||||
</div>
|
</div>
|
||||||
</TableCell>
|
</TableCell>
|
||||||
<TableCell className="w-[35%] py-2.5 max-w-0 border-r border-border/40">
|
<TableCell className="w-[35%] py-2.5 max-w-0 border-r border-border/40">
|
||||||
<Skeleton
|
<Skeleton className="h-4" style={{ width: `${widthPercent}%` }} />
|
||||||
className="h-4"
|
|
||||||
style={{ width: `${widthPercent}%` }}
|
|
||||||
/>
|
|
||||||
</TableCell>
|
</TableCell>
|
||||||
{columnVisibility.document_type && (
|
{columnVisibility.document_type && (
|
||||||
<TableCell className="w-[20%] min-w-[120px] max-w-[200px] py-2.5 border-r border-border/40 overflow-hidden">
|
<TableCell className="w-[20%] min-w-[120px] max-w-[200px] py-2.5 border-r border-border/40 overflow-hidden">
|
||||||
|
|
@ -429,24 +434,15 @@ export function DocumentsTableShell({
|
||||||
<div className="flex items-start gap-3">
|
<div className="flex items-start gap-3">
|
||||||
<Skeleton className="h-4 w-4 mt-0.5 rounded" />
|
<Skeleton className="h-4 w-4 mt-0.5 rounded" />
|
||||||
<div className="flex-1 min-w-0 space-y-2">
|
<div className="flex-1 min-w-0 space-y-2">
|
||||||
<Skeleton
|
<Skeleton className="h-4" style={{ width: `${widthPercent}%` }} />
|
||||||
className="h-4"
|
|
||||||
style={{ width: `${widthPercent}%` }}
|
|
||||||
/>
|
|
||||||
<div className="flex flex-wrap items-center gap-2">
|
<div className="flex flex-wrap items-center gap-2">
|
||||||
<Skeleton className="h-5 w-20 rounded" />
|
<Skeleton className="h-5 w-20 rounded" />
|
||||||
{columnVisibility.created_by && (
|
{columnVisibility.created_by && <Skeleton className="h-3 w-14" />}
|
||||||
<Skeleton className="h-3 w-14" />
|
{columnVisibility.created_at && <Skeleton className="h-3 w-20" />}
|
||||||
)}
|
|
||||||
{columnVisibility.created_at && (
|
|
||||||
<Skeleton className="h-3 w-20" />
|
|
||||||
)}
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className="flex items-center gap-2">
|
<div className="flex items-center gap-2">
|
||||||
{columnVisibility.status && (
|
{columnVisibility.status && <Skeleton className="h-5 w-5 rounded-full" />}
|
||||||
<Skeleton className="h-5 w-5 rounded-full" />
|
|
||||||
)}
|
|
||||||
<Skeleton className="h-7 w-7 rounded" />
|
<Skeleton className="h-7 w-7 rounded" />
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
@ -549,9 +545,7 @@ export function DocumentsTableShell({
|
||||||
)}
|
)}
|
||||||
{columnVisibility.status && (
|
{columnVisibility.status && (
|
||||||
<TableHead className="w-20 text-center">
|
<TableHead className="w-20 text-center">
|
||||||
<span className="text-sm font-medium text-muted-foreground/70">
|
<span className="text-sm font-medium text-muted-foreground/70">Status</span>
|
||||||
Status
|
|
||||||
</span>
|
|
||||||
</TableHead>
|
</TableHead>
|
||||||
)}
|
)}
|
||||||
<TableHead className="w-10">
|
<TableHead className="w-10">
|
||||||
|
|
@ -580,9 +574,7 @@ export function DocumentsTableShell({
|
||||||
},
|
},
|
||||||
}}
|
}}
|
||||||
className={`border-b border-border/40 transition-colors ${
|
className={`border-b border-border/40 transition-colors ${
|
||||||
isSelected
|
isSelected ? "bg-primary/5 hover:bg-primary/8" : "hover:bg-muted/30"
|
||||||
? "bg-primary/5 hover:bg-primary/8"
|
|
||||||
: "hover:bg-muted/30"
|
|
||||||
}`}
|
}`}
|
||||||
>
|
>
|
||||||
<TableCell className="w-8 px-0 py-2.5 text-center">
|
<TableCell className="w-8 px-0 py-2.5 text-center">
|
||||||
|
|
@ -591,7 +583,9 @@ export function DocumentsTableShell({
|
||||||
checked={isSelected}
|
checked={isSelected}
|
||||||
onCheckedChange={(v) => canSelect && toggleOne(doc.id, !!v)}
|
onCheckedChange={(v) => canSelect && toggleOne(doc.id, !!v)}
|
||||||
disabled={!canSelect}
|
disabled={!canSelect}
|
||||||
aria-label={canSelect ? "Select row" : "Cannot select while processing"}
|
aria-label={
|
||||||
|
canSelect ? "Select row" : "Cannot select while processing"
|
||||||
|
}
|
||||||
className={`border-foreground data-[state=checked]:bg-primary data-[state=checked]:border-primary ${!canSelect ? "opacity-40 cursor-not-allowed" : ""}`}
|
className={`border-foreground data-[state=checked]:bg-primary data-[state=checked]:border-primary ${!canSelect ? "opacity-40 cursor-not-allowed" : ""}`}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
|
@ -639,7 +633,9 @@ export function DocumentsTableShell({
|
||||||
<TableCell className="w-32 py-2.5 text-sm text-foreground border-r border-border/40">
|
<TableCell className="w-32 py-2.5 text-sm text-foreground border-r border-border/40">
|
||||||
<Tooltip>
|
<Tooltip>
|
||||||
<TooltipTrigger asChild>
|
<TooltipTrigger asChild>
|
||||||
<span className="cursor-default">{formatRelativeDate(doc.created_at)}</span>
|
<span className="cursor-default">
|
||||||
|
{formatRelativeDate(doc.created_at)}
|
||||||
|
</span>
|
||||||
</TooltipTrigger>
|
</TooltipTrigger>
|
||||||
<TooltipContent side="top">
|
<TooltipContent side="top">
|
||||||
{formatAbsoluteDate(doc.created_at)}
|
{formatAbsoluteDate(doc.created_at)}
|
||||||
|
|
@ -720,9 +716,7 @@ export function DocumentsTableShell({
|
||||||
<div className="flex flex-wrap items-center gap-2">
|
<div className="flex flex-wrap items-center gap-2">
|
||||||
<DocumentTypeChip type={doc.document_type} />
|
<DocumentTypeChip type={doc.document_type} />
|
||||||
{columnVisibility.created_by && doc.created_by_name && (
|
{columnVisibility.created_by && doc.created_by_name && (
|
||||||
<span className="text-xs text-foreground">
|
<span className="text-xs text-foreground">{doc.created_by_name}</span>
|
||||||
{doc.created_by_name}
|
|
||||||
</span>
|
|
||||||
)}
|
)}
|
||||||
{columnVisibility.created_at && (
|
{columnVisibility.created_at && (
|
||||||
<Tooltip>
|
<Tooltip>
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,8 @@ export function RowActions({
|
||||||
);
|
);
|
||||||
|
|
||||||
// Documents in "pending" or "processing" state should show disabled delete
|
// Documents in "pending" or "processing" state should show disabled delete
|
||||||
const isBeingProcessed = document.status?.state === "pending" || document.status?.state === "processing";
|
const isBeingProcessed =
|
||||||
|
document.status?.state === "pending" || document.status?.state === "processing";
|
||||||
|
|
||||||
// SURFSENSE_DOCS are system-managed and should not show delete at all
|
// SURFSENSE_DOCS are system-managed and should not show delete at all
|
||||||
const shouldShowDelete = !NON_DELETABLE_DOCUMENT_TYPES.includes(
|
const shouldShowDelete = !NON_DELETABLE_DOCUMENT_TYPES.includes(
|
||||||
|
|
@ -67,8 +68,9 @@ export function RowActions({
|
||||||
} catch (error: unknown) {
|
} catch (error: unknown) {
|
||||||
console.error("Error deleting document:", error);
|
console.error("Error deleting document:", error);
|
||||||
// Check for 409 Conflict (document started processing after UI loaded)
|
// Check for 409 Conflict (document started processing after UI loaded)
|
||||||
const status = (error as { response?: { status?: number } })?.response?.status
|
const status =
|
||||||
?? (error as { status?: number })?.status;
|
(error as { response?: { status?: number } })?.response?.status ??
|
||||||
|
(error as { status?: number })?.status;
|
||||||
if (status === 409) {
|
if (status === 409) {
|
||||||
toast.error("Document is now being processed. Please try again later.");
|
toast.error("Document is now being processed. Please try again later.");
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -92,7 +94,11 @@ export function RowActions({
|
||||||
// Editable documents: show 3-dot dropdown with edit + delete
|
// Editable documents: show 3-dot dropdown with edit + delete
|
||||||
<DropdownMenu>
|
<DropdownMenu>
|
||||||
<DropdownMenuTrigger asChild>
|
<DropdownMenuTrigger asChild>
|
||||||
<Button variant="ghost" size="icon" className="h-8 w-8 text-muted-foreground hover:text-foreground hover:bg-muted/80">
|
<Button
|
||||||
|
variant="ghost"
|
||||||
|
size="icon"
|
||||||
|
className="h-8 w-8 text-muted-foreground hover:text-foreground hover:bg-muted/80"
|
||||||
|
>
|
||||||
<MoreHorizontal className="h-4 w-4" />
|
<MoreHorizontal className="h-4 w-4" />
|
||||||
<span className="sr-only">Open menu</span>
|
<span className="sr-only">Open menu</span>
|
||||||
</Button>
|
</Button>
|
||||||
|
|
@ -101,7 +107,9 @@ export function RowActions({
|
||||||
<DropdownMenuItem
|
<DropdownMenuItem
|
||||||
onClick={() => !isEditDisabled && handleEdit()}
|
onClick={() => !isEditDisabled && handleEdit()}
|
||||||
disabled={isEditDisabled}
|
disabled={isEditDisabled}
|
||||||
className={isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""}
|
className={
|
||||||
|
isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""
|
||||||
|
}
|
||||||
>
|
>
|
||||||
<Pencil className="mr-2 h-4 w-4" />
|
<Pencil className="mr-2 h-4 w-4" />
|
||||||
<span>Edit</span>
|
<span>Edit</span>
|
||||||
|
|
@ -110,7 +118,11 @@ export function RowActions({
|
||||||
<DropdownMenuItem
|
<DropdownMenuItem
|
||||||
onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
|
onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
|
||||||
disabled={isDeleteDisabled}
|
disabled={isDeleteDisabled}
|
||||||
className={isDeleteDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : "text-destructive focus:text-destructive"}
|
className={
|
||||||
|
isDeleteDisabled
|
||||||
|
? "text-muted-foreground cursor-not-allowed opacity-50"
|
||||||
|
: "text-destructive focus:text-destructive"
|
||||||
|
}
|
||||||
>
|
>
|
||||||
<Trash2 className="mr-2 h-4 w-4" />
|
<Trash2 className="mr-2 h-4 w-4" />
|
||||||
<span>Delete</span>
|
<span>Delete</span>
|
||||||
|
|
@ -150,7 +162,9 @@ export function RowActions({
|
||||||
<DropdownMenuItem
|
<DropdownMenuItem
|
||||||
onClick={() => !isEditDisabled && handleEdit()}
|
onClick={() => !isEditDisabled && handleEdit()}
|
||||||
disabled={isEditDisabled}
|
disabled={isEditDisabled}
|
||||||
className={isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""}
|
className={
|
||||||
|
isEditDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : ""
|
||||||
|
}
|
||||||
>
|
>
|
||||||
<Pencil className="mr-2 h-4 w-4" />
|
<Pencil className="mr-2 h-4 w-4" />
|
||||||
<span>Edit</span>
|
<span>Edit</span>
|
||||||
|
|
@ -159,7 +173,11 @@ export function RowActions({
|
||||||
<DropdownMenuItem
|
<DropdownMenuItem
|
||||||
onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
|
onClick={() => !isDeleteDisabled && setIsDeleteOpen(true)}
|
||||||
disabled={isDeleteDisabled}
|
disabled={isDeleteDisabled}
|
||||||
className={isDeleteDisabled ? "text-muted-foreground cursor-not-allowed opacity-50" : "text-destructive focus:text-destructive"}
|
className={
|
||||||
|
isDeleteDisabled
|
||||||
|
? "text-muted-foreground cursor-not-allowed opacity-50"
|
||||||
|
: "text-destructive focus:text-destructive"
|
||||||
|
}
|
||||||
>
|
>
|
||||||
<Trash2 className="mr-2 h-4 w-4" />
|
<Trash2 className="mr-2 h-4 w-4" />
|
||||||
<span>Delete</span>
|
<span>Delete</span>
|
||||||
|
|
|
||||||
|
|
@ -116,13 +116,15 @@ export default function DocumentsTable() {
|
||||||
created_by_id: item.created_by_id ?? null,
|
created_by_id: item.created_by_id ?? null,
|
||||||
created_by_name: item.created_by_name ?? null,
|
created_by_name: item.created_by_name ?? null,
|
||||||
created_at: item.created_at,
|
created_at: item.created_at,
|
||||||
status: (item as { status?: { state: "ready" | "pending" | "processing" | "failed"; reason?: string } }).status ?? { state: "ready" as const },
|
status: (
|
||||||
|
item as {
|
||||||
|
status?: { state: "ready" | "pending" | "processing" | "failed"; reason?: string };
|
||||||
|
}
|
||||||
|
).status ?? { state: "ready" as const },
|
||||||
}))
|
}))
|
||||||
: paginatedRealtimeDocuments;
|
: paginatedRealtimeDocuments;
|
||||||
|
|
||||||
const displayTotal = isSearchMode
|
const displayTotal = isSearchMode ? searchResponse?.total || 0 : sortedRealtimeDocuments.length;
|
||||||
? searchResponse?.total || 0
|
|
||||||
: sortedRealtimeDocuments.length;
|
|
||||||
|
|
||||||
const loading = isSearchMode ? isSearchLoading : realtimeLoading;
|
const loading = isSearchMode ? isSearchLoading : realtimeLoading;
|
||||||
const error = isSearchMode ? searchError : realtimeError;
|
const error = isSearchMode ? searchError : realtimeError;
|
||||||
|
|
@ -149,13 +151,13 @@ export default function DocumentsTable() {
|
||||||
// Filter out pending/processing documents - they cannot be deleted
|
// Filter out pending/processing documents - they cannot be deleted
|
||||||
// For real-time mode, use sortedRealtimeDocuments (which has status)
|
// For real-time mode, use sortedRealtimeDocuments (which has status)
|
||||||
// For search mode, use searchResponse items (need to safely access status)
|
// For search mode, use searchResponse items (need to safely access status)
|
||||||
const allDocs = isSearchMode
|
const allDocs = isSearchMode
|
||||||
? (searchResponse?.items || []).map(item => ({
|
? (searchResponse?.items || []).map((item) => ({
|
||||||
id: item.id,
|
id: item.id,
|
||||||
status: (item as { status?: { state: string } }).status,
|
status: (item as { status?: { state: string } }).status,
|
||||||
}))
|
}))
|
||||||
: sortedRealtimeDocuments.map(doc => ({ id: doc.id, status: doc.status }));
|
: sortedRealtimeDocuments.map((doc) => ({ id: doc.id, status: doc.status }));
|
||||||
|
|
||||||
const selectedDocs = allDocs.filter((doc) => selectedIds.has(doc.id));
|
const selectedDocs = allDocs.filter((doc) => selectedIds.has(doc.id));
|
||||||
const deletableIds = selectedDocs
|
const deletableIds = selectedDocs
|
||||||
.filter((doc) => doc.status?.state !== "pending" && doc.status?.state !== "processing")
|
.filter((doc) => doc.status?.state !== "pending" && doc.status?.state !== "processing")
|
||||||
|
|
@ -163,7 +165,9 @@ export default function DocumentsTable() {
|
||||||
const inProgressCount = selectedIds.size - deletableIds.length;
|
const inProgressCount = selectedIds.size - deletableIds.length;
|
||||||
|
|
||||||
if (inProgressCount > 0) {
|
if (inProgressCount > 0) {
|
||||||
toast.warning(`${inProgressCount} document(s) are pending or processing and cannot be deleted.`);
|
toast.warning(
|
||||||
|
`${inProgressCount} document(s) are pending or processing and cannot be deleted.`
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (deletableIds.length === 0) {
|
if (deletableIds.length === 0) {
|
||||||
|
|
@ -180,8 +184,9 @@ export default function DocumentsTable() {
|
||||||
await deleteDocumentMutation({ id });
|
await deleteDocumentMutation({ id });
|
||||||
return true;
|
return true;
|
||||||
} catch (error: unknown) {
|
} catch (error: unknown) {
|
||||||
const status = (error as { response?: { status?: number } })?.response?.status
|
const status =
|
||||||
?? (error as { status?: number })?.status;
|
(error as { response?: { status?: number } })?.response?.status ??
|
||||||
|
(error as { status?: number })?.status;
|
||||||
if (status === 409) conflictCount++;
|
if (status === 409) conflictCount++;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -195,13 +200,13 @@ export default function DocumentsTable() {
|
||||||
} else {
|
} else {
|
||||||
toast.error(t("delete_partial_failed"));
|
toast.error(t("delete_partial_failed"));
|
||||||
}
|
}
|
||||||
|
|
||||||
// If in search mode, refetch search results to reflect deletion
|
// If in search mode, refetch search results to reflect deletion
|
||||||
if (isSearchMode) {
|
if (isSearchMode) {
|
||||||
await refetchSearch();
|
await refetchSearch();
|
||||||
}
|
}
|
||||||
// Real-time mode: Electric will sync the deletion automatically
|
// Real-time mode: Electric will sync the deletion automatically
|
||||||
|
|
||||||
setSelectedIds(new Set());
|
setSelectedIds(new Set());
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error(e);
|
console.error(e);
|
||||||
|
|
@ -210,21 +215,24 @@ export default function DocumentsTable() {
|
||||||
};
|
};
|
||||||
|
|
||||||
// Single document delete handler for RowActions
|
// Single document delete handler for RowActions
|
||||||
const handleDeleteDocument = useCallback(async (id: number): Promise<boolean> => {
|
const handleDeleteDocument = useCallback(
|
||||||
try {
|
async (id: number): Promise<boolean> => {
|
||||||
await deleteDocumentMutation({ id });
|
try {
|
||||||
toast.success(t("delete_success") || "Document deleted");
|
await deleteDocumentMutation({ id });
|
||||||
// If in search mode, refetch search results to reflect deletion
|
toast.success(t("delete_success") || "Document deleted");
|
||||||
if (isSearchMode) {
|
// If in search mode, refetch search results to reflect deletion
|
||||||
await refetchSearch();
|
if (isSearchMode) {
|
||||||
|
await refetchSearch();
|
||||||
|
}
|
||||||
|
// Real-time mode: Electric will sync the deletion automatically
|
||||||
|
return true;
|
||||||
|
} catch (e) {
|
||||||
|
console.error("Error deleting document:", e);
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
// Real-time mode: Electric will sync the deletion automatically
|
},
|
||||||
return true;
|
[deleteDocumentMutation, isSearchMode, refetchSearch, t]
|
||||||
} catch (e) {
|
);
|
||||||
console.error("Error deleting document:", e);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}, [deleteDocumentMutation, isSearchMode, refetchSearch, t]);
|
|
||||||
|
|
||||||
const handleSortChange = useCallback((key: SortKey) => {
|
const handleSortChange = useCallback((key: SortKey) => {
|
||||||
setSortKey((currentKey) => {
|
setSortKey((currentKey) => {
|
||||||
|
|
|
||||||
|
|
@ -2,4 +2,3 @@ import { atom } from "jotai";
|
||||||
|
|
||||||
// Atom to control the connector dialog open state from anywhere in the app
|
// Atom to control the connector dialog open state from anywhere in the app
|
||||||
export const connectorDialogOpenAtom = atom(false);
|
export const connectorDialogOpenAtom = atom(false);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -191,7 +191,9 @@ export const ConnectorIndicator: FC<{ hideTrigger?: boolean }> = ({ hideTrigger
|
||||||
{!hideTrigger && (
|
{!hideTrigger && (
|
||||||
<TooltipIconButton
|
<TooltipIconButton
|
||||||
data-joyride="connector-icon"
|
data-joyride="connector-icon"
|
||||||
tooltip={hasConnectors ? `Manage ${activeConnectorsCount} connectors` : "Connect your data"}
|
tooltip={
|
||||||
|
hasConnectors ? `Manage ${activeConnectorsCount} connectors` : "Connect your data"
|
||||||
|
}
|
||||||
side="bottom"
|
side="bottom"
|
||||||
className={cn(
|
className={cn(
|
||||||
"size-[34px] rounded-full p-1 flex items-center justify-center transition-colors relative",
|
"size-[34px] rounded-full p-1 flex items-center justify-center transition-colors relative",
|
||||||
|
|
|
||||||
|
|
@ -346,13 +346,13 @@ export const useConnectorDialog = () => {
|
||||||
const connectorId = parseInt(params.connectorId, 10);
|
const connectorId = parseInt(params.connectorId, 10);
|
||||||
newConnector = result.data.find((c: SearchSourceConnector) => c.id === connectorId);
|
newConnector = result.data.find((c: SearchSourceConnector) => c.id === connectorId);
|
||||||
|
|
||||||
// If we found the connector, find the matching OAuth/Composio connector by type
|
// If we found the connector, find the matching OAuth/Composio connector by type
|
||||||
if (newConnector) {
|
if (newConnector) {
|
||||||
const connectorType = newConnector.connector_type;
|
const connectorType = newConnector.connector_type;
|
||||||
oauthConnector =
|
oauthConnector =
|
||||||
OAUTH_CONNECTORS.find((c) => c.connectorType === connectorType) ||
|
OAUTH_CONNECTORS.find((c) => c.connectorType === connectorType) ||
|
||||||
COMPOSIO_CONNECTORS.find((c) => c.connectorType === connectorType);
|
COMPOSIO_CONNECTORS.find((c) => c.connectorType === connectorType);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we don't have a connector yet, try to find by connector param
|
// If we don't have a connector yet, try to find by connector param
|
||||||
|
|
@ -361,12 +361,12 @@ export const useConnectorDialog = () => {
|
||||||
OAUTH_CONNECTORS.find((c) => c.id === params.connector) ||
|
OAUTH_CONNECTORS.find((c) => c.id === params.connector) ||
|
||||||
COMPOSIO_CONNECTORS.find((c) => c.id === params.connector);
|
COMPOSIO_CONNECTORS.find((c) => c.id === params.connector);
|
||||||
|
|
||||||
if (oauthConnector) {
|
if (oauthConnector) {
|
||||||
const oauthConnectorType = oauthConnector.connectorType;
|
const oauthConnectorType = oauthConnector.connectorType;
|
||||||
newConnector = result.data.find(
|
newConnector = result.data.find(
|
||||||
(c: SearchSourceConnector) => c.connector_type === oauthConnectorType
|
(c: SearchSourceConnector) => c.connector_type === oauthConnectorType
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (newConnector && oauthConnector) {
|
if (newConnector && oauthConnector) {
|
||||||
|
|
@ -679,11 +679,11 @@ export const useConnectorDialog = () => {
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
const successMessage =
|
const successMessage =
|
||||||
currentConnectorType === "MCP_CONNECTOR"
|
currentConnectorType === "MCP_CONNECTOR"
|
||||||
? `${connector.name} added successfully`
|
? `${connector.name} added successfully`
|
||||||
: `${connectorTitle} connected and syncing started!`;
|
: `${connectorTitle} connected and syncing started!`;
|
||||||
toast.success(successMessage);
|
toast.success(successMessage);
|
||||||
|
|
||||||
const url = new URL(window.location.href);
|
const url = new URL(window.location.href);
|
||||||
url.searchParams.delete("modal");
|
url.searchParams.delete("modal");
|
||||||
|
|
|
||||||
|
|
@ -8,172 +8,167 @@ import { cn } from "@/lib/utils";
|
||||||
|
|
||||||
// ///////////////////////////////////////////////////////////////////////////
|
// ///////////////////////////////////////////////////////////////////////////
|
||||||
// Types
|
// Types
|
||||||
export type AnimationVariant =
|
export type AnimationVariant = "circle" | "rectangle" | "gif" | "polygon" | "circle-blur";
|
||||||
| "circle"
|
|
||||||
| "rectangle"
|
|
||||||
| "gif"
|
|
||||||
| "polygon"
|
|
||||||
| "circle-blur";
|
|
||||||
export type AnimationStart =
|
export type AnimationStart =
|
||||||
| "top-left"
|
| "top-left"
|
||||||
| "top-right"
|
| "top-right"
|
||||||
| "bottom-left"
|
| "bottom-left"
|
||||||
| "bottom-right"
|
| "bottom-right"
|
||||||
| "center"
|
| "center"
|
||||||
| "top-center"
|
| "top-center"
|
||||||
| "bottom-center"
|
| "bottom-center"
|
||||||
| "bottom-up"
|
| "bottom-up"
|
||||||
| "top-down"
|
| "top-down"
|
||||||
| "left-right"
|
| "left-right"
|
||||||
| "right-left";
|
| "right-left";
|
||||||
|
|
||||||
interface Animation {
|
interface Animation {
|
||||||
name: string;
|
name: string;
|
||||||
css: string;
|
css: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ///////////////////////////////////////////////////////////////////////////
|
// ///////////////////////////////////////////////////////////////////////////
|
||||||
// Helper functions
|
// Helper functions
|
||||||
|
|
||||||
const getPositionCoords = (position: AnimationStart) => {
|
const getPositionCoords = (position: AnimationStart) => {
|
||||||
switch (position) {
|
switch (position) {
|
||||||
case "top-left":
|
case "top-left":
|
||||||
return { cx: "0", cy: "0" };
|
return { cx: "0", cy: "0" };
|
||||||
case "top-right":
|
case "top-right":
|
||||||
return { cx: "40", cy: "0" };
|
return { cx: "40", cy: "0" };
|
||||||
case "bottom-left":
|
case "bottom-left":
|
||||||
return { cx: "0", cy: "40" };
|
return { cx: "0", cy: "40" };
|
||||||
case "bottom-right":
|
case "bottom-right":
|
||||||
return { cx: "40", cy: "40" };
|
return { cx: "40", cy: "40" };
|
||||||
case "top-center":
|
case "top-center":
|
||||||
return { cx: "20", cy: "0" };
|
return { cx: "20", cy: "0" };
|
||||||
case "bottom-center":
|
case "bottom-center":
|
||||||
return { cx: "20", cy: "40" };
|
return { cx: "20", cy: "40" };
|
||||||
case "bottom-up":
|
case "bottom-up":
|
||||||
case "top-down":
|
case "top-down":
|
||||||
case "left-right":
|
case "left-right":
|
||||||
case "right-left":
|
case "right-left":
|
||||||
return { cx: "20", cy: "20" };
|
return { cx: "20", cy: "20" };
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const generateSVG = (variant: AnimationVariant, start: AnimationStart) => {
|
const generateSVG = (variant: AnimationVariant, start: AnimationStart) => {
|
||||||
if (variant === "circle-blur") {
|
if (variant === "circle-blur") {
|
||||||
if (start === "center") {
|
if (start === "center") {
|
||||||
return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><defs><filter id="blur"><feGaussianBlur stdDeviation="2"/></filter></defs><circle cx="20" cy="20" r="18" fill="white" filter="url(%23blur)"/></svg>`;
|
return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><defs><filter id="blur"><feGaussianBlur stdDeviation="2"/></filter></defs><circle cx="20" cy="20" r="18" fill="white" filter="url(%23blur)"/></svg>`;
|
||||||
}
|
}
|
||||||
const positionCoords = getPositionCoords(start);
|
const positionCoords = getPositionCoords(start);
|
||||||
if (!positionCoords) {
|
if (!positionCoords) {
|
||||||
throw new Error(`Invalid start position: ${start}`);
|
throw new Error(`Invalid start position: ${start}`);
|
||||||
}
|
}
|
||||||
const { cx, cy } = positionCoords;
|
const { cx, cy } = positionCoords;
|
||||||
return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><defs><filter id="blur"><feGaussianBlur stdDeviation="2"/></filter></defs><circle cx="${cx}" cy="${cy}" r="18" fill="white" filter="url(%23blur)"/></svg>`;
|
return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><defs><filter id="blur"><feGaussianBlur stdDeviation="2"/></filter></defs><circle cx="${cx}" cy="${cy}" r="18" fill="white" filter="url(%23blur)"/></svg>`;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (start === "center") return;
|
if (start === "center") return;
|
||||||
|
|
||||||
if (variant === "rectangle") return "";
|
if (variant === "rectangle") return "";
|
||||||
|
|
||||||
const positionCoords = getPositionCoords(start);
|
const positionCoords = getPositionCoords(start);
|
||||||
if (!positionCoords) {
|
if (!positionCoords) {
|
||||||
throw new Error(`Invalid start position: ${start}`);
|
throw new Error(`Invalid start position: ${start}`);
|
||||||
}
|
}
|
||||||
const { cx, cy } = positionCoords;
|
const { cx, cy } = positionCoords;
|
||||||
|
|
||||||
if (variant === "circle") {
|
if (variant === "circle") {
|
||||||
return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><circle cx="${cx}" cy="${cy}" r="20" fill="white"/></svg>`;
|
return `data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 40"><circle cx="${cx}" cy="${cy}" r="20" fill="white"/></svg>`;
|
||||||
}
|
}
|
||||||
|
|
||||||
return "";
|
return "";
|
||||||
};
|
};
|
||||||
|
|
||||||
const getTransformOrigin = (start: AnimationStart) => {
|
const getTransformOrigin = (start: AnimationStart) => {
|
||||||
switch (start) {
|
switch (start) {
|
||||||
case "top-left":
|
case "top-left":
|
||||||
return "top left";
|
return "top left";
|
||||||
case "top-right":
|
case "top-right":
|
||||||
return "top right";
|
return "top right";
|
||||||
case "bottom-left":
|
case "bottom-left":
|
||||||
return "bottom left";
|
return "bottom left";
|
||||||
case "bottom-right":
|
case "bottom-right":
|
||||||
return "bottom right";
|
return "bottom right";
|
||||||
case "top-center":
|
case "top-center":
|
||||||
return "top center";
|
return "top center";
|
||||||
case "bottom-center":
|
case "bottom-center":
|
||||||
return "bottom center";
|
return "bottom center";
|
||||||
case "bottom-up":
|
case "bottom-up":
|
||||||
case "top-down":
|
case "top-down":
|
||||||
case "left-right":
|
case "left-right":
|
||||||
case "right-left":
|
case "right-left":
|
||||||
return "center";
|
return "center";
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
export const createAnimation = (
|
export const createAnimation = (
|
||||||
variant: AnimationVariant,
|
variant: AnimationVariant,
|
||||||
start: AnimationStart = "center",
|
start: AnimationStart = "center",
|
||||||
blur = false,
|
blur = false,
|
||||||
url?: string,
|
url?: string
|
||||||
): Animation => {
|
): Animation => {
|
||||||
const svg = generateSVG(variant, start);
|
const svg = generateSVG(variant, start);
|
||||||
const transformOrigin = getTransformOrigin(start);
|
const transformOrigin = getTransformOrigin(start);
|
||||||
|
|
||||||
if (variant === "rectangle") {
|
if (variant === "rectangle") {
|
||||||
const getClipPath = (direction: AnimationStart) => {
|
const getClipPath = (direction: AnimationStart) => {
|
||||||
switch (direction) {
|
switch (direction) {
|
||||||
case "bottom-up":
|
case "bottom-up":
|
||||||
return {
|
return {
|
||||||
from: "polygon(0% 100%, 100% 100%, 100% 100%, 0% 100%)",
|
from: "polygon(0% 100%, 100% 100%, 100% 100%, 0% 100%)",
|
||||||
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
|
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
|
||||||
};
|
};
|
||||||
case "top-down":
|
case "top-down":
|
||||||
return {
|
return {
|
||||||
from: "polygon(0% 0%, 100% 0%, 100% 0%, 0% 0%)",
|
from: "polygon(0% 0%, 100% 0%, 100% 0%, 0% 0%)",
|
||||||
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
|
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
|
||||||
};
|
};
|
||||||
case "left-right":
|
case "left-right":
|
||||||
return {
|
return {
|
||||||
from: "polygon(0% 0%, 0% 0%, 0% 100%, 0% 100%)",
|
from: "polygon(0% 0%, 0% 0%, 0% 100%, 0% 100%)",
|
||||||
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
|
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
|
||||||
};
|
};
|
||||||
case "right-left":
|
case "right-left":
|
||||||
return {
|
return {
|
||||||
from: "polygon(100% 0%, 100% 0%, 100% 100%, 100% 100%)",
|
from: "polygon(100% 0%, 100% 0%, 100% 100%, 100% 100%)",
|
||||||
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
|
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
|
||||||
};
|
};
|
||||||
case "top-left":
|
case "top-left":
|
||||||
return {
|
return {
|
||||||
from: "polygon(0% 0%, 0% 0%, 0% 0%, 0% 0%)",
|
from: "polygon(0% 0%, 0% 0%, 0% 0%, 0% 0%)",
|
||||||
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
|
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
|
||||||
};
|
};
|
||||||
case "top-right":
|
case "top-right":
|
||||||
return {
|
return {
|
||||||
from: "polygon(100% 0%, 100% 0%, 100% 0%, 100% 0%)",
|
from: "polygon(100% 0%, 100% 0%, 100% 0%, 100% 0%)",
|
||||||
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
|
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
|
||||||
};
|
};
|
||||||
case "bottom-left":
|
case "bottom-left":
|
||||||
return {
|
return {
|
||||||
from: "polygon(0% 100%, 0% 100%, 0% 100%, 0% 100%)",
|
from: "polygon(0% 100%, 0% 100%, 0% 100%, 0% 100%)",
|
||||||
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
|
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
|
||||||
};
|
};
|
||||||
case "bottom-right":
|
case "bottom-right":
|
||||||
return {
|
return {
|
||||||
from: "polygon(100% 100%, 100% 100%, 100% 100%, 100% 100%)",
|
from: "polygon(100% 100%, 100% 100%, 100% 100%, 100% 100%)",
|
||||||
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
|
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
|
||||||
};
|
};
|
||||||
default:
|
default:
|
||||||
return {
|
return {
|
||||||
from: "polygon(0% 100%, 100% 100%, 100% 100%, 0% 100%)",
|
from: "polygon(0% 100%, 100% 100%, 100% 100%, 0% 100%)",
|
||||||
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
|
to: "polygon(0% 0%, 100% 0%, 100% 100%, 0% 100%)",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const clipPath = getClipPath(start);
|
const clipPath = getClipPath(start);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
name: `${variant}-${start}${blur ? "-blur" : ""}`,
|
name: `${variant}-${start}${blur ? "-blur" : ""}`,
|
||||||
css: `
|
css: `
|
||||||
::view-transition-group(root) {
|
::view-transition-group(root) {
|
||||||
animation-duration: 0.7s;
|
animation-duration: 0.7s;
|
||||||
animation-timing-function: var(--expo-out);
|
animation-timing-function: var(--expo-out);
|
||||||
|
|
@ -218,12 +213,12 @@ export const createAnimation = (
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
`,
|
`,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
if (variant === "circle" && start == "center") {
|
if (variant === "circle" && start == "center") {
|
||||||
return {
|
return {
|
||||||
name: `${variant}-${start}${blur ? "-blur" : ""}`,
|
name: `${variant}-${start}${blur ? "-blur" : ""}`,
|
||||||
css: `
|
css: `
|
||||||
::view-transition-group(root) {
|
::view-transition-group(root) {
|
||||||
animation-duration: 0.7s;
|
animation-duration: 0.7s;
|
||||||
animation-timing-function: var(--expo-out);
|
animation-timing-function: var(--expo-out);
|
||||||
|
|
@ -268,12 +263,12 @@ export const createAnimation = (
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
`,
|
`,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
if (variant === "gif") {
|
if (variant === "gif") {
|
||||||
return {
|
return {
|
||||||
name: `${variant}-${start}`,
|
name: `${variant}-${start}`,
|
||||||
css: `
|
css: `
|
||||||
::view-transition-group(root) {
|
::view-transition-group(root) {
|
||||||
animation-timing-function: var(--expo-in);
|
animation-timing-function: var(--expo-in);
|
||||||
}
|
}
|
||||||
|
|
@ -302,14 +297,14 @@ export const createAnimation = (
|
||||||
mask-size: 2000vmax;
|
mask-size: 2000vmax;
|
||||||
}
|
}
|
||||||
}`,
|
}`,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (variant === "circle-blur") {
|
if (variant === "circle-blur") {
|
||||||
if (start === "center") {
|
if (start === "center") {
|
||||||
return {
|
return {
|
||||||
name: `${variant}-${start}`,
|
name: `${variant}-${start}`,
|
||||||
css: `
|
css: `
|
||||||
::view-transition-group(root) {
|
::view-transition-group(root) {
|
||||||
animation-timing-function: var(--expo-out);
|
animation-timing-function: var(--expo-out);
|
||||||
}
|
}
|
||||||
|
|
@ -334,12 +329,12 @@ export const createAnimation = (
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
`,
|
`,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
name: `${variant}-${start}`,
|
name: `${variant}-${start}`,
|
||||||
css: `
|
css: `
|
||||||
::view-transition-group(root) {
|
::view-transition-group(root) {
|
||||||
animation-timing-function: var(--expo-out);
|
animation-timing-function: var(--expo-out);
|
||||||
}
|
}
|
||||||
|
|
@ -364,41 +359,41 @@ export const createAnimation = (
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
`,
|
`,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (variant === "polygon") {
|
if (variant === "polygon") {
|
||||||
const getPolygonClipPaths = (position: AnimationStart) => {
|
const getPolygonClipPaths = (position: AnimationStart) => {
|
||||||
switch (position) {
|
switch (position) {
|
||||||
case "top-left":
|
case "top-left":
|
||||||
return {
|
return {
|
||||||
darkFrom: "polygon(50% -71%, -50% 71%, -50% 71%, 50% -71%)",
|
darkFrom: "polygon(50% -71%, -50% 71%, -50% 71%, 50% -71%)",
|
||||||
darkTo: "polygon(50% -71%, -50% 71%, 50% 171%, 171% 50%)",
|
darkTo: "polygon(50% -71%, -50% 71%, 50% 171%, 171% 50%)",
|
||||||
lightFrom: "polygon(171% 50%, 50% 171%, 50% 171%, 171% 50%)",
|
lightFrom: "polygon(171% 50%, 50% 171%, 50% 171%, 171% 50%)",
|
||||||
lightTo: "polygon(171% 50%, 50% 171%, -50% 71%, 50% -71%)",
|
lightTo: "polygon(171% 50%, 50% 171%, -50% 71%, 50% -71%)",
|
||||||
};
|
};
|
||||||
case "top-right":
|
case "top-right":
|
||||||
return {
|
return {
|
||||||
darkFrom: "polygon(150% -71%, 250% 71%, 250% 71%, 150% -71%)",
|
darkFrom: "polygon(150% -71%, 250% 71%, 250% 71%, 150% -71%)",
|
||||||
darkTo: "polygon(150% -71%, 250% 71%, 50% 171%, -71% 50%)",
|
darkTo: "polygon(150% -71%, 250% 71%, 50% 171%, -71% 50%)",
|
||||||
lightFrom: "polygon(-71% 50%, 50% 171%, 50% 171%, -71% 50%)",
|
lightFrom: "polygon(-71% 50%, 50% 171%, 50% 171%, -71% 50%)",
|
||||||
lightTo: "polygon(-71% 50%, 50% 171%, 250% 71%, 150% -71%)",
|
lightTo: "polygon(-71% 50%, 50% 171%, 250% 71%, 150% -71%)",
|
||||||
};
|
};
|
||||||
default:
|
default:
|
||||||
return {
|
return {
|
||||||
darkFrom: "polygon(50% -71%, -50% 71%, -50% 71%, 50% -71%)",
|
darkFrom: "polygon(50% -71%, -50% 71%, -50% 71%, 50% -71%)",
|
||||||
darkTo: "polygon(50% -71%, -50% 71%, 50% 171%, 171% 50%)",
|
darkTo: "polygon(50% -71%, -50% 71%, 50% 171%, 171% 50%)",
|
||||||
lightFrom: "polygon(171% 50%, 50% 171%, 50% 171%, 171% 50%)",
|
lightFrom: "polygon(171% 50%, 50% 171%, 50% 171%, 171% 50%)",
|
||||||
lightTo: "polygon(171% 50%, 50% 171%, -50% 71%, 50% -71%)",
|
lightTo: "polygon(171% 50%, 50% 171%, -50% 71%, 50% -71%)",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const clipPaths = getPolygonClipPaths(start);
|
const clipPaths = getPolygonClipPaths(start);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
name: `${variant}-${start}${blur ? "-blur" : ""}`,
|
name: `${variant}-${start}${blur ? "-blur" : ""}`,
|
||||||
css: `
|
css: `
|
||||||
::view-transition-group(root) {
|
::view-transition-group(root) {
|
||||||
animation-duration: 0.7s;
|
animation-duration: 0.7s;
|
||||||
animation-timing-function: var(--expo-out);
|
animation-timing-function: var(--expo-out);
|
||||||
|
|
@ -443,35 +438,35 @@ export const createAnimation = (
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
`,
|
`,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle circle variants with start positions using clip-path
|
// Handle circle variants with start positions using clip-path
|
||||||
if (variant === "circle" && start !== "center") {
|
if (variant === "circle" && start !== "center") {
|
||||||
const getClipPathPosition = (position: AnimationStart) => {
|
const getClipPathPosition = (position: AnimationStart) => {
|
||||||
switch (position) {
|
switch (position) {
|
||||||
case "top-left":
|
case "top-left":
|
||||||
return "0% 0%";
|
return "0% 0%";
|
||||||
case "top-right":
|
case "top-right":
|
||||||
return "100% 0%";
|
return "100% 0%";
|
||||||
case "bottom-left":
|
case "bottom-left":
|
||||||
return "0% 100%";
|
return "0% 100%";
|
||||||
case "bottom-right":
|
case "bottom-right":
|
||||||
return "100% 100%";
|
return "100% 100%";
|
||||||
case "top-center":
|
case "top-center":
|
||||||
return "50% 0%";
|
return "50% 0%";
|
||||||
case "bottom-center":
|
case "bottom-center":
|
||||||
return "50% 100%";
|
return "50% 100%";
|
||||||
default:
|
default:
|
||||||
return "50% 50%";
|
return "50% 50%";
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const clipPosition = getClipPathPosition(start);
|
const clipPosition = getClipPathPosition(start);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
name: `${variant}-${start}${blur ? "-blur" : ""}`,
|
name: `${variant}-${start}${blur ? "-blur" : ""}`,
|
||||||
css: `
|
css: `
|
||||||
::view-transition-group(root) {
|
::view-transition-group(root) {
|
||||||
animation-duration: 1s;
|
animation-duration: 1s;
|
||||||
animation-timing-function: var(--expo-out);
|
animation-timing-function: var(--expo-out);
|
||||||
|
|
@ -516,12 +511,12 @@ export const createAnimation = (
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
`,
|
`,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
name: `${variant}-${start}${blur ? "-blur" : ""}`,
|
name: `${variant}-${start}${blur ? "-blur" : ""}`,
|
||||||
css: `
|
css: `
|
||||||
::view-transition-group(root) {
|
::view-transition-group(root) {
|
||||||
animation-timing-function: var(--expo-in);
|
animation-timing-function: var(--expo-in);
|
||||||
}
|
}
|
||||||
|
|
@ -549,237 +544,229 @@ export const createAnimation = (
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
`,
|
`,
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
// ///////////////////////////////////////////////////////////////////////////
|
// ///////////////////////////////////////////////////////////////////////////
|
||||||
// Custom hook for theme toggle functionality
|
// Custom hook for theme toggle functionality
|
||||||
export const useThemeToggle = ({
|
export const useThemeToggle = ({
|
||||||
variant = "circle",
|
variant = "circle",
|
||||||
start = "center",
|
start = "center",
|
||||||
blur = false,
|
blur = false,
|
||||||
gifUrl = "",
|
gifUrl = "",
|
||||||
}: {
|
}: {
|
||||||
variant?: AnimationVariant;
|
variant?: AnimationVariant;
|
||||||
start?: AnimationStart;
|
start?: AnimationStart;
|
||||||
blur?: boolean;
|
blur?: boolean;
|
||||||
gifUrl?: string;
|
gifUrl?: string;
|
||||||
} = {}) => {
|
} = {}) => {
|
||||||
const { theme, setTheme, resolvedTheme } = useTheme();
|
const { theme, setTheme, resolvedTheme } = useTheme();
|
||||||
|
|
||||||
const [isDark, setIsDark] = useState(false);
|
const [isDark, setIsDark] = useState(false);
|
||||||
|
|
||||||
// Sync isDark state with resolved theme after hydration
|
// Sync isDark state with resolved theme after hydration
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
setIsDark(resolvedTheme === "dark");
|
setIsDark(resolvedTheme === "dark");
|
||||||
}, [resolvedTheme]);
|
}, [resolvedTheme]);
|
||||||
|
|
||||||
const styleId = "theme-transition-styles";
|
const styleId = "theme-transition-styles";
|
||||||
|
|
||||||
const updateStyles = useCallback((css: string) => {
|
const updateStyles = useCallback((css: string) => {
|
||||||
if (typeof window === "undefined") return;
|
if (typeof window === "undefined") return;
|
||||||
|
|
||||||
let styleElement = document.getElementById(styleId) as HTMLStyleElement;
|
let styleElement = document.getElementById(styleId) as HTMLStyleElement;
|
||||||
|
|
||||||
if (!styleElement) {
|
if (!styleElement) {
|
||||||
styleElement = document.createElement("style");
|
styleElement = document.createElement("style");
|
||||||
styleElement.id = styleId;
|
styleElement.id = styleId;
|
||||||
document.head.appendChild(styleElement);
|
document.head.appendChild(styleElement);
|
||||||
}
|
}
|
||||||
|
|
||||||
styleElement.textContent = css;
|
styleElement.textContent = css;
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
const toggleTheme = useCallback(() => {
|
const toggleTheme = useCallback(() => {
|
||||||
setIsDark(!isDark);
|
setIsDark(!isDark);
|
||||||
|
|
||||||
const animation = createAnimation(variant, start, blur, gifUrl);
|
const animation = createAnimation(variant, start, blur, gifUrl);
|
||||||
|
|
||||||
updateStyles(animation.css);
|
updateStyles(animation.css);
|
||||||
|
|
||||||
if (typeof window === "undefined") return;
|
if (typeof window === "undefined") return;
|
||||||
|
|
||||||
const switchTheme = () => {
|
const switchTheme = () => {
|
||||||
setTheme(theme === "light" ? "dark" : "light");
|
setTheme(theme === "light" ? "dark" : "light");
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!document.startViewTransition) {
|
if (!document.startViewTransition) {
|
||||||
switchTheme();
|
switchTheme();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
document.startViewTransition(switchTheme);
|
document.startViewTransition(switchTheme);
|
||||||
}, [theme, setTheme, variant, start, blur, gifUrl, updateStyles, isDark]);
|
}, [theme, setTheme, variant, start, blur, gifUrl, updateStyles, isDark]);
|
||||||
|
|
||||||
const setCrazyLightTheme = useCallback(() => {
|
const setCrazyLightTheme = useCallback(() => {
|
||||||
setIsDark(false);
|
setIsDark(false);
|
||||||
|
|
||||||
const animation = createAnimation(variant, start, blur, gifUrl);
|
const animation = createAnimation(variant, start, blur, gifUrl);
|
||||||
|
|
||||||
updateStyles(animation.css);
|
updateStyles(animation.css);
|
||||||
|
|
||||||
if (typeof window === "undefined") return;
|
if (typeof window === "undefined") return;
|
||||||
|
|
||||||
const switchTheme = () => {
|
const switchTheme = () => {
|
||||||
setTheme("light");
|
setTheme("light");
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!document.startViewTransition) {
|
if (!document.startViewTransition) {
|
||||||
switchTheme();
|
switchTheme();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
document.startViewTransition(switchTheme);
|
document.startViewTransition(switchTheme);
|
||||||
}, [setTheme, variant, start, blur, gifUrl, updateStyles]);
|
}, [setTheme, variant, start, blur, gifUrl, updateStyles]);
|
||||||
|
|
||||||
const setCrazyDarkTheme = useCallback(() => {
|
const setCrazyDarkTheme = useCallback(() => {
|
||||||
setIsDark(true);
|
setIsDark(true);
|
||||||
|
|
||||||
const animation = createAnimation(variant, start, blur, gifUrl);
|
const animation = createAnimation(variant, start, blur, gifUrl);
|
||||||
|
|
||||||
updateStyles(animation.css);
|
updateStyles(animation.css);
|
||||||
|
|
||||||
if (typeof window === "undefined") return;
|
if (typeof window === "undefined") return;
|
||||||
|
|
||||||
const switchTheme = () => {
|
const switchTheme = () => {
|
||||||
setTheme("dark");
|
setTheme("dark");
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!document.startViewTransition) {
|
if (!document.startViewTransition) {
|
||||||
switchTheme();
|
switchTheme();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
document.startViewTransition(switchTheme);
|
document.startViewTransition(switchTheme);
|
||||||
}, [setTheme, variant, start, blur, gifUrl, updateStyles]);
|
}, [setTheme, variant, start, blur, gifUrl, updateStyles]);
|
||||||
|
|
||||||
const setCrazySystemTheme = useCallback(() => {
|
const setCrazySystemTheme = useCallback(() => {
|
||||||
if (typeof window === "undefined") return;
|
if (typeof window === "undefined") return;
|
||||||
|
|
||||||
const prefersDark = window.matchMedia(
|
const prefersDark = window.matchMedia("(prefers-color-scheme: dark)").matches;
|
||||||
"(prefers-color-scheme: dark)",
|
setIsDark(prefersDark);
|
||||||
).matches;
|
|
||||||
setIsDark(prefersDark);
|
|
||||||
|
|
||||||
const animation = createAnimation(variant, start, blur, gifUrl);
|
const animation = createAnimation(variant, start, blur, gifUrl);
|
||||||
|
|
||||||
updateStyles(animation.css);
|
updateStyles(animation.css);
|
||||||
|
|
||||||
const switchTheme = () => {
|
const switchTheme = () => {
|
||||||
setTheme("system");
|
setTheme("system");
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!document.startViewTransition) {
|
if (!document.startViewTransition) {
|
||||||
switchTheme();
|
switchTheme();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
document.startViewTransition(switchTheme);
|
document.startViewTransition(switchTheme);
|
||||||
}, [setTheme, variant, start, blur, gifUrl, updateStyles]);
|
}, [setTheme, variant, start, blur, gifUrl, updateStyles]);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
isDark,
|
isDark,
|
||||||
setIsDark,
|
setIsDark,
|
||||||
toggleTheme,
|
toggleTheme,
|
||||||
setCrazyLightTheme,
|
setCrazyLightTheme,
|
||||||
setCrazyDarkTheme,
|
setCrazyDarkTheme,
|
||||||
setCrazySystemTheme,
|
setCrazySystemTheme,
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
// ///////////////////////////////////////////////////////////////////////////
|
// ///////////////////////////////////////////////////////////////////////////
|
||||||
// Theme Toggle Button Component (Sun/Moon Style)
|
// Theme Toggle Button Component (Sun/Moon Style)
|
||||||
|
|
||||||
export const ThemeToggleButton = ({
|
export const ThemeToggleButton = ({
|
||||||
className = "",
|
className = "",
|
||||||
variant = "circle",
|
variant = "circle",
|
||||||
start = "center",
|
start = "center",
|
||||||
blur = false,
|
blur = false,
|
||||||
gifUrl = "",
|
gifUrl = "",
|
||||||
}: {
|
}: {
|
||||||
className?: string;
|
className?: string;
|
||||||
variant?: AnimationVariant;
|
variant?: AnimationVariant;
|
||||||
start?: AnimationStart;
|
start?: AnimationStart;
|
||||||
blur?: boolean;
|
blur?: boolean;
|
||||||
gifUrl?: string;
|
gifUrl?: string;
|
||||||
}) => {
|
}) => {
|
||||||
const { isDark, toggleTheme } = useThemeToggle({
|
const { isDark, toggleTheme } = useThemeToggle({
|
||||||
variant,
|
variant,
|
||||||
start,
|
start,
|
||||||
blur,
|
blur,
|
||||||
gifUrl,
|
gifUrl,
|
||||||
});
|
});
|
||||||
const clipId = useId();
|
const clipId = useId();
|
||||||
const clipPathId = `theme-toggle-clip-${clipId}`;
|
const clipPathId = `theme-toggle-clip-${clipId}`;
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<button
|
<button
|
||||||
type="button"
|
type="button"
|
||||||
className={cn(
|
className={cn(
|
||||||
"size-10 cursor-pointer rounded-full p-2 transition-all duration-300 active:scale-95 bg-transparent",
|
"size-10 cursor-pointer rounded-full p-2 transition-all duration-300 active:scale-95 bg-transparent",
|
||||||
isDark ? "text-white" : "text-black",
|
isDark ? "text-white" : "text-black",
|
||||||
className,
|
className
|
||||||
)}
|
)}
|
||||||
onClick={toggleTheme}
|
onClick={toggleTheme}
|
||||||
aria-label="Toggle theme"
|
aria-label="Toggle theme"
|
||||||
>
|
>
|
||||||
<span className="sr-only">Toggle theme</span>
|
<span className="sr-only">Toggle theme</span>
|
||||||
<svg
|
<svg
|
||||||
xmlns="http://www.w3.org/2000/svg"
|
xmlns="http://www.w3.org/2000/svg"
|
||||||
aria-hidden="true"
|
aria-hidden="true"
|
||||||
fill="currentColor"
|
fill="currentColor"
|
||||||
strokeLinecap="round"
|
strokeLinecap="round"
|
||||||
viewBox="0 0 32 32"
|
viewBox="0 0 32 32"
|
||||||
>
|
>
|
||||||
<clipPath id={clipPathId}>
|
<clipPath id={clipPathId}>
|
||||||
<motion.path
|
<motion.path
|
||||||
animate={{ y: isDark ? 10 : 0, x: isDark ? -12 : 0 }}
|
animate={{ y: isDark ? 10 : 0, x: isDark ? -12 : 0 }}
|
||||||
transition={{ ease: "easeInOut", duration: 0.35 }}
|
transition={{ ease: "easeInOut", duration: 0.35 }}
|
||||||
d="M0-5h30a1 1 0 0 0 9 13v24H0Z"
|
d="M0-5h30a1 1 0 0 0 9 13v24H0Z"
|
||||||
/>
|
/>
|
||||||
</clipPath>
|
</clipPath>
|
||||||
<g clipPath={`url(#${clipPathId})`}>
|
<g clipPath={`url(#${clipPathId})`}>
|
||||||
<motion.circle
|
<motion.circle
|
||||||
animate={{ r: isDark ? 10 : 8 }}
|
animate={{ r: isDark ? 10 : 8 }}
|
||||||
transition={{ ease: "easeInOut", duration: 0.35 }}
|
transition={{ ease: "easeInOut", duration: 0.35 }}
|
||||||
cx="16"
|
cx="16"
|
||||||
cy="16"
|
cy="16"
|
||||||
/>
|
/>
|
||||||
<motion.g
|
<motion.g
|
||||||
animate={{
|
animate={{
|
||||||
rotate: isDark ? -100 : 0,
|
rotate: isDark ? -100 : 0,
|
||||||
scale: isDark ? 0.5 : 1,
|
scale: isDark ? 0.5 : 1,
|
||||||
opacity: isDark ? 0 : 1,
|
opacity: isDark ? 0 : 1,
|
||||||
}}
|
}}
|
||||||
transition={{ ease: "easeInOut", duration: 0.35 }}
|
transition={{ ease: "easeInOut", duration: 0.35 }}
|
||||||
stroke="currentColor"
|
stroke="currentColor"
|
||||||
strokeWidth="1.5"
|
strokeWidth="1.5"
|
||||||
>
|
>
|
||||||
<path d="M16 5.5v-4" />
|
<path d="M16 5.5v-4" />
|
||||||
<path d="M16 30.5v-4" />
|
<path d="M16 30.5v-4" />
|
||||||
<path d="M1.5 16h4" />
|
<path d="M1.5 16h4" />
|
||||||
<path d="M26.5 16h4" />
|
<path d="M26.5 16h4" />
|
||||||
<path d="m23.4 8.6 2.8-2.8" />
|
<path d="m23.4 8.6 2.8-2.8" />
|
||||||
<path d="m5.7 26.3 2.9-2.9" />
|
<path d="m5.7 26.3 2.9-2.9" />
|
||||||
<path d="m5.8 5.8 2.8 2.8" />
|
<path d="m5.8 5.8 2.8 2.8" />
|
||||||
<path d="m23.4 23.4 2.9 2.9" />
|
<path d="m23.4 23.4 2.9 2.9" />
|
||||||
</motion.g>
|
</motion.g>
|
||||||
</g>
|
</g>
|
||||||
</svg>
|
</svg>
|
||||||
</button>
|
</button>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
// ///////////////////////////////////////////////////////////////////////////
|
// ///////////////////////////////////////////////////////////////////////////
|
||||||
// Backwards compatible export (alias for ThemeToggleButton with default settings)
|
// Backwards compatible export (alias for ThemeToggleButton with default settings)
|
||||||
export function ThemeTogglerComponent() {
|
export function ThemeTogglerComponent() {
|
||||||
return (
|
return <ThemeToggleButton variant="circle" start="top-right" className="size-8" />;
|
||||||
<ThemeToggleButton
|
|
||||||
variant="circle"
|
|
||||||
start="top-right"
|
|
||||||
className="size-8"
|
|
||||||
/>
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -144,7 +144,7 @@ export function useDocuments(
|
||||||
(doc: DocumentElectric): DocumentDisplay => ({
|
(doc: DocumentElectric): DocumentDisplay => ({
|
||||||
...doc,
|
...doc,
|
||||||
created_by_name: doc.created_by_id
|
created_by_name: doc.created_by_id
|
||||||
? userCacheRef.current.get(doc.created_by_id) ?? null
|
? (userCacheRef.current.get(doc.created_by_id) ?? null)
|
||||||
: null,
|
: null,
|
||||||
status: doc.status ?? { state: "ready" },
|
status: doc.status ?? { state: "ready" },
|
||||||
}),
|
}),
|
||||||
|
|
@ -232,7 +232,15 @@ export function useDocuments(
|
||||||
const handle = await client.syncShape({
|
const handle = await client.syncShape({
|
||||||
table: "documents",
|
table: "documents",
|
||||||
where: `search_space_id = ${spaceId}`,
|
where: `search_space_id = ${spaceId}`,
|
||||||
columns: ["id", "document_type", "search_space_id", "title", "created_by_id", "created_at", "status"],
|
columns: [
|
||||||
|
"id",
|
||||||
|
"document_type",
|
||||||
|
"search_space_id",
|
||||||
|
"title",
|
||||||
|
"created_by_id",
|
||||||
|
"created_at",
|
||||||
|
"status",
|
||||||
|
],
|
||||||
primaryKey: ["id"],
|
primaryKey: ["id"],
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
@ -258,7 +266,10 @@ export function useDocuments(
|
||||||
// Set up live query
|
// Set up live query
|
||||||
const db = client.db as {
|
const db = client.db as {
|
||||||
live?: {
|
live?: {
|
||||||
query: <T>(sql: string, params?: (number | string)[]) => Promise<{
|
query: <T>(
|
||||||
|
sql: string,
|
||||||
|
params?: (number | string)[]
|
||||||
|
) => Promise<{
|
||||||
subscribe: (cb: (result: { rows: T[] }) => void) => void;
|
subscribe: (cb: (result: { rows: T[] }) => void) => void;
|
||||||
unsubscribe?: () => void;
|
unsubscribe?: () => void;
|
||||||
}>;
|
}>;
|
||||||
|
|
@ -297,8 +308,7 @@ export function useDocuments(
|
||||||
if (!mounted || !result.rows) return;
|
if (!mounted || !result.rows) return;
|
||||||
|
|
||||||
// DEBUG: Log first few raw documents to see what's coming from Electric
|
// DEBUG: Log first few raw documents to see what's coming from Electric
|
||||||
console.log("[useDocuments] Raw data sample:", result.rows.slice(0, 3));
|
console.log("[useDocuments] Raw data sample:", result.rows.slice(0, 3));
|
||||||
|
|
||||||
|
|
||||||
const validItems = result.rows.filter(isValidDocument);
|
const validItems = result.rows.filter(isValidDocument);
|
||||||
const isFullySynced = syncHandleRef.current?.isUpToDate ?? false;
|
const isFullySynced = syncHandleRef.current?.isUpToDate ?? false;
|
||||||
|
|
@ -309,8 +319,9 @@ export function useDocuments(
|
||||||
|
|
||||||
// Fetch user names for new users (non-blocking)
|
// Fetch user names for new users (non-blocking)
|
||||||
const unknownUserIds = validItems
|
const unknownUserIds = validItems
|
||||||
.filter((doc): doc is DocumentElectric & { created_by_id: string } =>
|
.filter(
|
||||||
doc.created_by_id !== null && !userCacheRef.current.has(doc.created_by_id)
|
(doc): doc is DocumentElectric & { created_by_id: string } =>
|
||||||
|
doc.created_by_id !== null && !userCacheRef.current.has(doc.created_by_id)
|
||||||
)
|
)
|
||||||
.map((doc) => doc.created_by_id);
|
.map((doc) => doc.created_by_id);
|
||||||
|
|
||||||
|
|
@ -326,7 +337,7 @@ export function useDocuments(
|
||||||
prev.map((doc) => ({
|
prev.map((doc) => ({
|
||||||
...doc,
|
...doc,
|
||||||
created_by_name: doc.created_by_id
|
created_by_name: doc.created_by_id
|
||||||
? userCacheRef.current.get(doc.created_by_id) ?? null
|
? (userCacheRef.current.get(doc.created_by_id) ?? null)
|
||||||
: null,
|
: null,
|
||||||
}))
|
}))
|
||||||
);
|
);
|
||||||
|
|
@ -358,7 +369,9 @@ export function useDocuments(
|
||||||
// Case 2: Electric is fully synced - TRUST IT COMPLETELY (handles bulk deletes)
|
// Case 2: Electric is fully synced - TRUST IT COMPLETELY (handles bulk deletes)
|
||||||
if (isFullySynced) {
|
if (isFullySynced) {
|
||||||
const liveDocs = deduplicateAndSort(validItems.map(electricToDisplayDoc));
|
const liveDocs = deduplicateAndSort(validItems.map(electricToDisplayDoc));
|
||||||
console.log(`[useDocuments] Synced update: ${liveDocs.length} docs (was ${prev.length})`);
|
console.log(
|
||||||
|
`[useDocuments] Synced update: ${liveDocs.length} docs (was ${prev.length})`
|
||||||
|
);
|
||||||
return liveDocs;
|
return liveDocs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -444,9 +444,9 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
|
||||||
// in use-inbox.ts generating different sync keys on each render.
|
// in use-inbox.ts generating different sync keys on each render.
|
||||||
// That's now fixed (rounded to midnight UTC in getSyncCutoffDate).
|
// That's now fixed (rounded to midnight UTC in getSyncCutoffDate).
|
||||||
// We can safely use shapeKey for fast incremental sync.
|
// We can safely use shapeKey for fast incremental sync.
|
||||||
|
|
||||||
const shapeKey = `${userId}_v${SYNC_VERSION}_${table}_${where?.replace(/[^a-zA-Z0-9]/g, "_") || "all"}`;
|
const shapeKey = `${userId}_v${SYNC_VERSION}_${table}_${where?.replace(/[^a-zA-Z0-9]/g, "_") || "all"}`;
|
||||||
|
|
||||||
// Type assertion to PGlite with electric extension
|
// Type assertion to PGlite with electric extension
|
||||||
const pgWithElectric = db as unknown as {
|
const pgWithElectric = db as unknown as {
|
||||||
electric: {
|
electric: {
|
||||||
|
|
@ -495,9 +495,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
|
||||||
// Parse the WHERE clause to build a DELETE statement
|
// Parse the WHERE clause to build a DELETE statement
|
||||||
// The WHERE clause is already validated and formatted
|
// The WHERE clause is already validated and formatted
|
||||||
await tx.exec(`DELETE FROM ${table} WHERE ${validatedWhere}`);
|
await tx.exec(`DELETE FROM ${table} WHERE ${validatedWhere}`);
|
||||||
debugLog(
|
debugLog(`[Electric] 🗑️ Cleared ${table} rows matching: ${validatedWhere}`);
|
||||||
`[Electric] 🗑️ Cleared ${table} rows matching: ${validatedWhere}`
|
|
||||||
);
|
|
||||||
} else {
|
} else {
|
||||||
// No WHERE clause means we're syncing the entire table
|
// No WHERE clause means we're syncing the entire table
|
||||||
await tx.exec(`DELETE FROM ${table}`);
|
await tx.exec(`DELETE FROM ${table}`);
|
||||||
|
|
@ -514,10 +512,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
debugLog(
|
debugLog("[Electric] syncShapeToTable config:", JSON.stringify(shapeConfig, null, 2));
|
||||||
"[Electric] syncShapeToTable config:",
|
|
||||||
JSON.stringify(shapeConfig, null, 2)
|
|
||||||
);
|
|
||||||
|
|
||||||
let shape: { unsubscribe: () => void; isUpToDate: boolean; stream: unknown };
|
let shape: { unsubscribe: () => void; isUpToDate: boolean; stream: unknown };
|
||||||
try {
|
try {
|
||||||
|
|
@ -550,9 +545,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
|
||||||
retryError instanceof Error ? retryError.message : String(retryError);
|
retryError instanceof Error ? retryError.message : String(retryError);
|
||||||
if (retryMessage.includes("Already syncing")) {
|
if (retryMessage.includes("Already syncing")) {
|
||||||
// Still syncing - create a placeholder handle that indicates the table is being synced
|
// Still syncing - create a placeholder handle that indicates the table is being synced
|
||||||
debugWarn(
|
debugWarn(`[Electric] ${table} still syncing, creating placeholder handle`);
|
||||||
`[Electric] ${table} still syncing, creating placeholder handle`
|
|
||||||
);
|
|
||||||
const placeholderHandle: SyncHandle = {
|
const placeholderHandle: SyncHandle = {
|
||||||
unsubscribe: () => {
|
unsubscribe: () => {
|
||||||
debugLog(`[Electric] Placeholder unsubscribe for: ${cacheKey}`);
|
debugLog(`[Electric] Placeholder unsubscribe for: ${cacheKey}`);
|
||||||
|
|
@ -656,9 +649,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
|
||||||
|
|
||||||
// Also check stream's isUpToDate property immediately
|
// Also check stream's isUpToDate property immediately
|
||||||
if (stream?.isUpToDate) {
|
if (stream?.isUpToDate) {
|
||||||
debugLog(
|
debugLog(`[Electric] ✅ Stream isUpToDate is true immediately for ${table}`);
|
||||||
`[Electric] ✅ Stream isUpToDate is true immediately for ${table}`
|
|
||||||
);
|
|
||||||
resolveInitialSync();
|
resolveInitialSync();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -671,9 +662,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (shape.isUpToDate || stream?.isUpToDate) {
|
if (shape.isUpToDate || stream?.isUpToDate) {
|
||||||
debugLog(
|
debugLog(`[Electric] ✅ Sync completed (detected via polling) for ${table}`);
|
||||||
`[Electric] ✅ Sync completed (detected via polling) for ${table}`
|
|
||||||
);
|
|
||||||
clearInterval(pollInterval);
|
clearInterval(pollInterval);
|
||||||
resolveInitialSync();
|
resolveInitialSync();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue