mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-15 18:25:18 +02:00
refactor: unify all 3 google Composio and non-Composio connector types and pipelines keeping same credential adapters
This commit is contained in:
parent
6c37b563c0
commit
83152e8e7e
24 changed files with 633 additions and 3596 deletions
|
|
@ -55,7 +55,6 @@ async def _check_and_trigger_schedules():
|
|||
from app.tasks.celery_tasks.connector_tasks import (
|
||||
index_airtable_records_task,
|
||||
index_clickup_tasks_task,
|
||||
index_composio_connector_task,
|
||||
index_confluence_pages_task,
|
||||
index_crawled_urls_task,
|
||||
index_discord_messages_task,
|
||||
|
|
@ -88,10 +87,10 @@ async def _check_and_trigger_schedules():
|
|||
SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: index_elasticsearch_documents_task,
|
||||
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_crawled_urls_task,
|
||||
SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR: index_google_drive_files_task,
|
||||
# Composio connector types
|
||||
SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR: index_composio_connector_task,
|
||||
SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR: index_composio_connector_task,
|
||||
SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR: index_composio_connector_task,
|
||||
# Composio connector types (unified with native Google tasks)
|
||||
SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR: index_google_drive_files_task,
|
||||
SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR: index_google_gmail_messages_task,
|
||||
SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR: index_google_calendar_events_task,
|
||||
}
|
||||
|
||||
# Trigger indexing for each due connector
|
||||
|
|
@ -129,11 +128,11 @@ async def _check_and_trigger_schedules():
|
|||
f"({connector.connector_type.value})"
|
||||
)
|
||||
|
||||
# Special handling for Google Drive - uses config for folder/file selection
|
||||
if (
|
||||
connector.connector_type
|
||||
== SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR
|
||||
):
|
||||
# Special handling for Google Drive (native and Composio) - uses config for folder/file selection
|
||||
if connector.connector_type in [
|
||||
SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR,
|
||||
SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
|
||||
]:
|
||||
connector_config = connector.config or {}
|
||||
selected_folders = connector_config.get("selected_folders", [])
|
||||
selected_files = connector_config.get("selected_files", [])
|
||||
|
|
|
|||
|
|
@ -16,6 +16,15 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
|||
|
||||
from app.connectors.google_calendar_connector import GoogleCalendarConnector
|
||||
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
|
||||
from app.utils.google_credentials import (
|
||||
COMPOSIO_GOOGLE_CONNECTOR_TYPES,
|
||||
build_composio_credentials,
|
||||
)
|
||||
|
||||
ACCEPTED_CALENDAR_CONNECTOR_TYPES = {
|
||||
SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR,
|
||||
SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
|
||||
}
|
||||
from app.services.llm_service import get_user_long_context_llm
|
||||
from app.services.task_logging_service import TaskLoggingService
|
||||
from app.utils.document_converters import (
|
||||
|
|
@ -87,10 +96,12 @@ async def index_google_calendar_events(
|
|||
)
|
||||
|
||||
try:
|
||||
# Get the connector from the database
|
||||
connector = await get_connector_by_id(
|
||||
session, connector_id, SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR
|
||||
)
|
||||
# Accept both native and Composio Calendar connectors
|
||||
connector = None
|
||||
for ct in ACCEPTED_CALENDAR_CONNECTOR_TYPES:
|
||||
connector = await get_connector_by_id(session, connector_id, ct)
|
||||
if connector:
|
||||
break
|
||||
|
||||
if not connector:
|
||||
await task_logger.log_task_failure(
|
||||
|
|
@ -101,69 +112,80 @@ async def index_google_calendar_events(
|
|||
)
|
||||
return 0, f"Connector with ID {connector_id} not found"
|
||||
|
||||
# Get the Google Calendar credentials from the connector config
|
||||
config_data = connector.config
|
||||
|
||||
# Decrypt sensitive credentials if encrypted (for backward compatibility)
|
||||
from app.config import config
|
||||
from app.utils.oauth_security import TokenEncryption
|
||||
|
||||
token_encrypted = config_data.get("_token_encrypted", False)
|
||||
if token_encrypted and config.SECRET_KEY:
|
||||
try:
|
||||
token_encryption = TokenEncryption(config.SECRET_KEY)
|
||||
|
||||
# Decrypt sensitive fields
|
||||
if config_data.get("token"):
|
||||
config_data["token"] = token_encryption.decrypt_token(
|
||||
config_data["token"]
|
||||
)
|
||||
if config_data.get("refresh_token"):
|
||||
config_data["refresh_token"] = token_encryption.decrypt_token(
|
||||
config_data["refresh_token"]
|
||||
)
|
||||
if config_data.get("client_secret"):
|
||||
config_data["client_secret"] = token_encryption.decrypt_token(
|
||||
config_data["client_secret"]
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Decrypted Google Calendar credentials for connector {connector_id}"
|
||||
)
|
||||
except Exception as e:
|
||||
# Build credentials based on connector type
|
||||
if connector.connector_type in COMPOSIO_GOOGLE_CONNECTOR_TYPES:
|
||||
connected_account_id = connector.config.get(
|
||||
"composio_connected_account_id"
|
||||
)
|
||||
if not connected_account_id:
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Failed to decrypt Google Calendar credentials for connector {connector_id}: {e!s}",
|
||||
"Credential decryption failed",
|
||||
{"error_type": "CredentialDecryptionError"},
|
||||
f"Composio connected_account_id not found for connector {connector_id}",
|
||||
"Missing Composio account",
|
||||
{"error_type": "MissingComposioAccount"},
|
||||
)
|
||||
return 0, f"Failed to decrypt Google Calendar credentials: {e!s}"
|
||||
return 0, "Composio connected_account_id not found"
|
||||
credentials = build_composio_credentials(connected_account_id)
|
||||
else:
|
||||
config_data = connector.config
|
||||
|
||||
exp = config_data.get("expiry", "").replace("Z", "")
|
||||
credentials = Credentials(
|
||||
token=config_data.get("token"),
|
||||
refresh_token=config_data.get("refresh_token"),
|
||||
token_uri=config_data.get("token_uri"),
|
||||
client_id=config_data.get("client_id"),
|
||||
client_secret=config_data.get("client_secret"),
|
||||
scopes=config_data.get("scopes"),
|
||||
expiry=datetime.fromisoformat(exp) if exp else None,
|
||||
)
|
||||
from app.config import config
|
||||
from app.utils.oauth_security import TokenEncryption
|
||||
|
||||
if (
|
||||
not credentials.client_id
|
||||
or not credentials.client_secret
|
||||
or not credentials.refresh_token
|
||||
):
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Google Calendar credentials not found in connector config for connector {connector_id}",
|
||||
"Missing Google Calendar credentials",
|
||||
{"error_type": "MissingCredentials"},
|
||||
token_encrypted = config_data.get("_token_encrypted", False)
|
||||
if token_encrypted and config.SECRET_KEY:
|
||||
try:
|
||||
token_encryption = TokenEncryption(config.SECRET_KEY)
|
||||
if config_data.get("token"):
|
||||
config_data["token"] = token_encryption.decrypt_token(
|
||||
config_data["token"]
|
||||
)
|
||||
if config_data.get("refresh_token"):
|
||||
config_data["refresh_token"] = token_encryption.decrypt_token(
|
||||
config_data["refresh_token"]
|
||||
)
|
||||
if config_data.get("client_secret"):
|
||||
config_data["client_secret"] = token_encryption.decrypt_token(
|
||||
config_data["client_secret"]
|
||||
)
|
||||
logger.info(
|
||||
f"Decrypted Google Calendar credentials for connector {connector_id}"
|
||||
)
|
||||
except Exception as e:
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Failed to decrypt Google Calendar credentials for connector {connector_id}: {e!s}",
|
||||
"Credential decryption failed",
|
||||
{"error_type": "CredentialDecryptionError"},
|
||||
)
|
||||
return 0, f"Failed to decrypt Google Calendar credentials: {e!s}"
|
||||
|
||||
exp = config_data.get("expiry", "")
|
||||
if exp:
|
||||
exp = exp.replace("Z", "")
|
||||
credentials = Credentials(
|
||||
token=config_data.get("token"),
|
||||
refresh_token=config_data.get("refresh_token"),
|
||||
token_uri=config_data.get("token_uri"),
|
||||
client_id=config_data.get("client_id"),
|
||||
client_secret=config_data.get("client_secret"),
|
||||
scopes=config_data.get("scopes", []),
|
||||
expiry=datetime.fromisoformat(exp) if exp else None,
|
||||
)
|
||||
return 0, "Google Calendar credentials not found in connector config"
|
||||
|
||||
# Initialize Google Calendar client
|
||||
if (
|
||||
not credentials.client_id
|
||||
or not credentials.client_secret
|
||||
or not credentials.refresh_token
|
||||
):
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Google Calendar credentials not found in connector config for connector {connector_id}",
|
||||
"Missing Google Calendar credentials",
|
||||
{"error_type": "MissingCredentials"},
|
||||
)
|
||||
return 0, "Google Calendar credentials not found in connector config"
|
||||
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Initializing Google Calendar client for connector {connector_id}",
|
||||
|
|
|
|||
|
|
@ -31,6 +31,15 @@ from app.tasks.connector_indexers.base import (
|
|||
update_connector_last_indexed,
|
||||
)
|
||||
from app.utils.document_converters import generate_unique_identifier_hash
|
||||
from app.utils.google_credentials import (
|
||||
COMPOSIO_GOOGLE_CONNECTOR_TYPES,
|
||||
build_composio_credentials,
|
||||
)
|
||||
|
||||
ACCEPTED_DRIVE_CONNECTOR_TYPES = {
|
||||
SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR,
|
||||
SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
|
||||
}
|
||||
|
||||
# Type hint for heartbeat callback
|
||||
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||
|
|
@ -89,14 +98,17 @@ async def index_google_drive_files(
|
|||
)
|
||||
|
||||
try:
|
||||
connector = await get_connector_by_id(
|
||||
session, connector_id, SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR
|
||||
)
|
||||
# Accept both native and Composio Drive connectors
|
||||
connector = None
|
||||
for ct in ACCEPTED_DRIVE_CONNECTOR_TYPES:
|
||||
connector = await get_connector_by_id(session, connector_id, ct)
|
||||
if connector:
|
||||
break
|
||||
|
||||
if not connector:
|
||||
error_msg = f"Google Drive connector with ID {connector_id} not found"
|
||||
await task_logger.log_task_failure(
|
||||
log_entry, error_msg, {"error_type": "ConnectorNotFound"}
|
||||
log_entry, error_msg, None, {"error_type": "ConnectorNotFound"}
|
||||
)
|
||||
return 0, error_msg
|
||||
|
||||
|
|
@ -106,27 +118,43 @@ async def index_google_drive_files(
|
|||
{"stage": "client_initialization"},
|
||||
)
|
||||
|
||||
# Check if credentials are encrypted (only when explicitly marked)
|
||||
token_encrypted = connector.config.get("_token_encrypted", False)
|
||||
if token_encrypted:
|
||||
# Credentials are explicitly marked as encrypted, will be decrypted during client initialization
|
||||
if not config.SECRET_KEY:
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"SECRET_KEY not configured but credentials are marked as encrypted for connector {connector_id}",
|
||||
"Missing SECRET_KEY for token decryption",
|
||||
{"error_type": "MissingSecretKey"},
|
||||
)
|
||||
return (
|
||||
0,
|
||||
"SECRET_KEY not configured but credentials are marked as encrypted",
|
||||
)
|
||||
logger.info(
|
||||
f"Google Drive credentials are encrypted for connector {connector_id}, will decrypt during client initialization"
|
||||
# Build credentials based on connector type
|
||||
pre_built_credentials = None
|
||||
if connector.connector_type in COMPOSIO_GOOGLE_CONNECTOR_TYPES:
|
||||
connected_account_id = connector.config.get(
|
||||
"composio_connected_account_id"
|
||||
)
|
||||
# If _token_encrypted is False or not set, treat credentials as plaintext
|
||||
if not connected_account_id:
|
||||
error_msg = f"Composio connected_account_id not found for connector {connector_id}"
|
||||
await task_logger.log_task_failure(
|
||||
log_entry, error_msg, "Missing Composio account",
|
||||
{"error_type": "MissingComposioAccount"},
|
||||
)
|
||||
return 0, error_msg
|
||||
pre_built_credentials = build_composio_credentials(connected_account_id)
|
||||
else:
|
||||
token_encrypted = connector.config.get("_token_encrypted", False)
|
||||
if token_encrypted:
|
||||
if not config.SECRET_KEY:
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"SECRET_KEY not configured but credentials are marked as encrypted for connector {connector_id}",
|
||||
"Missing SECRET_KEY for token decryption",
|
||||
{"error_type": "MissingSecretKey"},
|
||||
)
|
||||
return (
|
||||
0,
|
||||
"SECRET_KEY not configured but credentials are marked as encrypted",
|
||||
)
|
||||
logger.info(
|
||||
f"Google Drive credentials are encrypted for connector {connector_id}, will decrypt during client initialization"
|
||||
)
|
||||
|
||||
drive_client = GoogleDriveClient(session, connector_id)
|
||||
connector_enable_summary = getattr(connector, "enable_summary", True)
|
||||
|
||||
drive_client = GoogleDriveClient(
|
||||
session, connector_id, credentials=pre_built_credentials
|
||||
)
|
||||
|
||||
if not folder_id:
|
||||
error_msg = "folder_id is required for Google Drive indexing"
|
||||
|
|
@ -164,6 +192,7 @@ async def index_google_drive_files(
|
|||
max_files=max_files,
|
||||
include_subfolders=include_subfolders,
|
||||
on_heartbeat_callback=on_heartbeat_callback,
|
||||
enable_summary=connector_enable_summary,
|
||||
)
|
||||
else:
|
||||
logger.info(f"Using full scan for connector {connector_id}")
|
||||
|
|
@ -181,6 +210,7 @@ async def index_google_drive_files(
|
|||
max_files=max_files,
|
||||
include_subfolders=include_subfolders,
|
||||
on_heartbeat_callback=on_heartbeat_callback,
|
||||
enable_summary=connector_enable_summary,
|
||||
)
|
||||
|
||||
documents_indexed, documents_skipped = result
|
||||
|
|
@ -278,14 +308,17 @@ async def index_google_drive_single_file(
|
|||
)
|
||||
|
||||
try:
|
||||
connector = await get_connector_by_id(
|
||||
session, connector_id, SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR
|
||||
)
|
||||
# Accept both native and Composio Drive connectors
|
||||
connector = None
|
||||
for ct in ACCEPTED_DRIVE_CONNECTOR_TYPES:
|
||||
connector = await get_connector_by_id(session, connector_id, ct)
|
||||
if connector:
|
||||
break
|
||||
|
||||
if not connector:
|
||||
error_msg = f"Google Drive connector with ID {connector_id} not found"
|
||||
await task_logger.log_task_failure(
|
||||
log_entry, error_msg, {"error_type": "ConnectorNotFound"}
|
||||
log_entry, error_msg, None, {"error_type": "ConnectorNotFound"}
|
||||
)
|
||||
return 0, error_msg
|
||||
|
||||
|
|
@ -295,27 +328,42 @@ async def index_google_drive_single_file(
|
|||
{"stage": "client_initialization"},
|
||||
)
|
||||
|
||||
# Check if credentials are encrypted (only when explicitly marked)
|
||||
token_encrypted = connector.config.get("_token_encrypted", False)
|
||||
if token_encrypted:
|
||||
# Credentials are explicitly marked as encrypted, will be decrypted during client initialization
|
||||
if not config.SECRET_KEY:
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"SECRET_KEY not configured but credentials are marked as encrypted for connector {connector_id}",
|
||||
"Missing SECRET_KEY for token decryption",
|
||||
{"error_type": "MissingSecretKey"},
|
||||
)
|
||||
return (
|
||||
0,
|
||||
"SECRET_KEY not configured but credentials are marked as encrypted",
|
||||
)
|
||||
logger.info(
|
||||
f"Google Drive credentials are encrypted for connector {connector_id}, will decrypt during client initialization"
|
||||
pre_built_credentials = None
|
||||
if connector.connector_type in COMPOSIO_GOOGLE_CONNECTOR_TYPES:
|
||||
connected_account_id = connector.config.get(
|
||||
"composio_connected_account_id"
|
||||
)
|
||||
# If _token_encrypted is False or not set, treat credentials as plaintext
|
||||
if not connected_account_id:
|
||||
error_msg = f"Composio connected_account_id not found for connector {connector_id}"
|
||||
await task_logger.log_task_failure(
|
||||
log_entry, error_msg, "Missing Composio account",
|
||||
{"error_type": "MissingComposioAccount"},
|
||||
)
|
||||
return 0, error_msg
|
||||
pre_built_credentials = build_composio_credentials(connected_account_id)
|
||||
else:
|
||||
token_encrypted = connector.config.get("_token_encrypted", False)
|
||||
if token_encrypted:
|
||||
if not config.SECRET_KEY:
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"SECRET_KEY not configured but credentials are marked as encrypted for connector {connector_id}",
|
||||
"Missing SECRET_KEY for token decryption",
|
||||
{"error_type": "MissingSecretKey"},
|
||||
)
|
||||
return (
|
||||
0,
|
||||
"SECRET_KEY not configured but credentials are marked as encrypted",
|
||||
)
|
||||
logger.info(
|
||||
f"Google Drive credentials are encrypted for connector {connector_id}, will decrypt during client initialization"
|
||||
)
|
||||
|
||||
drive_client = GoogleDriveClient(session, connector_id)
|
||||
connector_enable_summary = getattr(connector, "enable_summary", True)
|
||||
|
||||
drive_client = GoogleDriveClient(
|
||||
session, connector_id, credentials=pre_built_credentials
|
||||
)
|
||||
|
||||
# Fetch the file metadata
|
||||
file, error = await get_file_by_id(drive_client, file_id)
|
||||
|
|
@ -362,6 +410,7 @@ async def index_google_drive_single_file(
|
|||
task_logger=task_logger,
|
||||
log_entry=log_entry,
|
||||
pending_document=pending_doc,
|
||||
enable_summary=connector_enable_summary,
|
||||
)
|
||||
|
||||
await session.commit()
|
||||
|
|
@ -433,6 +482,7 @@ async def _index_full_scan(
|
|||
max_files: int,
|
||||
include_subfolders: bool = False,
|
||||
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||
enable_summary: bool = True,
|
||||
) -> tuple[int, int]:
|
||||
"""Perform full scan indexing of a folder.
|
||||
|
||||
|
|
@ -562,6 +612,7 @@ async def _index_full_scan(
|
|||
task_logger=task_logger,
|
||||
log_entry=log_entry,
|
||||
pending_document=pending_doc,
|
||||
enable_summary=enable_summary,
|
||||
)
|
||||
|
||||
documents_indexed += indexed
|
||||
|
|
@ -592,6 +643,7 @@ async def _index_with_delta_sync(
|
|||
max_files: int,
|
||||
include_subfolders: bool = False,
|
||||
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||
enable_summary: bool = True,
|
||||
) -> tuple[int, int]:
|
||||
"""Perform delta sync indexing using change tracking.
|
||||
|
||||
|
|
@ -703,6 +755,7 @@ async def _index_with_delta_sync(
|
|||
task_logger=task_logger,
|
||||
log_entry=log_entry,
|
||||
pending_document=pending_doc,
|
||||
enable_summary=enable_summary,
|
||||
)
|
||||
|
||||
documents_indexed += indexed
|
||||
|
|
@ -957,6 +1010,7 @@ async def _process_single_file(
|
|||
task_logger: TaskLoggingService,
|
||||
log_entry: any,
|
||||
pending_document: Document | None = None,
|
||||
enable_summary: bool = True,
|
||||
) -> tuple[int, int, int]:
|
||||
"""
|
||||
Process a single file by downloading and using Surfsense's file processor.
|
||||
|
|
@ -1020,6 +1074,7 @@ async def _process_single_file(
|
|||
task_logger=task_logger,
|
||||
log_entry=log_entry,
|
||||
connector_id=connector_id,
|
||||
enable_summary=enable_summary,
|
||||
)
|
||||
|
||||
if error:
|
||||
|
|
|
|||
|
|
@ -21,6 +21,15 @@ from app.db import (
|
|||
DocumentType,
|
||||
SearchSourceConnectorType,
|
||||
)
|
||||
from app.utils.google_credentials import (
|
||||
COMPOSIO_GOOGLE_CONNECTOR_TYPES,
|
||||
build_composio_credentials,
|
||||
)
|
||||
|
||||
ACCEPTED_GMAIL_CONNECTOR_TYPES = {
|
||||
SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR,
|
||||
SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
|
||||
}
|
||||
from app.services.llm_service import get_user_long_context_llm
|
||||
from app.services.task_logging_service import TaskLoggingService
|
||||
from app.utils.document_converters import (
|
||||
|
|
@ -94,90 +103,100 @@ async def index_google_gmail_messages(
|
|||
)
|
||||
|
||||
try:
|
||||
# Get connector by id
|
||||
connector = await get_connector_by_id(
|
||||
session, connector_id, SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR
|
||||
)
|
||||
# Accept both native and Composio Gmail connectors
|
||||
connector = None
|
||||
for ct in ACCEPTED_GMAIL_CONNECTOR_TYPES:
|
||||
connector = await get_connector_by_id(session, connector_id, ct)
|
||||
if connector:
|
||||
break
|
||||
|
||||
if not connector:
|
||||
error_msg = f"Gmail connector with ID {connector_id} not found"
|
||||
await task_logger.log_task_failure(
|
||||
log_entry, error_msg, {"error_type": "ConnectorNotFound"}
|
||||
log_entry, error_msg, None, {"error_type": "ConnectorNotFound"}
|
||||
)
|
||||
return 0, error_msg
|
||||
|
||||
# Get the Google Gmail credentials from the connector config
|
||||
config_data = connector.config
|
||||
|
||||
# Decrypt sensitive credentials if encrypted (for backward compatibility)
|
||||
from app.config import config
|
||||
from app.utils.oauth_security import TokenEncryption
|
||||
|
||||
token_encrypted = config_data.get("_token_encrypted", False)
|
||||
if token_encrypted and config.SECRET_KEY:
|
||||
try:
|
||||
token_encryption = TokenEncryption(config.SECRET_KEY)
|
||||
|
||||
# Decrypt sensitive fields
|
||||
if config_data.get("token"):
|
||||
config_data["token"] = token_encryption.decrypt_token(
|
||||
config_data["token"]
|
||||
)
|
||||
if config_data.get("refresh_token"):
|
||||
config_data["refresh_token"] = token_encryption.decrypt_token(
|
||||
config_data["refresh_token"]
|
||||
)
|
||||
if config_data.get("client_secret"):
|
||||
config_data["client_secret"] = token_encryption.decrypt_token(
|
||||
config_data["client_secret"]
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Decrypted Google Gmail credentials for connector {connector_id}"
|
||||
)
|
||||
except Exception as e:
|
||||
# Build credentials based on connector type
|
||||
if connector.connector_type in COMPOSIO_GOOGLE_CONNECTOR_TYPES:
|
||||
connected_account_id = connector.config.get(
|
||||
"composio_connected_account_id"
|
||||
)
|
||||
if not connected_account_id:
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Failed to decrypt Google Gmail credentials for connector {connector_id}: {e!s}",
|
||||
"Credential decryption failed",
|
||||
{"error_type": "CredentialDecryptionError"},
|
||||
f"Composio connected_account_id not found for connector {connector_id}",
|
||||
"Missing Composio account",
|
||||
{"error_type": "MissingComposioAccount"},
|
||||
)
|
||||
return 0, f"Failed to decrypt Google Gmail credentials: {e!s}"
|
||||
return 0, "Composio connected_account_id not found"
|
||||
credentials = build_composio_credentials(connected_account_id)
|
||||
else:
|
||||
config_data = connector.config
|
||||
|
||||
exp = config_data.get("expiry", "")
|
||||
if exp:
|
||||
exp = exp.replace("Z", "")
|
||||
credentials = Credentials(
|
||||
token=config_data.get("token"),
|
||||
refresh_token=config_data.get("refresh_token"),
|
||||
token_uri=config_data.get("token_uri"),
|
||||
client_id=config_data.get("client_id"),
|
||||
client_secret=config_data.get("client_secret"),
|
||||
scopes=config_data.get("scopes", []),
|
||||
expiry=datetime.fromisoformat(exp) if exp else None,
|
||||
)
|
||||
from app.config import config
|
||||
from app.utils.oauth_security import TokenEncryption
|
||||
|
||||
if (
|
||||
not credentials.client_id
|
||||
or not credentials.client_secret
|
||||
or not credentials.refresh_token
|
||||
):
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Google gmail credentials not found in connector config for connector {connector_id}",
|
||||
"Missing Google gmail credentials",
|
||||
{"error_type": "MissingCredentials"},
|
||||
token_encrypted = config_data.get("_token_encrypted", False)
|
||||
if token_encrypted and config.SECRET_KEY:
|
||||
try:
|
||||
token_encryption = TokenEncryption(config.SECRET_KEY)
|
||||
if config_data.get("token"):
|
||||
config_data["token"] = token_encryption.decrypt_token(
|
||||
config_data["token"]
|
||||
)
|
||||
if config_data.get("refresh_token"):
|
||||
config_data["refresh_token"] = token_encryption.decrypt_token(
|
||||
config_data["refresh_token"]
|
||||
)
|
||||
if config_data.get("client_secret"):
|
||||
config_data["client_secret"] = token_encryption.decrypt_token(
|
||||
config_data["client_secret"]
|
||||
)
|
||||
logger.info(
|
||||
f"Decrypted Google Gmail credentials for connector {connector_id}"
|
||||
)
|
||||
except Exception as e:
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Failed to decrypt Google Gmail credentials for connector {connector_id}: {e!s}",
|
||||
"Credential decryption failed",
|
||||
{"error_type": "CredentialDecryptionError"},
|
||||
)
|
||||
return 0, f"Failed to decrypt Google Gmail credentials: {e!s}"
|
||||
|
||||
exp = config_data.get("expiry", "")
|
||||
if exp:
|
||||
exp = exp.replace("Z", "")
|
||||
credentials = Credentials(
|
||||
token=config_data.get("token"),
|
||||
refresh_token=config_data.get("refresh_token"),
|
||||
token_uri=config_data.get("token_uri"),
|
||||
client_id=config_data.get("client_id"),
|
||||
client_secret=config_data.get("client_secret"),
|
||||
scopes=config_data.get("scopes", []),
|
||||
expiry=datetime.fromisoformat(exp) if exp else None,
|
||||
)
|
||||
return 0, "Google gmail credentials not found in connector config"
|
||||
|
||||
# Initialize Google gmail client
|
||||
if (
|
||||
not credentials.client_id
|
||||
or not credentials.client_secret
|
||||
or not credentials.refresh_token
|
||||
):
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Google gmail credentials not found in connector config for connector {connector_id}",
|
||||
"Missing Google gmail credentials",
|
||||
{"error_type": "MissingCredentials"},
|
||||
)
|
||||
return 0, "Google gmail credentials not found in connector config"
|
||||
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Initializing Google gmail client for connector {connector_id}",
|
||||
{"stage": "client_initialization"},
|
||||
)
|
||||
|
||||
# Initialize Google gmail connector
|
||||
gmail_connector = GoogleGmailConnector(
|
||||
credentials, session, user_id, connector_id
|
||||
)
|
||||
|
|
|
|||
|
|
@ -411,6 +411,7 @@ async def add_received_file_document_using_unstructured(
|
|||
search_space_id: int,
|
||||
user_id: str,
|
||||
connector: dict | None = None,
|
||||
enable_summary: bool = True,
|
||||
) -> Document | None:
|
||||
"""
|
||||
Process and store a file document using Unstructured service.
|
||||
|
|
@ -471,9 +472,13 @@ async def add_received_file_document_using_unstructured(
|
|||
"etl_service": "UNSTRUCTURED",
|
||||
"document_type": "File Document",
|
||||
}
|
||||
summary_content, summary_embedding = await generate_document_summary(
|
||||
file_in_markdown, user_llm, document_metadata
|
||||
)
|
||||
if enable_summary:
|
||||
summary_content, summary_embedding = await generate_document_summary(
|
||||
file_in_markdown, user_llm, document_metadata
|
||||
)
|
||||
else:
|
||||
summary_content = f"File: {file_name}\n\n{file_in_markdown[:4000]}"
|
||||
summary_embedding = embed_text(summary_content)
|
||||
|
||||
# Process chunks
|
||||
chunks = await create_document_chunks(file_in_markdown)
|
||||
|
|
@ -493,14 +498,13 @@ async def add_received_file_document_using_unstructured(
|
|||
existing_document.source_markdown = file_in_markdown
|
||||
existing_document.content_needs_reindexing = False
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
existing_document.status = DocumentStatus.ready() # Mark as ready
|
||||
existing_document.status = DocumentStatus.ready()
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
document = existing_document
|
||||
else:
|
||||
# Create new document
|
||||
# Determine document type based on connector
|
||||
doc_type = DocumentType.FILE
|
||||
if connector and connector.get("type") == DocumentType.GOOGLE_DRIVE_FILE:
|
||||
doc_type = DocumentType.GOOGLE_DRIVE_FILE
|
||||
|
|
@ -523,7 +527,7 @@ async def add_received_file_document_using_unstructured(
|
|||
updated_at=get_current_timestamp(),
|
||||
created_by_id=user_id,
|
||||
connector_id=connector.get("connector_id") if connector else None,
|
||||
status=DocumentStatus.ready(), # Mark as ready
|
||||
status=DocumentStatus.ready(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
@ -546,6 +550,7 @@ async def add_received_file_document_using_llamacloud(
|
|||
search_space_id: int,
|
||||
user_id: str,
|
||||
connector: dict | None = None,
|
||||
enable_summary: bool = True,
|
||||
) -> Document | None:
|
||||
"""
|
||||
Process and store document content parsed by LlamaCloud.
|
||||
|
|
@ -605,16 +610,19 @@ async def add_received_file_document_using_llamacloud(
|
|||
"etl_service": "LLAMACLOUD",
|
||||
"document_type": "File Document",
|
||||
}
|
||||
summary_content, summary_embedding = await generate_document_summary(
|
||||
file_in_markdown, user_llm, document_metadata
|
||||
)
|
||||
if enable_summary:
|
||||
summary_content, summary_embedding = await generate_document_summary(
|
||||
file_in_markdown, user_llm, document_metadata
|
||||
)
|
||||
else:
|
||||
summary_content = f"File: {file_name}\n\n{file_in_markdown[:4000]}"
|
||||
summary_embedding = embed_text(summary_content)
|
||||
|
||||
# Process chunks
|
||||
chunks = await create_document_chunks(file_in_markdown)
|
||||
|
||||
# Update or create document
|
||||
if existing_document:
|
||||
# Update existing document
|
||||
existing_document.title = file_name
|
||||
existing_document.content = summary_content
|
||||
existing_document.content_hash = content_hash
|
||||
|
|
@ -627,14 +635,12 @@ async def add_received_file_document_using_llamacloud(
|
|||
existing_document.source_markdown = file_in_markdown
|
||||
existing_document.content_needs_reindexing = False
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
existing_document.status = DocumentStatus.ready() # Mark as ready
|
||||
existing_document.status = DocumentStatus.ready()
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
document = existing_document
|
||||
else:
|
||||
# Create new document
|
||||
# Determine document type based on connector
|
||||
doc_type = DocumentType.FILE
|
||||
if connector and connector.get("type") == DocumentType.GOOGLE_DRIVE_FILE:
|
||||
doc_type = DocumentType.GOOGLE_DRIVE_FILE
|
||||
|
|
@ -657,7 +663,7 @@ async def add_received_file_document_using_llamacloud(
|
|||
updated_at=get_current_timestamp(),
|
||||
created_by_id=user_id,
|
||||
connector_id=connector.get("connector_id") if connector else None,
|
||||
status=DocumentStatus.ready(), # Mark as ready
|
||||
status=DocumentStatus.ready(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
@ -682,6 +688,7 @@ async def add_received_file_document_using_docling(
|
|||
search_space_id: int,
|
||||
user_id: str,
|
||||
connector: dict | None = None,
|
||||
enable_summary: bool = True,
|
||||
) -> Document | None:
|
||||
"""
|
||||
Process and store document content parsed by Docling.
|
||||
|
|
@ -734,33 +741,32 @@ async def add_received_file_document_using_docling(
|
|||
f"No long context LLM configured for user {user_id} in search_space {search_space_id}"
|
||||
)
|
||||
|
||||
# Generate summary using chunked processing for large documents
|
||||
from app.services.docling_service import create_docling_service
|
||||
if enable_summary:
|
||||
from app.services.docling_service import create_docling_service
|
||||
|
||||
docling_service = create_docling_service()
|
||||
docling_service = create_docling_service()
|
||||
|
||||
summary_content = await docling_service.process_large_document_summary(
|
||||
content=file_in_markdown, llm=user_llm, document_title=file_name
|
||||
)
|
||||
summary_content = await docling_service.process_large_document_summary(
|
||||
content=file_in_markdown, llm=user_llm, document_title=file_name
|
||||
)
|
||||
|
||||
# Enhance summary with metadata
|
||||
document_metadata = {
|
||||
"file_name": file_name,
|
||||
"etl_service": "DOCLING",
|
||||
"document_type": "File Document",
|
||||
}
|
||||
metadata_parts = []
|
||||
metadata_parts.append("# DOCUMENT METADATA")
|
||||
document_metadata = {
|
||||
"file_name": file_name,
|
||||
"etl_service": "DOCLING",
|
||||
"document_type": "File Document",
|
||||
}
|
||||
metadata_parts = ["# DOCUMENT METADATA"]
|
||||
for key, value in document_metadata.items():
|
||||
if value:
|
||||
formatted_key = key.replace("_", " ").title()
|
||||
metadata_parts.append(f"**{formatted_key}:** {value}")
|
||||
|
||||
for key, value in document_metadata.items():
|
||||
if value: # Only include non-empty values
|
||||
formatted_key = key.replace("_", " ").title()
|
||||
metadata_parts.append(f"**{formatted_key}:** {value}")
|
||||
|
||||
metadata_section = "\n".join(metadata_parts)
|
||||
enhanced_summary_content = (
|
||||
f"{metadata_section}\n\n# DOCUMENT SUMMARY\n\n{summary_content}"
|
||||
)
|
||||
metadata_section = "\n".join(metadata_parts)
|
||||
enhanced_summary_content = (
|
||||
f"{metadata_section}\n\n# DOCUMENT SUMMARY\n\n{summary_content}"
|
||||
)
|
||||
else:
|
||||
enhanced_summary_content = f"File: {file_name}\n\n{file_in_markdown[:4000]}"
|
||||
|
||||
summary_embedding = embed_text(enhanced_summary_content)
|
||||
|
||||
|
|
@ -1219,9 +1225,10 @@ async def process_file_in_background(
|
|||
print("Error deleting temp file", e)
|
||||
pass
|
||||
|
||||
# Pass the documents to the existing background task
|
||||
enable_summary = connector.get("enable_summary", True) if connector else True
|
||||
result = await add_received_file_document_using_unstructured(
|
||||
session, filename, docs, search_space_id, user_id, connector
|
||||
session, filename, docs, search_space_id, user_id, connector,
|
||||
enable_summary=enable_summary,
|
||||
)
|
||||
|
||||
if connector:
|
||||
|
|
@ -1362,7 +1369,7 @@ async def process_file_in_background(
|
|||
# Extract text content from the markdown documents
|
||||
markdown_content = doc.text
|
||||
|
||||
# Process the documents using our LlamaCloud background task
|
||||
enable_summary = connector.get("enable_summary", True) if connector else True
|
||||
doc_result = await add_received_file_document_using_llamacloud(
|
||||
session,
|
||||
filename,
|
||||
|
|
@ -1370,6 +1377,7 @@ async def process_file_in_background(
|
|||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
connector=connector,
|
||||
enable_summary=enable_summary,
|
||||
)
|
||||
|
||||
# Track if this document was successfully created
|
||||
|
|
@ -1516,7 +1524,7 @@ async def process_file_in_background(
|
|||
session, notification, stage="chunking"
|
||||
)
|
||||
|
||||
# Process the document using our Docling background task
|
||||
enable_summary = connector.get("enable_summary", True) if connector else True
|
||||
doc_result = await add_received_file_document_using_docling(
|
||||
session,
|
||||
filename,
|
||||
|
|
@ -1524,6 +1532,7 @@ async def process_file_in_background(
|
|||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
connector=connector,
|
||||
enable_summary=enable_summary,
|
||||
)
|
||||
|
||||
if doc_result:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue