Merge remote-tracking branch 'upstream/dev' into feat/obsidian-plugin

This commit is contained in:
Anish Sarkar 2026-04-24 21:34:55 +05:30
commit 9b1b9a90c0
175 changed files with 10592 additions and 2302 deletions

View file

@@ -0,0 +1,129 @@
"""Async retry decorators for connector API calls, built on tenacity."""
from __future__ import annotations

import logging
from collections.abc import Callable
from datetime import datetime, timezone
from email.utils import parsedate_to_datetime
from typing import TypeVar

import httpx
from tenacity import (
    before_sleep_log,
    retry,
    retry_if_exception,
    stop_after_attempt,
    stop_after_delay,
    wait_exponential_jitter,
)

from app.connectors.exceptions import (
    ConnectorAPIError,
    ConnectorAuthError,
    ConnectorError,
    ConnectorRateLimitError,
    ConnectorTimeoutError,
)
# Fallback logger used when callers don't supply a service name.
logger = logging.getLogger(__name__)
# Generic callable TypeVar for decorator typing.
# NOTE(review): not referenced in the visible signatures below — confirm it is
# used elsewhere before removing.
F = TypeVar("F", bound=Callable)
def _is_retryable(exc: BaseException) -> bool:
if isinstance(exc, ConnectorError):
return exc.retryable
if isinstance(exc, (httpx.TimeoutException, httpx.ConnectError)):
return True
return False
def build_retry(
    *,
    max_attempts: int = 4,
    max_delay: float = 60.0,
    initial_delay: float = 1.0,
    total_timeout: float = 180.0,
    service: str = "",
) -> Callable:
    """Build a configurable tenacity ``@retry`` decorator.

    Retries transient failures with exponential backoff plus jitter, stopping
    at whichever comes first: *max_attempts* attempts or *total_timeout*
    seconds of wall-clock time. The last exception is re-raised unchanged.
    """
    # Per-service child logger gives each connector its own log channel.
    retry_logger = logging.getLogger(f"connector.retry.{service}") if service else logger
    stop_policy = stop_after_attempt(max_attempts) | stop_after_delay(total_timeout)
    wait_policy = wait_exponential_jitter(initial=initial_delay, max=max_delay)
    return retry(
        retry=retry_if_exception(_is_retryable),
        stop=stop_policy,
        wait=wait_policy,
        reraise=True,
        before_sleep=before_sleep_log(retry_logger, logging.WARNING),
    )
def retry_on_transient(
    *,
    service: str = "",
    max_attempts: int = 4,
) -> Callable:
    """Shorthand for :func:`build_retry` with default backoff/timeout settings.

    Retries up to *max_attempts* times on rate-limits, timeouts, and 5xx.
    """
    decorator = build_retry(service=service, max_attempts=max_attempts)
    return decorator
def raise_for_status(
response: httpx.Response,
*,
service: str = "",
) -> None:
"""Map non-2xx httpx responses to the appropriate ``ConnectorError``."""
if response.is_success:
return
status = response.status_code
try:
body = response.json()
except Exception:
body = response.text[:500] if response.text else None
if status == 429:
retry_after_raw = response.headers.get("Retry-After")
retry_after: float | None = None
if retry_after_raw:
try:
retry_after = float(retry_after_raw)
except (ValueError, TypeError):
pass
raise ConnectorRateLimitError(
f"{service} rate limited (429)",
service=service,
retry_after=retry_after,
response_body=body,
)
if status in (401, 403):
raise ConnectorAuthError(
f"{service} authentication failed ({status})",
service=service,
status_code=status,
response_body=body,
)
if status == 504:
raise ConnectorTimeoutError(
f"{service} gateway timeout (504)",
service=service,
status_code=status,
response_body=body,
)
if status >= 500:
raise ConnectorAPIError(
f"{service} server error ({status})",
service=service,
status_code=status,
response_body=body,
)
raise ConnectorAPIError(
f"{service} request failed ({status})",
service=service,
status_code=status,
response_body=body,
)

View file

@@ -39,7 +39,7 @@ BASE_NAME_FOR_TYPE = {
def get_base_name_for_type(connector_type: SearchSourceConnectorType) -> str:
"""Get a friendly display name for a connector type."""
return BASE_NAME_FOR_TYPE.get(
connector_type, connector_type.replace("_", " ").title()
connector_type, connector_type.value.replace("_", " ").title()
)
@@ -231,9 +231,11 @@ async def generate_unique_connector_name(
base = get_base_name_for_type(connector_type)
if identifier:
return f"{base} - {identifier}"
name = f"{base} - {identifier}"
return await ensure_unique_connector_name(
session, name, search_space_id, user_id,
)
# Fallback: use counter for uniqueness
count = await count_connectors_of_type(
session, connector_type, search_space_id, user_id
)

View file

@@ -18,19 +18,9 @@ logger = logging.getLogger(__name__)
# Mapping of connector types to their corresponding Celery task names
CONNECTOR_TASK_MAP = {
SearchSourceConnectorType.SLACK_CONNECTOR: "index_slack_messages",
SearchSourceConnectorType.TEAMS_CONNECTOR: "index_teams_messages",
SearchSourceConnectorType.NOTION_CONNECTOR: "index_notion_pages",
SearchSourceConnectorType.GITHUB_CONNECTOR: "index_github_repos",
SearchSourceConnectorType.LINEAR_CONNECTOR: "index_linear_issues",
SearchSourceConnectorType.JIRA_CONNECTOR: "index_jira_issues",
SearchSourceConnectorType.CONFLUENCE_CONNECTOR: "index_confluence_pages",
SearchSourceConnectorType.CLICKUP_CONNECTOR: "index_clickup_tasks",
SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR: "index_google_calendar_events",
SearchSourceConnectorType.AIRTABLE_CONNECTOR: "index_airtable_records",
SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR: "index_google_gmail_messages",
SearchSourceConnectorType.DISCORD_CONNECTOR: "index_discord_messages",
SearchSourceConnectorType.LUMA_CONNECTOR: "index_luma_events",
SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: "index_elasticsearch_documents",
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: "index_crawled_urls",
SearchSourceConnectorType.BOOKSTACK_CONNECTOR: "index_bookstack_pages",
@@ -83,39 +73,19 @@ def create_periodic_schedule(
f"(frequency: {frequency_minutes} minutes). Triggering first run..."
)
# Import all indexing tasks
from app.tasks.celery_tasks.connector_tasks import (
index_airtable_records_task,
index_bookstack_pages_task,
index_clickup_tasks_task,
index_confluence_pages_task,
index_crawled_urls_task,
index_discord_messages_task,
index_elasticsearch_documents_task,
index_github_repos_task,
index_google_calendar_events_task,
index_google_gmail_messages_task,
index_jira_issues_task,
index_linear_issues_task,
index_luma_events_task,
index_notion_pages_task,
index_slack_messages_task,
)
# Map connector type to task
task_map = {
SearchSourceConnectorType.SLACK_CONNECTOR: index_slack_messages_task,
SearchSourceConnectorType.NOTION_CONNECTOR: index_notion_pages_task,
SearchSourceConnectorType.GITHUB_CONNECTOR: index_github_repos_task,
SearchSourceConnectorType.LINEAR_CONNECTOR: index_linear_issues_task,
SearchSourceConnectorType.JIRA_CONNECTOR: index_jira_issues_task,
SearchSourceConnectorType.CONFLUENCE_CONNECTOR: index_confluence_pages_task,
SearchSourceConnectorType.CLICKUP_CONNECTOR: index_clickup_tasks_task,
SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR: index_google_calendar_events_task,
SearchSourceConnectorType.AIRTABLE_CONNECTOR: index_airtable_records_task,
SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR: index_google_gmail_messages_task,
SearchSourceConnectorType.DISCORD_CONNECTOR: index_discord_messages_task,
SearchSourceConnectorType.LUMA_CONNECTOR: index_luma_events_task,
SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: index_elasticsearch_documents_task,
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_crawled_urls_task,
SearchSourceConnectorType.BOOKSTACK_CONNECTOR: index_bookstack_pages_task,