mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-27 09:46:25 +02:00
Merge remote-tracking branch 'upstream/dev' into feat/obsidian-plugin
This commit is contained in:
commit
9b1b9a90c0
175 changed files with 10592 additions and 2302 deletions
129
surfsense_backend/app/utils/async_retry.py
Normal file
129
surfsense_backend/app/utils/async_retry.py
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
"""Async retry decorators for connector API calls, built on tenacity."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from collections.abc import Callable
|
||||
from typing import TypeVar
|
||||
|
||||
import httpx
|
||||
from tenacity import (
|
||||
before_sleep_log,
|
||||
retry,
|
||||
retry_if_exception,
|
||||
stop_after_attempt,
|
||||
stop_after_delay,
|
||||
wait_exponential_jitter,
|
||||
)
|
||||
|
||||
from app.connectors.exceptions import (
|
||||
ConnectorAPIError,
|
||||
ConnectorAuthError,
|
||||
ConnectorError,
|
||||
ConnectorRateLimitError,
|
||||
ConnectorTimeoutError,
|
||||
)
|
||||
|
||||
# Module-level fallback logger; ``build_retry`` swaps in a per-service child
# logger ("connector.retry.<service>") when a service name is supplied.
logger = logging.getLogger(__name__)

# Generic placeholder for a decorated callable's type.
# NOTE(review): not referenced in this file's visible code — presumably kept
# for annotating wrapped functions elsewhere; confirm before removing.
F = TypeVar("F", bound=Callable)
|
||||
|
||||
|
||||
def _is_retryable(exc: BaseException) -> bool:
    """Decide whether *exc* represents a transient failure worth retrying.

    ``ConnectorError`` subclasses carry their own ``retryable`` flag, which
    takes precedence.  Raw httpx timeouts and connection failures (errors
    raised before any HTTP status was received) are always transient.
    """
    if isinstance(exc, ConnectorError):
        return exc.retryable
    return isinstance(exc, (httpx.TimeoutException, httpx.ConnectError))
|
||||
|
||||
|
||||
def build_retry(
    *,
    max_attempts: int = 4,
    max_delay: float = 60.0,
    initial_delay: float = 1.0,
    total_timeout: float = 180.0,
    service: str = "",
) -> Callable:
    """Build a configurable tenacity ``@retry`` decorator.

    Retries exceptions accepted by :func:`_is_retryable`, waiting with
    exponential backoff plus jitter between attempts, and re-raises the last
    exception unchanged once either stop condition is hit.

    Args:
        max_attempts: Give up after this many attempts.
        max_delay: Upper bound (seconds) on a single backoff wait.
        initial_delay: First backoff wait (seconds).
        total_timeout: Give up once this much wall-clock time (seconds) has
            elapsed across attempts.
        service: Optional connector name; routes warnings to a dedicated
            ``connector.retry.<service>`` logger instead of the module logger.
    """
    if service:
        retry_logger = logging.getLogger(f"connector.retry.{service}")
    else:
        retry_logger = logger

    # Stop on whichever limit is reached first: attempt count or elapsed time.
    stop_policy = stop_after_attempt(max_attempts) | stop_after_delay(total_timeout)
    wait_policy = wait_exponential_jitter(initial=initial_delay, max=max_delay)

    return retry(
        retry=retry_if_exception(_is_retryable),
        stop=stop_policy,
        wait=wait_policy,
        reraise=True,
        before_sleep=before_sleep_log(retry_logger, logging.WARNING),
    )
|
||||
|
||||
|
||||
def retry_on_transient(
    *,
    service: str = "",
    max_attempts: int = 4,
) -> Callable:
    """Shorthand: retry up to *max_attempts* on rate-limits, timeouts, and 5xx.

    Delegates to :func:`build_retry`, keeping its default delay and total
    timeout settings.
    """
    options = {"max_attempts": max_attempts, "service": service}
    return build_retry(**options)
|
||||
|
||||
|
||||
def raise_for_status(
    response: httpx.Response,
    *,
    service: str = "",
) -> None:
    """Map non-2xx httpx responses to the appropriate ``ConnectorError``.

    Successful (2xx) responses pass through silently.  Otherwise raises:

    * 429             -> ``ConnectorRateLimitError`` (with parsed ``Retry-After``)
    * 401 / 403       -> ``ConnectorAuthError``
    * 504             -> ``ConnectorTimeoutError``
    * other 5xx       -> ``ConnectorAPIError``
    * remaining codes -> ``ConnectorAPIError``

    Args:
        response: The completed httpx response to inspect.
        service: Connector name used to prefix error messages and tag the
            raised exception.
    """
    if response.is_success:
        return

    status = response.status_code

    # Prefer the decoded JSON payload for diagnostics; fall back to a
    # truncated text snippet (or None for an empty body).
    try:
        body = response.json()
    except Exception:
        body = response.text[:500] if response.text else None

    if status == 429:
        # Retry-After may be absent or a non-numeric HTTP-date; only numeric
        # values are forwarded, anything else degrades to None.
        retry_after: float | None = None
        header_value = response.headers.get("Retry-After")
        if header_value:
            try:
                retry_after = float(header_value)
            except (ValueError, TypeError):
                retry_after = None
        raise ConnectorRateLimitError(
            f"{service} rate limited (429)",
            service=service,
            retry_after=retry_after,
            response_body=body,
        )

    # Remaining error classes all share the same keyword payload.
    details = {"service": service, "status_code": status, "response_body": body}

    if status in (401, 403):
        raise ConnectorAuthError(
            f"{service} authentication failed ({status})", **details
        )

    if status == 504:
        raise ConnectorTimeoutError(f"{service} gateway timeout (504)", **details)

    if status >= 500:
        raise ConnectorAPIError(f"{service} server error ({status})", **details)

    raise ConnectorAPIError(f"{service} request failed ({status})", **details)
|
||||
|
|
@ -39,7 +39,7 @@ BASE_NAME_FOR_TYPE = {
|
|||
def get_base_name_for_type(connector_type: SearchSourceConnectorType) -> str:
    """Get a friendly display name for a connector type.

    Looks up an explicit mapping first; otherwise derives a name by
    title-casing the enum *value* (e.g. "slack_connector" -> "Slack
    Connector").  Using ``.value`` rather than the enum member itself is
    required because plain ``Enum`` members have no ``.replace`` method.
    """
    return BASE_NAME_FOR_TYPE.get(
        connector_type, connector_type.value.replace("_", " ").title()
    )
|
||||
|
||||
|
||||
|
|
@ -231,9 +231,11 @@ async def generate_unique_connector_name(
|
|||
base = get_base_name_for_type(connector_type)
|
||||
|
||||
if identifier:
|
||||
return f"{base} - {identifier}"
|
||||
name = f"{base} - {identifier}"
|
||||
return await ensure_unique_connector_name(
|
||||
session, name, search_space_id, user_id,
|
||||
)
|
||||
|
||||
# Fallback: use counter for uniqueness
|
||||
count = await count_connectors_of_type(
|
||||
session, connector_type, search_space_id, user_id
|
||||
)
|
||||
|
|
|
|||
|
|
@ -18,19 +18,9 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
# Mapping of connector types to their corresponding Celery task names
|
||||
CONNECTOR_TASK_MAP = {
|
||||
SearchSourceConnectorType.SLACK_CONNECTOR: "index_slack_messages",
|
||||
SearchSourceConnectorType.TEAMS_CONNECTOR: "index_teams_messages",
|
||||
SearchSourceConnectorType.NOTION_CONNECTOR: "index_notion_pages",
|
||||
SearchSourceConnectorType.GITHUB_CONNECTOR: "index_github_repos",
|
||||
SearchSourceConnectorType.LINEAR_CONNECTOR: "index_linear_issues",
|
||||
SearchSourceConnectorType.JIRA_CONNECTOR: "index_jira_issues",
|
||||
SearchSourceConnectorType.CONFLUENCE_CONNECTOR: "index_confluence_pages",
|
||||
SearchSourceConnectorType.CLICKUP_CONNECTOR: "index_clickup_tasks",
|
||||
SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR: "index_google_calendar_events",
|
||||
SearchSourceConnectorType.AIRTABLE_CONNECTOR: "index_airtable_records",
|
||||
SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR: "index_google_gmail_messages",
|
||||
SearchSourceConnectorType.DISCORD_CONNECTOR: "index_discord_messages",
|
||||
SearchSourceConnectorType.LUMA_CONNECTOR: "index_luma_events",
|
||||
SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: "index_elasticsearch_documents",
|
||||
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: "index_crawled_urls",
|
||||
SearchSourceConnectorType.BOOKSTACK_CONNECTOR: "index_bookstack_pages",
|
||||
|
|
@ -83,39 +73,19 @@ def create_periodic_schedule(
|
|||
f"(frequency: {frequency_minutes} minutes). Triggering first run..."
|
||||
)
|
||||
|
||||
# Import all indexing tasks
|
||||
from app.tasks.celery_tasks.connector_tasks import (
|
||||
index_airtable_records_task,
|
||||
index_bookstack_pages_task,
|
||||
index_clickup_tasks_task,
|
||||
index_confluence_pages_task,
|
||||
index_crawled_urls_task,
|
||||
index_discord_messages_task,
|
||||
index_elasticsearch_documents_task,
|
||||
index_github_repos_task,
|
||||
index_google_calendar_events_task,
|
||||
index_google_gmail_messages_task,
|
||||
index_jira_issues_task,
|
||||
index_linear_issues_task,
|
||||
index_luma_events_task,
|
||||
index_notion_pages_task,
|
||||
index_slack_messages_task,
|
||||
)
|
||||
|
||||
# Map connector type to task
|
||||
task_map = {
|
||||
SearchSourceConnectorType.SLACK_CONNECTOR: index_slack_messages_task,
|
||||
SearchSourceConnectorType.NOTION_CONNECTOR: index_notion_pages_task,
|
||||
SearchSourceConnectorType.GITHUB_CONNECTOR: index_github_repos_task,
|
||||
SearchSourceConnectorType.LINEAR_CONNECTOR: index_linear_issues_task,
|
||||
SearchSourceConnectorType.JIRA_CONNECTOR: index_jira_issues_task,
|
||||
SearchSourceConnectorType.CONFLUENCE_CONNECTOR: index_confluence_pages_task,
|
||||
SearchSourceConnectorType.CLICKUP_CONNECTOR: index_clickup_tasks_task,
|
||||
SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR: index_google_calendar_events_task,
|
||||
SearchSourceConnectorType.AIRTABLE_CONNECTOR: index_airtable_records_task,
|
||||
SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR: index_google_gmail_messages_task,
|
||||
SearchSourceConnectorType.DISCORD_CONNECTOR: index_discord_messages_task,
|
||||
SearchSourceConnectorType.LUMA_CONNECTOR: index_luma_events_task,
|
||||
SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: index_elasticsearch_documents_task,
|
||||
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_crawled_urls_task,
|
||||
SearchSourceConnectorType.BOOKSTACK_CONNECTOR: index_bookstack_pages_task,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue