mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-25 19:15:18 +02:00
feat(connectors): add retry and auth telemetry events
This commit is contained in:
parent
c4abbd6e20
commit
7a3b278b75
2 changed files with 32 additions and 6 deletions
|
|
@@ -43,7 +43,7 @@ from app.db import (
|
||||||
async_session_maker,
|
async_session_maker,
|
||||||
get_async_session,
|
get_async_session,
|
||||||
)
|
)
|
||||||
from app.observability import metrics as ot_metrics
|
from app.observability import metrics as ot_metrics, otel as ot
|
||||||
from app.schemas import (
|
from app.schemas import (
|
||||||
GoogleDriveIndexRequest,
|
GoogleDriveIndexRequest,
|
||||||
MCPConnectorCreate,
|
MCPConnectorCreate,
|
||||||
|
|
@@ -1246,6 +1246,12 @@ async def _persist_auth_expired(session: AsyncSession, connector_id: int) -> Non
|
||||||
"""Flag a connector as auth_expired so the frontend shows a re-auth prompt."""
|
"""Flag a connector as auth_expired so the frontend shows a re-auth prompt."""
|
||||||
from sqlalchemy.orm.attributes import flag_modified
|
from sqlalchemy.orm.attributes import flag_modified
|
||||||
|
|
||||||
|
ot.add_event(
|
||||||
|
"connector.auth.expired",
|
||||||
|
{
|
||||||
|
"error.category": "auth_failed",
|
||||||
|
},
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
result = await session.execute(
|
result = await session.execute(
|
||||||
select(SearchSourceConnector).where(
|
select(SearchSourceConnector).where(
|
||||||
|
|
@@ -1305,6 +1311,13 @@ async def _run_indexing_with_notifications(
|
||||||
try:
|
try:
|
||||||
connector_lock_acquired = acquire_connector_indexing_lock(connector_id)
|
connector_lock_acquired = acquire_connector_indexing_lock(connector_id)
|
||||||
if not connector_lock_acquired:
|
if not connector_lock_acquired:
|
||||||
|
ot.add_event(
|
||||||
|
"connector.sync.skipped",
|
||||||
|
{
|
||||||
|
"skip.reason": "lock_contention",
|
||||||
|
"error.category": "lock_contention",
|
||||||
|
},
|
||||||
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Skipping indexing for connector {connector_id} "
|
f"Skipping indexing for connector {connector_id} "
|
||||||
"(another worker already holds Redis connector lock)"
|
"(another worker already holds Redis connector lock)"
|
||||||
|
|
@@ -1375,6 +1388,15 @@ async def _run_indexing_with_notifications(
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Callback to update notification during API retries (rate limits, etc.)"""
|
"""Callback to update notification during API retries (rate limits, etc.)"""
|
||||||
nonlocal notification
|
nonlocal notification
|
||||||
|
ot.add_event(
|
||||||
|
"connector.retry.scheduled",
|
||||||
|
{
|
||||||
|
"retry.reason": retry_reason,
|
||||||
|
"retry.attempt": attempt,
|
||||||
|
"retry.max": max_attempts,
|
||||||
|
"retry.delay_ms": int(wait_seconds * 1000),
|
||||||
|
},
|
||||||
|
)
|
||||||
if notification:
|
if notification:
|
||||||
try:
|
try:
|
||||||
await session.refresh(notification)
|
await session.refresh(notification)
|
||||||
|
|
|
||||||
|
|
@@ -22,15 +22,18 @@ def run_async_celery_task[T](coro_factory: Callable[[], Awaitable[T]]) -> T:
|
||||||
task_name = getattr(current_task, "name", None) or "unknown"
|
task_name = getattr(current_task, "name", None) or "unknown"
|
||||||
t0 = time.perf_counter()
|
t0 = time.perf_counter()
|
||||||
status = "failed"
|
status = "failed"
|
||||||
|
error_category: str | None = None
|
||||||
try:
|
try:
|
||||||
with ot.connector_sync_span(connector_type=task_name) as sp:
|
with ot.connector_sync_span(connector_type=task_name) as sp:
|
||||||
result = _run_async_celery_task(coro_factory)
|
try:
|
||||||
sp.set_attribute("connector.status", "success")
|
result = _run_async_celery_task(coro_factory)
|
||||||
|
sp.set_attribute("connector.status", "success")
|
||||||
|
except Exception as exc:
|
||||||
|
error_category = ot_metrics.categorize_exception(exc)
|
||||||
|
sp.set_attribute("connector.error.category", error_category)
|
||||||
|
raise
|
||||||
status = "success"
|
status = "success"
|
||||||
return result
|
return result
|
||||||
except Exception:
|
|
||||||
status = "failed"
|
|
||||||
raise
|
|
||||||
finally:
|
finally:
|
||||||
elapsed_s = time.perf_counter() - t0
|
elapsed_s = time.perf_counter() - t0
|
||||||
ot_metrics.record_connector_sync_duration(
|
ot_metrics.record_connector_sync_duration(
|
||||||
|
|
@@ -40,6 +43,7 @@ def run_async_celery_task[T](coro_factory: Callable[[], Awaitable[T]]) -> T:
|
||||||
ot_metrics.record_connector_sync_outcome(
|
ot_metrics.record_connector_sync_outcome(
|
||||||
connector_type=task_name,
|
connector_type=task_name,
|
||||||
status=status,
|
status=status,
|
||||||
|
error_category=error_category,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue