fix: skip webcrawler indexing gracefully when no URLs configured

2026-05-03 04:42:39 +02:00 · 2026-01-28 17:54:46 +02:00 · 2026-01-28 17:54:46 +02:00 · b20fbaca4b
commit b20fbaca4b
parent 4f7ed8439f
5 changed files with 103 additions and 14 deletions
--- a/surfsense_backend/app/utils/periodic_scheduler.py
+++ b/surfsense_backend/app/utils/periodic_scheduler.py
@@ -43,6 +43,7 @@ def create_periodic_schedule(
    user_id: str,
    connector_type: SearchSourceConnectorType,
    frequency_minutes: int,
+    connector_config: dict | None = None,
 ) -> bool:
    """
    Trigger the first indexing run immediately when periodic indexing is enabled.
@@ -57,11 +58,26 @@ def create_periodic_schedule(
        user_id: User ID
        connector_type: Type of connector
        frequency_minutes: Frequency in minutes (used for logging)
+        connector_config: Optional connector config dict for validation

    Returns:
        True if successful, False otherwise
    """
    try:
+        # Special handling for connectors that require config validation
+        if connector_type == SearchSourceConnectorType.WEBCRAWLER_CONNECTOR:
+            from app.utils.webcrawler_utils import parse_webcrawler_urls
+
+            config = connector_config or {}
+            urls = parse_webcrawler_urls(config.get("INITIAL_URLS"))
+
+            if not urls:
+                logger.info(
+                    f"Webcrawler connector {connector_id} has no URLs configured, "
+                    "skipping first indexing run (will run when URLs are added)"
+                )
+                return True  # Return success - schedule is created, just no first run
+
        logger.info(
            f"Periodic indexing enabled for connector {connector_id} "
            f"(frequency: {frequency_minutes} minutes). Triggering first run..."