SurfSense/surfsense_backend/app/utils/webcrawler_utils.py
CREDO23 20b8a17254 fix(backend): handle non-string elements in webcrawler URL list
Add isinstance check to prevent AttributeError when INITIAL_URLS list
contains non-string elements (None, int, dict) from malformed config data.
2026-01-28 22:16:58 +02:00

26 lines
728 B
Python

"""
Utility functions for webcrawler connector.
"""
def parse_webcrawler_urls(initial_urls: str | list | None) -> list[str]:
"""
Parse URLs from webcrawler INITIAL_URLS value.
Handles both string (newline-separated) and list formats.
Args:
initial_urls: The INITIAL_URLS value (string, list, or None)
Returns:
List of parsed, stripped, non-empty URLs
"""
if initial_urls is None:
return []
if isinstance(initial_urls, str):
return [url.strip() for url in initial_urls.split("\n") if url.strip()]
elif isinstance(initial_urls, list):
return [url.strip() for url in initial_urls if isinstance(url, str) and url.strip()]
else:
return []