Webcrawler connector draft

This commit is contained in:
samkul-swe 2025-11-21 20:45:59 -08:00
parent 419f94e8ee
commit 896e410e2a
26 changed files with 1225 additions and 9 deletions

View file

@ -468,6 +468,26 @@ def validate_connector_config(
value = config.get(key)
if not isinstance(value, list) or not value:
raise ValueError(f"{field_name} must be a non-empty list of strings")
def validate_firecrawl_api_key_format() -> None:
    """Check that an optional Firecrawl API key has the expected prefix.

    Reads ``FIRECRAWL_API_KEY`` from the ``config`` mapping of the
    surrounding scope. The key is optional: a missing, empty, or
    whitespace-only value is accepted without error.

    Raises:
        ValueError: If a key is supplied but is not a string, or if it does
            not start with ``"fc-"`` once surrounding whitespace is removed.
    """
    api_key = config.get("FIRECRAWL_API_KEY", "")
    if not api_key:
        return

    # Reject non-string values explicitly; otherwise the .strip() call below
    # would fail with AttributeError instead of a validation error.
    if not isinstance(api_key, str):
        raise ValueError("Firecrawl API key must be a string")

    stripped_key = api_key.strip()
    # Firecrawl API keys typically start with "fc-"
    if stripped_key and not stripped_key.startswith("fc-"):
        raise ValueError(
            "Firecrawl API key should start with 'fc-'. Please verify your API key."
        )
def validate_initial_urls() -> None:
    """Check that every entry of the optional ``INITIAL_URLS`` value is a URL.

    Reads ``INITIAL_URLS`` from the ``config`` mapping of the surrounding
    scope. The value is treated as a newline-separated string; blank lines
    and whitespace around each entry are ignored. A missing, empty, or
    whitespace-only value is accepted without error.

    Raises:
        ValueError: If the value is supplied but is not a string, or if any
            non-blank line is rejected by ``validators.url``.
    """
    initial_urls = config.get("INITIAL_URLS", "")
    if not initial_urls:
        return

    # Reject non-string values (e.g. a list of URLs) explicitly; otherwise
    # the string methods below would fail with AttributeError instead of a
    # validation error.
    if not isinstance(initial_urls, str):
        raise ValueError("INITIAL_URLS must be a newline-separated string of URLs")

    for line in initial_urls.split("\n"):
        url = line.strip()
        if not url:
            continue
        # The result of validators.url is relied on to be falsy for an
        # invalid URL.
        if not validators.url(url):
            raise ValueError(f"Invalid URL format in INITIAL_URLS: {url}")
# Lookup table for connector validation rules
connector_rules = {
@ -550,6 +570,14 @@ def validate_connector_config(
# "validators": {}
# },
"LUMA_CONNECTOR": {"required": ["LUMA_API_KEY"], "validators": {}},
"WEBCRAWLER_CONNECTOR": {
"required": [], # No required fields - API key is optional
"optional": ["FIRECRAWL_API_KEY", "INITIAL_URLS"],
"validators": {
"FIRECRAWL_API_KEY": lambda: validate_firecrawl_api_key_format(),
"INITIAL_URLS": lambda: validate_initial_urls(),
},
},
}
rules = connector_rules.get(connector_type_str)