mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 08:46:22 +02:00
Webcrawler connector draft
This commit is contained in:
parent
419f94e8ee
commit
896e410e2a
26 changed files with 1225 additions and 9 deletions
|
|
@ -468,6 +468,26 @@ def validate_connector_config(
|
|||
value = config.get(key)
|
||||
if not isinstance(value, list) or not value:
|
||||
raise ValueError(f"{field_name} must be a non-empty list of strings")
|
||||
|
||||
def validate_firecrawl_api_key_format() -> None:
    """Check that an optional Firecrawl API key has the expected prefix.

    Reads ``FIRECRAWL_API_KEY`` from the ``config`` mapping of the
    surrounding scope. A missing, empty, or whitespace-only value is
    accepted, since nothing here requires the key to be set.

    Raises:
        ValueError: If the value is set but is not a string, or if its
            stripped form does not start with ``"fc-"``.
    """
    api_key = config.get("FIRECRAWL_API_KEY", "")
    if not api_key:
        return

    # Reject non-string values explicitly so callers get a ValueError
    # instead of an AttributeError from calling .strip() on e.g. an int.
    if not isinstance(api_key, str):
        raise ValueError("Firecrawl API key must be a string.")

    stripped_key = api_key.strip()
    # Firecrawl API keys typically start with "fc-"
    if stripped_key and not stripped_key.startswith("fc-"):
        raise ValueError(
            "Firecrawl API key should start with 'fc-'. Please verify your API key."
        )
|
||||
|
||||
|
||||
def validate_initial_urls() -> None:
    """Check every URL listed in the optional ``INITIAL_URLS`` setting.

    Reads ``INITIAL_URLS`` from the ``config`` mapping of the surrounding
    scope. The value is treated as newline-separated text: each line is
    stripped, blank lines are ignored, and every remaining entry is
    checked with ``validators.url``. A missing, empty, or whitespace-only
    value is accepted.

    Raises:
        ValueError: If the value is set but is not a string, or if any
            entry fails the ``validators.url`` check.
    """
    initial_urls = config.get("INITIAL_URLS", "")
    if not initial_urls:
        return

    # Reject non-string values explicitly so callers get a ValueError
    # instead of an AttributeError from calling .strip() on e.g. a list.
    if not isinstance(initial_urls, str):
        raise ValueError("INITIAL_URLS must be a newline-separated string of URLs")

    for line in initial_urls.split("\n"):
        url = line.strip()
        if not url:
            continue
        # validators.url returns a falsy result object for invalid input.
        if not validators.url(url):
            raise ValueError(f"Invalid URL format in INITIAL_URLS: {url}")
|
||||
|
||||
# Lookup table for connector validation rules
|
||||
connector_rules = {
|
||||
|
|
@ -550,6 +570,14 @@ def validate_connector_config(
|
|||
# "validators": {}
|
||||
# },
|
||||
"LUMA_CONNECTOR": {"required": ["LUMA_API_KEY"], "validators": {}},
|
||||
"WEBCRAWLER_CONNECTOR": {
|
||||
"required": [], # No required fields - API key is optional
|
||||
"optional": ["FIRECRAWL_API_KEY", "INITIAL_URLS"],
|
||||
"validators": {
|
||||
"FIRECRAWL_API_KEY": lambda: validate_firecrawl_api_key_format(),
|
||||
"INITIAL_URLS": lambda: validate_initial_urls(),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
rules = connector_rules.get(connector_type_str)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue