Merge pull request #392 from Aki-07/feature/add-searxng-connector

Add Searxng connector
This commit is contained in:
Rohan Verma 2025-10-13 13:41:37 -07:00 committed by GitHub
commit 08661e686c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 838 additions and 7 deletions

View file

@ -0,0 +1,42 @@
"""Add SearxNG connector enum value
Revision ID: 26
Revises: 25
Create Date: 2025-01-18 00:00:00.000000
"""
from collections.abc import Sequence
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "26"
down_revision: str | None = "25"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
"""Safely add SEARXNG_API to searchsourceconnectortype enum."""
op.execute(
"""
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_type t
JOIN pg_enum e ON t.oid = e.enumtypid
WHERE t.typname = 'searchsourceconnectortype' AND e.enumlabel = 'SEARXNG_API'
) THEN
ALTER TYPE searchsourceconnectortype ADD VALUE 'SEARXNG_API';
END IF;
END
$$;
"""
)
def downgrade() -> None:
"""Downgrade not supported for enum edits."""
pass

View file

@ -1033,6 +1033,30 @@ async def fetch_relevant_documents(
}
)
elif connector == "SEARXNG_API":
(
source_object,
searx_chunks,
) = await connector_service.search_searxng(
user_query=reformulated_query,
user_id=user_id,
search_space_id=search_space_id,
top_k=top_k,
)
if source_object:
all_sources.append(source_object)
all_raw_documents.extend(searx_chunks)
if streaming_service and writer:
writer(
{
"yield_value": streaming_service.format_terminal_info_delta(
f"🌐 Found {len(searx_chunks)} SearxNG results related to your query"
)
}
)
elif connector == "LINKUP_API":
linkup_mode = "standard"

View file

@ -55,6 +55,7 @@ class DocumentType(str, Enum):
class SearchSourceConnectorType(str, Enum):
SERPER_API = "SERPER_API" # NOT IMPLEMENTED YET : DON'T REMEMBER WHY : MOST PROBABLY BECAUSE WE NEED TO CRAWL THE RESULTS RETURNED BY IT
TAVILY_API = "TAVILY_API"
SEARXNG_API = "SEARXNG_API"
LINKUP_API = "LINKUP_API"
SLACK_CONNECTOR = "SLACK_CONNECTOR"
NOTION_CONNECTOR = "NOTION_CONNECTOR"

View file

@ -1,6 +1,8 @@
import asyncio
from typing import Any
from urllib.parse import urljoin
import httpx
from linkup import LinkupClient
from sqlalchemy import func
from sqlalchemy.ext.asyncio import AsyncSession
@ -372,6 +374,192 @@ class ConnectorService:
"sources": [],
}, []
async def search_searxng(
self,
user_query: str,
user_id: str,
search_space_id: int,
top_k: int = 20,
) -> tuple:
"""
Search using a configured SearxNG instance and return both sources and documents.
"""
searx_connector = await self.get_connector_by_type(
user_id, SearchSourceConnectorType.SEARXNG_API, search_space_id
)
if not searx_connector:
return {
"id": 11,
"name": "SearxNG Search",
"type": "SEARXNG_API",
"sources": [],
}, []
config = searx_connector.config or {}
host = config.get("SEARXNG_HOST")
if not host:
print("SearxNG connector is missing SEARXNG_HOST configuration")
return {
"id": 11,
"name": "SearxNG Search",
"type": "SEARXNG_API",
"sources": [],
}, []
api_key = config.get("SEARXNG_API_KEY")
engines = config.get("SEARXNG_ENGINES")
categories = config.get("SEARXNG_CATEGORIES")
language = config.get("SEARXNG_LANGUAGE")
safesearch = config.get("SEARXNG_SAFESEARCH")
def _parse_bool(value: Any, default: bool = True) -> bool:
if isinstance(value, bool):
return value
if isinstance(value, str):
lowered = value.strip().lower()
if lowered in {"true", "1", "yes", "on"}:
return True
if lowered in {"false", "0", "no", "off"}:
return False
return default
verify_ssl = _parse_bool(config.get("SEARXNG_VERIFY_SSL", True))
safesearch_value: int | None = None
if isinstance(safesearch, str):
safesearch_clean = safesearch.strip()
if safesearch_clean.isdigit():
safesearch_value = int(safesearch_clean)
elif isinstance(safesearch, (int, float)):
safesearch_value = int(safesearch)
if safesearch_value is not None and not (0 <= safesearch_value <= 2):
safesearch_value = None
def _format_list(value: Any) -> str | None:
if value is None:
return None
if isinstance(value, str):
value = value.strip()
return value or None
if isinstance(value, (list, tuple, set)):
cleaned = [str(item).strip() for item in value if str(item).strip()]
return ",".join(cleaned) if cleaned else None
return str(value)
params: dict[str, Any] = {
"q": user_query,
"format": "json",
"language": language or "",
"limit": max(1, min(top_k, 50)),
}
engines_param = _format_list(engines)
if engines_param:
params["engines"] = engines_param
categories_param = _format_list(categories)
if categories_param:
params["categories"] = categories_param
if safesearch_value is not None:
params["safesearch"] = safesearch_value
if not params.get("language"):
params.pop("language")
headers = {"Accept": "application/json"}
if api_key:
headers["X-API-KEY"] = api_key
searx_endpoint = urljoin(host if host.endswith("/") else f"{host}/", "search")
try:
async with httpx.AsyncClient(timeout=20.0, verify=verify_ssl) as client:
response = await client.get(
searx_endpoint,
params=params,
headers=headers,
)
response.raise_for_status()
except httpx.HTTPError as exc:
print(f"Error searching with SearxNG: {exc!s}")
return {
"id": 11,
"name": "SearxNG Search",
"type": "SEARXNG_API",
"sources": [],
}, []
try:
data = response.json()
except ValueError:
print("Failed to decode JSON response from SearxNG")
return {
"id": 11,
"name": "SearxNG Search",
"type": "SEARXNG_API",
"sources": [],
}, []
searx_results = data.get("results", [])
if not searx_results:
return {
"id": 11,
"name": "SearxNG Search",
"type": "SEARXNG_API",
"sources": [],
}, []
sources_list: list[dict[str, Any]] = []
documents: list[dict[str, Any]] = []
async with self.counter_lock:
for result in searx_results:
description = result.get("content") or result.get("snippet") or ""
if len(description) > 160:
description = f"{description[:157]}..."
source = {
"id": self.source_id_counter,
"title": result.get("title", "SearxNG Result"),
"description": description,
"url": result.get("url", ""),
}
sources_list.append(source)
metadata = {
"url": result.get("url", ""),
"engines": result.get("engines", []),
"category": result.get("category"),
"source": "SEARXNG_API",
}
document = {
"chunk_id": self.source_id_counter,
"content": description or result.get("content", ""),
"score": result.get("score", 0.0),
"document": {
"id": self.source_id_counter,
"title": result.get("title", "SearxNG Result"),
"document_type": "SEARXNG_API",
"metadata": metadata,
},
}
documents.append(document)
self.source_id_counter += 1
result_object = {
"id": 11,
"name": "SearxNG Search",
"type": "SEARXNG_API",
"sources": sources_list,
}
return result_object, documents
async def search_slack(
self,
user_query: str,

View file

@ -424,6 +424,22 @@ def validate_connector_config(
connector_rules = {
"SERPER_API": {"required": ["SERPER_API_KEY"], "validators": {}},
"TAVILY_API": {"required": ["TAVILY_API_KEY"], "validators": {}},
"SEARXNG_API": {
"required": ["SEARXNG_HOST"],
"optional": [
"SEARXNG_API_KEY",
"SEARXNG_ENGINES",
"SEARXNG_CATEGORIES",
"SEARXNG_LANGUAGE",
"SEARXNG_SAFESEARCH",
"SEARXNG_VERIFY_SSL",
],
"validators": {
"SEARXNG_HOST": lambda: validate_url_field(
"SEARXNG_HOST", "SearxNG"
)
},
},
"LINKUP_API": {"required": ["LINKUP_API_KEY"], "validators": {}},
"SLACK_CONNECTOR": {"required": ["SLACK_BOT_TOKEN"], "validators": {}},
"NOTION_CONNECTOR": {
@ -484,10 +500,21 @@ def validate_connector_config(
if not rules:
return config # Unknown connector type, pass through
# Validate required keys match exactly
if set(config.keys()) != set(rules["required"]):
required_keys = set(rules["required"])
optional_keys = set(rules.get("optional", []))
config_keys = set(config.keys())
# Validate that no unexpected keys are present
if not config_keys.issubset(required_keys | optional_keys):
allowed_keys = list(required_keys | optional_keys)
raise ValueError(
f"For {connector_type_str} connector type, config must only contain these keys: {rules['required']}"
f"For {connector_type_str} connector type, config may only contain these keys: {allowed_keys}"
)
# Validate that all required keys are present
if not required_keys.issubset(config_keys):
raise ValueError(
f"For {connector_type_str} connector type, config must include these keys: {sorted(required_keys)}"
)
# Apply custom validators first (these check format before emptiness)