feat(back): add SearxNG search integration

This commit is contained in:
Aki-07 2025-10-12 20:44:00 +05:30
parent d2ffd12480
commit 020c5ed04e

View file

@ -1,6 +1,8 @@
import asyncio
from typing import Any
from urllib.parse import urljoin
import httpx
from linkup import LinkupClient
from sqlalchemy import func
from sqlalchemy.ext.asyncio import AsyncSession
@ -294,6 +296,189 @@ class ConnectorService:
"sources": [],
}, []
async def search_searxng(
    self,
    user_query: str,
    user_id: str,
    search_space_id: int,
    top_k: int = 20,
) -> tuple[dict[str, Any], list[dict[str, Any]]]:
    """
    Search using a configured SearxNG instance and return both sources and documents.

    Args:
        user_query: The query string sent to SearxNG as the ``q`` parameter.
        user_id: The user whose SearxNG connector should be looked up.
        search_space_id: The search space the connector lookup is scoped to.
        top_k: Maximum number of results to return; clamped to the range 1..50.

    Returns:
        A ``(result_object, documents)`` tuple. ``result_object`` is a dict with
        the fixed keys ``id`` (11), ``name``, ``type`` and ``sources``;
        ``documents`` is a list of document dicts, one per source. When the
        connector is missing or misconfigured, the request fails, the response
        is not valid JSON, or there are no results, ``sources`` and
        ``documents`` are both empty.
    """

    def _empty_result() -> tuple[dict[str, Any], list[dict[str, Any]]]:
        # Single definition of the "nothing found" payload used by every
        # early-exit path below.
        return {
            "id": 11,
            "name": "SearxNG Search",
            "type": "SEARXNG_API",
            "sources": [],
        }, []

    def _parse_bool(value: Any, default: bool = True) -> bool:
        # Accept real booleans and the usual textual spellings; anything
        # unrecognised falls back to ``default``.
        if isinstance(value, bool):
            return value
        if isinstance(value, str):
            lowered = value.strip().lower()
            if lowered in {"true", "1", "yes", "on"}:
                return True
            if lowered in {"false", "0", "no", "off"}:
                return False
        return default

    def _format_list(value: Any) -> str | None:
        # Normalise a string or a collection into the comma-separated form
        # used for the ``engines`` / ``categories`` query parameters.
        if value is None:
            return None
        if isinstance(value, str):
            value = value.strip()
            return value or None
        if isinstance(value, (list, tuple, set)):
            cleaned = [str(item).strip() for item in value if str(item).strip()]
            return ",".join(cleaned) if cleaned else None
        return str(value)

    searx_connector = await self.get_connector_by_type(
        user_id, SearchSourceConnectorType.SEARXNG_API, search_space_id
    )
    if not searx_connector:
        return _empty_result()

    config = searx_connector.config or {}

    # Coerce to str and strip so a padded or non-string value cannot break
    # the ``endswith`` / ``urljoin`` handling further down.
    raw_host = config.get("SEARXNG_HOST")
    host = str(raw_host).strip() if raw_host else ""
    if not host:
        print("SearxNG connector is missing SEARXNG_HOST configuration")
        return _empty_result()

    api_key = config.get("SEARXNG_API_KEY")
    engines = config.get("SEARXNG_ENGINES")
    categories = config.get("SEARXNG_CATEGORIES")
    language = config.get("SEARXNG_LANGUAGE")
    safesearch = config.get("SEARXNG_SAFESEARCH")
    verify_ssl = _parse_bool(config.get("SEARXNG_VERIFY_SSL", True))

    safesearch_value: int | None = None
    if isinstance(safesearch, str):
        safesearch_clean = safesearch.strip()
        if safesearch_clean.isdigit():
            safesearch_value = int(safesearch_clean)
    elif isinstance(safesearch, (int, float)):
        safesearch_value = int(safesearch)

    result_limit = max(1, min(top_k, 50))

    params: dict[str, Any] = {
        "q": user_query,
        "format": "json",
        # NOTE(review): ``limit`` does not appear among the documented SearxNG
        # search parameters; it is still sent for backward compatibility, and
        # the cap is enforced locally when the results are processed.
        "limit": result_limit,
    }
    if language:
        params["language"] = language
    engines_param = _format_list(engines)
    if engines_param:
        params["engines"] = engines_param
    categories_param = _format_list(categories)
    if categories_param:
        params["categories"] = categories_param
    if safesearch_value is not None:
        params["safesearch"] = safesearch_value

    headers = {"Accept": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    # The trailing slash keeps urljoin from replacing the last path segment
    # of a host such as "https://example.org/searx".
    searx_endpoint = urljoin(host if host.endswith("/") else f"{host}/", "search")

    try:
        async with httpx.AsyncClient(timeout=20.0, verify=verify_ssl) as client:
            response = await client.get(
                searx_endpoint,
                params=params,
                headers=headers,
            )
            response.raise_for_status()
    except (httpx.HTTPError, httpx.InvalidURL) as exc:
        # InvalidURL is not part of the HTTPError hierarchy, so it has to be
        # named explicitly for a malformed host to degrade gracefully.
        print(f"Error searching with SearxNG: {exc!s}")
        return _empty_result()

    try:
        data = response.json()
    except ValueError:
        print("Failed to decode JSON response from SearxNG")
        return _empty_result()

    # Guard against payloads that are valid JSON but not the expected shape.
    raw_results = data.get("results") if isinstance(data, dict) else None
    if not isinstance(raw_results, list):
        return _empty_result()

    # Drop malformed entries, then honour the caller's top_k locally.
    searx_results = [item for item in raw_results if isinstance(item, dict)]
    searx_results = searx_results[:result_limit]
    if not searx_results:
        return _empty_result()

    sources_list: list[dict[str, Any]] = []
    documents: list[dict[str, Any]] = []

    # The counter is shared state on the service, so ids are allocated
    # while holding the lock.
    async with self.counter_lock:
        for result in searx_results:
            full_text = str(result.get("content") or result.get("snippet") or "")

            # Only the source description is shortened; the document keeps
            # the complete text returned by SearxNG.
            description = full_text
            if len(description) > 160:
                description = f"{description[:157]}..."

            title = result.get("title", "SearxNG Result")
            url = result.get("url", "")

            sources_list.append(
                {
                    "id": self.source_id_counter,
                    "title": title,
                    "description": description,
                    "url": url,
                }
            )

            documents.append(
                {
                    "chunk_id": self.source_id_counter,
                    "content": full_text,
                    "score": result.get("score", 0.0),
                    "document": {
                        "id": self.source_id_counter,
                        "title": title,
                        "document_type": "SEARXNG_API",
                        "metadata": {
                            "url": url,
                            "engines": result.get("engines", []),
                            "category": result.get("category"),
                            "source": "SEARXNG_API",
                        },
                    },
                }
            )
            self.source_id_counter += 1

    result_object = {
        "id": 11,
        "name": "SearxNG Search",
        "type": "SEARXNG_API",
        "sources": sources_list,
    }
    return result_object, documents
# Initialize Tavily client with API key from connector config
tavily_api_key = tavily_connector.config.get("TAVILY_API_KEY")
tavily_client = TavilyClient(api_key=tavily_api_key)