mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-04 20:05:16 +02:00
feat(back): add SearxNG search integration
This commit is contained in:
parent
d2ffd12480
commit
020c5ed04e
1 changed file with 185 additions and 0 deletions
|
|
@ -1,6 +1,8 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
import httpx
|
||||||
from linkup import LinkupClient
|
from linkup import LinkupClient
|
||||||
from sqlalchemy import func
|
from sqlalchemy import func
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
@ -294,6 +296,189 @@ class ConnectorService:
|
||||||
"sources": [],
|
"sources": [],
|
||||||
}, []
|
}, []
|
||||||
|
|
||||||
|
async def search_searxng(
    self,
    user_query: str,
    user_id: str,
    search_space_id: int,
    top_k: int = 20,
) -> tuple:
    """
    Search using a configured SearxNG instance and return both sources and documents.

    The connector is looked up via ``self.get_connector_by_type`` and its
    ``config`` mapping supplies ``SEARXNG_HOST`` (required) plus the optional
    ``SEARXNG_API_KEY``, ``SEARXNG_ENGINES``, ``SEARXNG_CATEGORIES``,
    ``SEARXNG_LANGUAGE``, ``SEARXNG_SAFESEARCH`` and ``SEARXNG_VERIFY_SSL`` keys.

    Args:
        user_query: Query text sent to SearxNG as the ``q`` parameter.
        user_id: Forwarded to ``get_connector_by_type`` for the connector lookup.
        search_space_id: Forwarded to ``get_connector_by_type`` for the connector lookup.
        top_k: Maximum number of results to return; clamped to the range 1..50.

    Returns:
        tuple: ``(result_object, documents)``. ``result_object`` is a dict with the
        keys ``id``, ``name``, ``type`` and ``sources``; ``documents`` is a list of
        dicts with ``chunk_id``, ``content``, ``score`` and ``document``. When the
        connector or host is missing, the request fails, the response is not valid
        JSON, or there are no usable results, ``sources`` and ``documents`` are empty.
    """

    def _empty_result() -> tuple[dict[str, Any], list[dict[str, Any]]]:
        # A fresh pair per call so callers can never mutate a shared instance.
        return {
            "id": 11,
            "name": "SearxNG Search",
            "type": "SEARXNG_API",
            "sources": [],
        }, []

    def _parse_bool(value: Any, default: bool = True) -> bool:
        # Accept real booleans and the usual textual spellings; anything else -> default.
        if isinstance(value, bool):
            return value
        if isinstance(value, str):
            lowered = value.strip().lower()
            if lowered in {"true", "1", "yes", "on"}:
                return True
            if lowered in {"false", "0", "no", "off"}:
                return False
        return default

    def _format_list(value: Any) -> str | None:
        # Normalise a string or a collection into one comma-separated string.
        if value is None:
            return None
        if isinstance(value, str):
            return value.strip() or None
        if isinstance(value, (list, tuple, set)):
            cleaned = [str(item).strip() for item in value if str(item).strip()]
            return ",".join(cleaned) if cleaned else None
        return str(value)

    searx_connector = await self.get_connector_by_type(
        user_id, SearchSourceConnectorType.SEARXNG_API, search_space_id
    )
    if not searx_connector:
        return _empty_result()

    config = searx_connector.config or {}

    # Coerce to text so a non-string or whitespace-only host is reported as a
    # configuration problem instead of raising on ``.endswith`` below.
    raw_host = config.get("SEARXNG_HOST")
    host = str(raw_host).strip() if raw_host else ""
    if not host:
        print("SearxNG connector is missing SEARXNG_HOST configuration")
        return _empty_result()

    api_key = config.get("SEARXNG_API_KEY")
    language = config.get("SEARXNG_LANGUAGE")
    safesearch = config.get("SEARXNG_SAFESEARCH")
    verify_ssl = _parse_bool(config.get("SEARXNG_VERIFY_SSL", True))

    # Only forward safesearch when it can be read as an integer.
    safesearch_value: int | None = None
    if isinstance(safesearch, str):
        safesearch_clean = safesearch.strip()
        if safesearch_clean.isdigit():
            safesearch_value = int(safesearch_clean)
    elif isinstance(safesearch, (int, float)):
        try:
            safesearch_value = int(safesearch)
        except (ValueError, OverflowError):
            # NaN / infinity cannot be converted; treat as "not configured".
            safesearch_value = None

    limit = max(1, min(top_k, 50))

    params: dict[str, Any] = {
        "q": user_query,
        "format": "json",
        # Kept for backward compatibility. SearxNG's documented search API has no
        # result-count parameter, so the cap is also enforced client-side below.
        "limit": limit,
    }
    if language:
        params["language"] = language

    engines_param = _format_list(config.get("SEARXNG_ENGINES"))
    if engines_param:
        params["engines"] = engines_param

    categories_param = _format_list(config.get("SEARXNG_CATEGORIES"))
    if categories_param:
        params["categories"] = categories_param

    if safesearch_value is not None:
        params["safesearch"] = safesearch_value

    headers = {"Accept": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    # The trailing slash makes urljoin append "search" instead of replacing the
    # last path segment of the configured host.
    searx_endpoint = urljoin(host if host.endswith("/") else f"{host}/", "search")

    try:
        async with httpx.AsyncClient(timeout=20.0, verify=verify_ssl) as client:
            response = await client.get(
                searx_endpoint,
                params=params,
                headers=headers,
            )
            response.raise_for_status()
    except (httpx.HTTPError, httpx.InvalidURL) as exc:
        # InvalidURL is not an HTTPError subclass; a malformed host must not
        # crash the whole search.
        print(f"Error searching with SearxNG: {exc!s}")
        return _empty_result()

    try:
        data = response.json()
    except ValueError:
        print("Failed to decode JSON response from SearxNG")
        return _empty_result()

    # Tolerate unexpected payload shapes: only a dict with a list of dict
    # entries under "results" is usable.
    raw_results = data.get("results") if isinstance(data, dict) else None
    if not isinstance(raw_results, list):
        raw_results = []
    searx_results = [item for item in raw_results if isinstance(item, dict)][:limit]
    if not searx_results:
        return _empty_result()

    sources_list: list[dict[str, Any]] = []
    documents: list[dict[str, Any]] = []

    # The lock keeps the ids handed out from self.source_id_counter unique while
    # this batch is being numbered.
    async with self.counter_lock:
        for result in searx_results:
            full_text = str(result.get("content") or result.get("snippet") or "")
            # Sources carry a short preview; documents keep the full text.
            if len(full_text) > 160:
                description = f"{full_text[:157]}..."
            else:
                description = full_text

            title = result.get("title", "SearxNG Result")
            url = result.get("url", "")

            sources_list.append(
                {
                    "id": self.source_id_counter,
                    "title": title,
                    "description": description,
                    "url": url,
                }
            )

            documents.append(
                {
                    "chunk_id": self.source_id_counter,
                    "content": full_text,
                    "score": result.get("score", 0.0),
                    "document": {
                        "id": self.source_id_counter,
                        "title": title,
                        "document_type": "SEARXNG_API",
                        "metadata": {
                            "url": url,
                            "engines": result.get("engines", []),
                            "category": result.get("category"),
                            "source": "SEARXNG_API",
                        },
                    },
                }
            )
            self.source_id_counter += 1

    result_object = {
        "id": 11,
        "name": "SearxNG Search",
        "type": "SEARXNG_API",
        "sources": sources_list,
    }

    return result_object, documents
|
||||||
|
|
||||||
# Initialize Tavily client with API key from connector config
|
# Initialize Tavily client with API key from connector config
|
||||||
tavily_api_key = tavily_connector.config.get("TAVILY_API_KEY")
|
tavily_api_key = tavily_connector.config.get("TAVILY_API_KEY")
|
||||||
tavily_client = TavilyClient(api_key=tavily_api_key)
|
tavily_client = TavilyClient(api_key=tavily_api_key)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue