# Reconstructed from git patch 020c5ed04eba9a185170fd0f525d998e4344691f
# (Aki-07, Sun, 12 Oct 2025 20:44:00 +0530), subject
# "feat(back): add SearxNG search integration", which targets
# surfsense_backend/app/services/connector_service.py.
#
# The patch adds the `urljoin` and `httpx` imports below and the
# `search_searxng` method to `ConnectorService`. The method is shown here
# dedented to column 0; re-indent it by one level when placing it in the class.
from typing import Any
from urllib.parse import urljoin

import httpx


async def search_searxng(
    self,
    user_query: str,
    user_id: str,
    search_space_id: int,
    top_k: int = 20,
) -> tuple[dict[str, Any], list[dict[str, Any]]]:
    """
    Search using a configured SearxNG instance and return both sources and documents.

    Args:
        user_query: The text sent to SearxNG as the ``q`` parameter.
        user_id: Passed to ``get_connector_by_type`` to locate the connector.
        search_space_id: Passed to ``get_connector_by_type`` to locate the connector.
        top_k: Maximum number of results to return; clamped to the range 1..50.

    Returns:
        A ``(result_object, documents)`` tuple. ``result_object`` is a dict with
        the keys ``id`` (always 11), ``name``, ``type`` and ``sources``;
        ``documents`` is a list of dicts with ``chunk_id``, ``content``,
        ``score`` and ``document``. When the connector or its host is missing,
        the request fails, or the response carries no usable results, the
        ``sources`` list and ``documents`` are both empty. Errors are printed,
        never raised.
    """

    def _empty_result() -> tuple[dict[str, Any], list[dict[str, Any]]]:
        # Build a fresh object each time so callers can never share (and
        # accidentally mutate) the same "no results" payload.
        return {
            "id": 11,
            "name": "SearxNG Search",
            "type": "SEARXNG_API",
            "sources": [],
        }, []

    def _parse_bool(value: Any, default: bool = True) -> bool:
        # Accept real booleans and the usual textual spellings; anything
        # unrecognised falls back to `default`.
        if isinstance(value, bool):
            return value
        if isinstance(value, str):
            lowered = value.strip().lower()
            if lowered in {"true", "1", "yes", "on"}:
                return True
            if lowered in {"false", "0", "no", "off"}:
                return False
        return default

    def _format_list(value: Any) -> str | None:
        # Normalise a string or a collection into a comma-separated string,
        # or None when there is nothing to send.
        if value is None:
            return None
        if isinstance(value, str):
            return value.strip() or None
        if isinstance(value, (list, tuple, set)):
            cleaned = [text for text in (str(item).strip() for item in value) if text]
            return ",".join(cleaned) if cleaned else None
        return str(value)

    searx_connector = await self.get_connector_by_type(
        user_id, SearchSourceConnectorType.SEARXNG_API, search_space_id
    )
    if not searx_connector:
        return _empty_result()

    config = searx_connector.config or {}

    # Strip before the emptiness check so a whitespace-only host is rejected
    # here instead of producing a malformed request URL.
    host = str(config.get("SEARXNG_HOST") or "").strip()
    if not host:
        print("SearxNG connector is missing SEARXNG_HOST configuration")
        return _empty_result()

    api_key = config.get("SEARXNG_API_KEY")
    language = config.get("SEARXNG_LANGUAGE")
    safesearch = config.get("SEARXNG_SAFESEARCH")
    verify_ssl = _parse_bool(config.get("SEARXNG_VERIFY_SSL", True))

    safesearch_value: int | None = None
    if isinstance(safesearch, str):
        safesearch_clean = safesearch.strip()
        if safesearch_clean.isdigit():
            safesearch_value = int(safesearch_clean)
    elif isinstance(safesearch, (int, float)):
        safesearch_value = int(safesearch)

    limit = max(1, min(top_k, 50))

    params: dict[str, Any] = {
        "q": user_query,
        "format": "json",
        # Still sent for compatibility with the original request shape, but
        # `limit` is not a documented SearxNG search parameter, so the cap is
        # enforced client-side below.
        "limit": limit,
    }
    if language:
        params["language"] = language

    engines_param = _format_list(config.get("SEARXNG_ENGINES"))
    if engines_param:
        params["engines"] = engines_param

    categories_param = _format_list(config.get("SEARXNG_CATEGORIES"))
    if categories_param:
        params["categories"] = categories_param

    if safesearch_value is not None:
        params["safesearch"] = safesearch_value

    headers = {"Accept": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    # The trailing slash makes urljoin append "search" to the base path
    # rather than replace its last segment.
    searx_endpoint = urljoin(host if host.endswith("/") else f"{host}/", "search")

    try:
        async with httpx.AsyncClient(timeout=20.0, verify=verify_ssl) as client:
            response = await client.get(
                searx_endpoint,
                params=params,
                headers=headers,
            )
            response.raise_for_status()
    except (httpx.HTTPError, httpx.InvalidURL) as exc:
        # InvalidURL is not an HTTPError subclass; without it a malformed
        # host would escape this best-effort handler.
        print(f"Error searching with SearxNG: {exc!s}")
        return _empty_result()

    try:
        data = response.json()
    except ValueError:
        print("Failed to decode JSON response from SearxNG")
        return _empty_result()

    # Tolerate unexpected payload shapes (non-object body, non-list results,
    # non-object entries) instead of raising AttributeError.
    raw_results = data.get("results") if isinstance(data, dict) else None
    if not isinstance(raw_results, list):
        return _empty_result()
    searx_results = [item for item in raw_results if isinstance(item, dict)][:limit]
    if not searx_results:
        return _empty_result()

    sources_list: list[dict[str, Any]] = []
    documents: list[dict[str, Any]] = []

    # The lock covers the whole loop so the ids handed out for this batch are
    # contiguous and never collide with a concurrent search.
    async with self.counter_lock:
        for result in searx_results:
            full_text = result.get("content") or result.get("snippet") or ""
            if not isinstance(full_text, str):
                full_text = str(full_text)

            # Only the source card is shortened; the document keeps the full
            # text so downstream consumers are not limited to 160 characters.
            description = full_text
            if len(description) > 160:
                description = f"{description[:157]}..."

            title = result.get("title") or "SearxNG Result"
            url = result.get("url") or ""
            current_id = self.source_id_counter

            sources_list.append(
                {
                    "id": current_id,
                    "title": title,
                    "description": description,
                    "url": url,
                }
            )

            documents.append(
                {
                    "chunk_id": current_id,
                    "content": full_text,
                    "score": result.get("score", 0.0),
                    "document": {
                        "id": current_id,
                        "title": title,
                        "document_type": "SEARXNG_API",
                        "metadata": {
                            "url": url,
                            "engines": result.get("engines", []),
                            "category": result.get("category"),
                            "source": "SEARXNG_API",
                        },
                    },
                }
            )
            self.source_id_counter += 1

    result_object = {
        "id": 11,
        "name": "SearxNG Search",
        "type": "SEARXNG_API",
        "sources": sources_list,
    }

    return result_object, documents


# Trailing context lines of the patch (unchanged by it; they belong to code
# that follows the insertion point in connector_service.py):
#
#     # Initialize Tavily client with API key from connector config
#     tavily_api_key = tavily_connector.config.get("TAVILY_API_KEY")
#     tavily_client = TavilyClient(api_key=tavily_api_key)
#
# NOTE(review): the hunk inserts `search_searxng` directly between an early
# `return {...}, []` and these Tavily statements. That looks like the new
# method may have been placed in the middle of the Tavily search method,
# leaving these lines unreachable after `return result_object, documents`.
# Confirm the insertion point against the full file before applying.