diff --git a/docker/.env.example b/docker/.env.example index c31b87185..a226c2624 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -36,6 +36,7 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 # BACKEND_PORT=8929 # FRONTEND_PORT=3929 # ELECTRIC_PORT=5929 +# SEARXNG_PORT=8888 # FLOWER_PORT=5555 # ============================================================================== @@ -199,6 +200,16 @@ STT_SERVICE=local/base # COMPOSIO_ENABLED=TRUE # COMPOSIO_REDIRECT_URI=http://localhost:8000/api/v1/auth/composio/connector/callback +# ------------------------------------------------------------------------------ +# SearXNG (bundled web search — works out of the box, no config needed) +# ------------------------------------------------------------------------------ +# SearXNG provides web search to all search spaces automatically. +# To access the SearXNG UI directly: http://localhost:8888 +# To disable the service entirely: docker compose up --scale searxng=0 +# To point at your own SearXNG instance instead of the bundled one: +# SEARXNG_DEFAULT_HOST=http://your-searxng:8080 +# SEARXNG_SECRET=surfsense-searxng-secret + # ------------------------------------------------------------------------------ # Daytona Sandbox (optional — cloud code execution for the deep agent) # ------------------------------------------------------------------------------ diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index 4d602f584..30f758b61 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -57,6 +57,23 @@ services: timeout: 5s retries: 5 + searxng: + image: searxng/searxng:2024.12.23 + ports: + - "${SEARXNG_PORT:-8888}:8080" + volumes: + - ./searxng/settings.yml:/etc/searxng/settings.yml:ro + - ./searxng/limiter.toml:/etc/searxng/limiter.toml:ro + - searxng_data:/etc/searxng + environment: + - SEARXNG_BASE_URL=http://localhost:${SEARXNG_PORT:-8888}/ + - SEARXNG_SECRET=${SEARXNG_SECRET:-surfsense-searxng-secret} + 
healthcheck: + test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080/healthz"] + interval: 10s + timeout: 5s + retries: 5 + backend: build: ../surfsense_backend ports: @@ -81,6 +98,7 @@ services: - ELECTRIC_DB_PASSWORD=${ELECTRIC_DB_PASSWORD:-electric_password} - AUTH_TYPE=${AUTH_TYPE:-LOCAL} - NEXT_FRONTEND_URL=${NEXT_FRONTEND_URL:-http://localhost:3000} + - SEARXNG_DEFAULT_HOST=${SEARXNG_DEFAULT_HOST:-http://searxng:8080} # Daytona Sandbox – uncomment and set credentials to enable cloud code execution # - DAYTONA_SANDBOX_ENABLED=TRUE # - DAYTONA_API_KEY=${DAYTONA_API_KEY:-} @@ -92,6 +110,8 @@ services: condition: service_healthy redis: condition: service_healthy + searxng: + condition: service_healthy healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/health"] interval: 15s @@ -115,6 +135,7 @@ services: - PYTHONPATH=/app - ELECTRIC_DB_USER=${ELECTRIC_DB_USER:-electric} - ELECTRIC_DB_PASSWORD=${ELECTRIC_DB_PASSWORD:-electric_password} + - SEARXNG_DEFAULT_HOST=${SEARXNG_DEFAULT_HOST:-http://searxng:8080} - SERVICE_ROLE=worker depends_on: db: @@ -205,3 +226,5 @@ volumes: name: surfsense-dev-redis shared_temp: name: surfsense-dev-shared-temp + searxng_data: + name: surfsense-dev-searxng diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index ca20e3ed4..a86874b52 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -42,6 +42,24 @@ services: timeout: 5s retries: 5 + searxng: + image: searxng/searxng:2024.12.23 + volumes: + - ./searxng/settings.yml:/etc/searxng/settings.yml:ro + - ./searxng/limiter.toml:/etc/searxng/limiter.toml:ro + - searxng_data:/etc/searxng + environment: + SEARXNG_BASE_URL: http://localhost:${SEARXNG_PORT:-8888}/ + SEARXNG_SECRET: ${SEARXNG_SECRET:-surfsense-searxng-secret} + ports: + - "${SEARXNG_PORT:-8888}:8080" + restart: unless-stopped + healthcheck: + test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080/healthz"] + interval: 10s + timeout: 5s + retries: 5 + backend: image: 
ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest} ports: @@ -62,6 +80,7 @@ services: ELECTRIC_DB_USER: ${ELECTRIC_DB_USER:-electric} ELECTRIC_DB_PASSWORD: ${ELECTRIC_DB_PASSWORD:-electric_password} NEXT_FRONTEND_URL: ${NEXT_FRONTEND_URL:-http://localhost:${FRONTEND_PORT:-3929}} + SEARXNG_DEFAULT_HOST: ${SEARXNG_DEFAULT_HOST:-http://searxng:8080} # Daytona Sandbox – uncomment and set credentials to enable cloud code execution # DAYTONA_SANDBOX_ENABLED: "TRUE" # DAYTONA_API_KEY: ${DAYTONA_API_KEY:-} @@ -75,6 +94,8 @@ services: condition: service_healthy redis: condition: service_healthy + searxng: + condition: service_healthy restart: unless-stopped healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/health"] @@ -98,6 +119,7 @@ services: PYTHONPATH: /app ELECTRIC_DB_USER: ${ELECTRIC_DB_USER:-electric} ELECTRIC_DB_PASSWORD: ${ELECTRIC_DB_PASSWORD:-electric_password} + SEARXNG_DEFAULT_HOST: ${SEARXNG_DEFAULT_HOST:-http://searxng:8080} SERVICE_ROLE: worker depends_on: db: @@ -193,3 +215,5 @@ volumes: name: surfsense-redis shared_temp: name: surfsense-shared-temp + searxng_data: + name: surfsense-searxng diff --git a/docker/searxng/limiter.toml b/docker/searxng/limiter.toml new file mode 100644 index 000000000..dce84146f --- /dev/null +++ b/docker/searxng/limiter.toml @@ -0,0 +1,5 @@ +[botdetection.ip_limit] +link_token = false + +[botdetection.ip_lists] +pass_ip = ["0.0.0.0/0"] diff --git a/docker/searxng/settings.yml b/docker/searxng/settings.yml new file mode 100644 index 000000000..0c41f482c --- /dev/null +++ b/docker/searxng/settings.yml @@ -0,0 +1,60 @@ +use_default_settings: true + +server: + secret_key: "override-me-via-env" + limiter: false + image_proxy: false + method: "GET" + default_http_headers: + X-Robots-Tag: "noindex, nofollow" + +search: + formats: + - html + - json + default_lang: "auto" + autocomplete: "" + safe_search: 0 + +ui: + static_use_hash: true + +outgoing: + request_timeout: 12.0 + pool_connections: 100 + 
pool_maxsize: 20 + enable_http2: true + +engines: + - name: google + disabled: false + weight: 1.2 + - name: duckduckgo + disabled: false + weight: 1.1 + - name: brave + disabled: false + weight: 1.0 + - name: qwant + disabled: false + weight: 0.9 + - name: mojeek + disabled: false + weight: 0.7 + - name: wikipedia + disabled: false + weight: 0.8 + - name: wikidata + disabled: false + weight: 0.6 + - name: currency_convert + disabled: false + - name: duckduckgo definitions + disabled: false + - name: stackoverflow + disabled: false + weight: 0.7 + - name: bing + disabled: true + - name: yahoo + disabled: true diff --git a/surfsense_backend/alembic/versions/106_add_platform_web_search.py b/surfsense_backend/alembic/versions/106_add_platform_web_search.py new file mode 100644 index 000000000..e4ba59cbd --- /dev/null +++ b/surfsense_backend/alembic/versions/106_add_platform_web_search.py @@ -0,0 +1,51 @@ +"""106_add_platform_web_search + +Revision ID: 106 +Revises: 105 +Create Date: 2026-03-14 + +Adds web_search_enabled and web_search_config columns to searchspaces for +per-space control over the platform web search capability. + +Also removes legacy SEARXNG_API connector rows — web search is now a platform +service, not a per-user connector. 
+""" + +from __future__ import annotations + +from collections.abc import Sequence + +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import JSONB + +from alembic import op + +revision: str = "106" +down_revision: str | None = "105" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + op.add_column( + "searchspaces", + sa.Column( + "web_search_enabled", + sa.Boolean(), + nullable=False, + server_default=sa.text("true"), + ), + ) + op.add_column( + "searchspaces", + sa.Column("web_search_config", JSONB, nullable=True), + ) + + op.execute( + "DELETE FROM search_source_connectors WHERE connector_type = 'SEARXNG_API'" + ) + + +def downgrade() -> None: + op.drop_column("searchspaces", "web_search_config") + op.drop_column("searchspaces", "web_search_enabled") diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py index f3d988e5b..1dcc1d393 100644 --- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py +++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py @@ -233,6 +233,28 @@ async def create_surfsense_deep_agent( available_document_types = await connector_service.get_available_document_types( search_space_id ) + + # Platform web search: inject SEARXNG_API when the service is available + # and the search space hasn't disabled web search. 
+ from app.db import SearchSpace + from app.services import web_search_service + + _LIVE_SEARCH_CONNECTORS = {"TAVILY_API", "SEARXNG_API", "LINKUP_API", "BAIDU_SEARCH_API"} + + space = await db_session.get(SearchSpace, search_space_id) + web_search_enabled = space.web_search_enabled if space else True + + if web_search_enabled and web_search_service.is_available(): + if available_connectors is None: + available_connectors = list(_ALWAYS_AVAILABLE_DOC_TYPES) + if "SEARXNG_API" not in available_connectors: + available_connectors.append("SEARXNG_API") + + if not web_search_enabled and available_connectors: + available_connectors = [ + c for c in available_connectors if c not in _LIVE_SEARCH_CONNECTORS + ] + except Exception as e: logging.warning(f"Failed to discover available connectors/document types: {e}") _perf_log.info( diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py index b042f75c3..30fe62dd8 100644 --- a/surfsense_backend/app/agents/new_chat/system_prompt.py +++ b/surfsense_backend/app/agents/new_chat/system_prompt.py @@ -101,8 +101,9 @@ _TOOL_INSTRUCTIONS["search_knowledge_base"] = """ including calendar apps, note-taking apps (Obsidian, Notion), chat apps (Slack, Discord), and more. - IMPORTANT (REAL-TIME / PUBLIC WEB QUERIES): For questions that require current public web data (e.g., live exchange rates, stock prices, breaking news, weather, current events), you MUST call - `search_knowledge_base` using live web connectors via `connectors_to_search`: - ["LINKUP_API", "TAVILY_API", "SEARXNG_API", "BAIDU_SEARCH_API"]. + `search_knowledge_base` using live web connectors via `connectors_to_search`. + Use whichever of these live connectors are available: ["LINKUP_API", "TAVILY_API", "SEARXNG_API", "BAIDU_SEARCH_API"]. + Only connectors listed in the tool's available connector enums section will actually return results. 
- For these real-time/public web queries, DO NOT answer from memory and DO NOT say you lack internet access before attempting a live connector search. - If the live connectors return no relevant results, explain that live web sources did not return enough diff --git a/surfsense_backend/app/app.py b/surfsense_backend/app/app.py index e6db5670e..9bd266d39 100644 --- a/surfsense_backend/app/app.py +++ b/surfsense_backend/app/app.py @@ -513,6 +513,15 @@ async def health_check(): return {"status": "ok"} +@app.get("/api/v1/platform/web-search/health", tags=["platform"]) +@limiter.exempt +async def web_search_health(user: User = Depends(current_active_user)): + """Return the health status of the platform web search (SearXNG) service.""" + from app.services import web_search_service + + return await web_search_service.health_check() + + @app.get("/verify-token") async def authenticated_route( user: User = Depends(current_active_user), diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py index aaf77a54f..186936325 100644 --- a/surfsense_backend/app/config/__init__.py +++ b/surfsense_backend/app/config/__init__.py @@ -224,6 +224,9 @@ class Config: os.getenv("CONNECTOR_INDEXING_LOCK_TTL_SECONDS", str(8 * 60 * 60)) ) + # Platform web search (SearXNG) + SEARXNG_DEFAULT_HOST = os.getenv("SEARXNG_DEFAULT_HOST") + NEXT_FRONTEND_URL = os.getenv("NEXT_FRONTEND_URL") # Backend URL to override the http to https in the OAuth redirect URI BACKEND_URL = os.getenv("BACKEND_URL") diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 062b11b3a..ac6ad549c 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -1204,6 +1204,12 @@ class SearchSpace(BaseModel, TimestampMixin): Integer, nullable=True, default=0 ) # For image generation, defaults to Auto mode + # Platform web search capability (opt-out via toggle) + web_search_enabled = Column( + Boolean, nullable=False, default=True, 
server_default=text("true") + ) + web_search_config = Column(JSONB, nullable=True) + user_id = Column( UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False ) diff --git a/surfsense_backend/app/routes/search_spaces_routes.py b/surfsense_backend/app/routes/search_spaces_routes.py index 7f6638e2c..3eaa985f0 100644 --- a/surfsense_backend/app/routes/search_spaces_routes.py +++ b/surfsense_backend/app/routes/search_spaces_routes.py @@ -180,6 +180,8 @@ async def read_search_spaces( user_id=space.user_id, citations_enabled=space.citations_enabled, qna_custom_instructions=space.qna_custom_instructions, + web_search_enabled=space.web_search_enabled, + web_search_config=space.web_search_config, member_count=member_count, is_owner=is_owner, ) diff --git a/surfsense_backend/app/schemas/search_space.py b/surfsense_backend/app/schemas/search_space.py index 729ff4e7d..23640b2d8 100644 --- a/surfsense_backend/app/schemas/search_space.py +++ b/surfsense_backend/app/schemas/search_space.py @@ -1,5 +1,6 @@ import uuid from datetime import datetime +from typing import Any from pydantic import BaseModel, ConfigDict @@ -12,26 +13,29 @@ class SearchSpaceBase(BaseModel): class SearchSpaceCreate(SearchSpaceBase): - # Optional on create, will use defaults if not provided citations_enabled: bool = True qna_custom_instructions: str | None = None + web_search_enabled: bool = True + web_search_config: dict[str, Any] | None = None class SearchSpaceUpdate(BaseModel): - # All fields optional on update - only send what you want to change name: str | None = None description: str | None = None citations_enabled: bool | None = None qna_custom_instructions: str | None = None + web_search_enabled: bool | None = None + web_search_config: dict[str, Any] | None = None class SearchSpaceRead(SearchSpaceBase, IDModel, TimestampModel): id: int created_at: datetime user_id: uuid.UUID - # QnA configuration citations_enabled: bool qna_custom_instructions: str | None = None + 
web_search_enabled: bool
+    web_search_config: dict[str, Any] | None = None
 
     model_config = ConfigDict(from_attributes=True)
diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py
index 870e175d3..f065a24eb 100644
--- a/surfsense_backend/app/services/connector_service.py
+++ b/surfsense_backend/app/services/connector_service.py
@@ -16,6 +16,7 @@ from app.db import (
     Document,
     SearchSourceConnector,
     SearchSourceConnectorType,
+    SearchSpace,
     async_session_maker,
 )
 from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever
@@ -577,185 +578,72 @@ class ConnectorService:
         search_space_id: int,
         top_k: int = 20,
     ) -> tuple:
+        """Search the web through the platform SearXNG instance.
+
+        Delegates to the ``web_search_service`` module, which handles
+        caching, circuit breaking, and retries. Per-search-space overrides
+        (``engines``, ``language``, ``safesearch``) are read from the
+        ``SearchSpace.web_search_config`` JSONB column and normalised here.
+
+        Args:
+            user_query: The search query text.
+            search_space_id: Search space whose overrides are applied.
+            top_k: Maximum number of results requested.
+
+        Returns:
+            A ``(result_object, documents)`` tuple; ``sources`` is empty
+            when the platform service is not configured.
         """
-        Search using a configured SearxNG instance and return both sources and documents.
-        """
-        searx_connector = await self.get_connector_by_type(
-            SearchSourceConnectorType.SEARXNG_API, search_space_id
+        from app.services import web_search_service
+
+        if not web_search_service.is_available():
+            return {
+                "id": 11,
+                "name": "Web Search",
+                "type": "SEARXNG_API",
+                "sources": [],
+            }, []
+
+        # Per-space overrides come from a free-form JSONB column, so every
+        # value is validated and normalised before it reaches the request.
+        engines: str | None = None
+        language: str | None = None
+        safesearch: int | None = None
+
+        space = await self.session.get(SearchSpace, search_space_id)
+        cfg = space.web_search_config if space else None
+        if isinstance(cfg, dict):
+            raw_engines = cfg.get("engines")
+            if isinstance(raw_engines, list | tuple):
+                # SearXNG expects a comma-separated string; the legacy
+                # connector config accepted lists, so keep accepting them.
+                raw_engines = ",".join(
+                    str(item).strip() for item in raw_engines if str(item).strip()
+                )
+            if isinstance(raw_engines, str) and raw_engines.strip():
+                engines = raw_engines.strip()
+
+            raw_language = cfg.get("language")
+            if isinstance(raw_language, str) and raw_language.strip():
+                language = raw_language.strip()
+
+            raw_ss = cfg.get("safesearch")
+            # bool is a subclass of int; exclude it so True is not sent as 1.
+            if (
+                isinstance(raw_ss, int)
+                and not isinstance(raw_ss, bool)
+                and 0 <= raw_ss <= 2
+            ):
+                safesearch = raw_ss
+
+        return await web_search_service.search(
+            query=user_query,
+            top_k=top_k,
+            engines=engines,
+            language=language,
+            safesearch=safesearch,
         )
-        if not searx_connector:
-            return {
-                "id": 11,
-                "name": "SearxNG Search",
-                "type": "SEARXNG_API",
-                "sources": [],
-            }, []
-
-        config = searx_connector.config or {}
-        host = config.get("SEARXNG_HOST")
-
-        if not host:
-            print("SearxNG connector is missing SEARXNG_HOST configuration")
-            return {
-                "id": 11,
-                "name": "SearxNG Search",
-                "type": "SEARXNG_API",
-                "sources": [],
-            }, []
-
-        api_key = config.get("SEARXNG_API_KEY")
-        engines = config.get("SEARXNG_ENGINES")
-        categories = config.get("SEARXNG_CATEGORIES")
-        language = config.get("SEARXNG_LANGUAGE")
-        safesearch = config.get("SEARXNG_SAFESEARCH")
-
-        def _parse_bool(value: Any, default: bool = True) -> bool:
-            if isinstance(value, bool):
-                return value
-            if isinstance(value, str):
-                lowered = value.strip().lower()
-                if lowered in {"true", "1", "yes", "on"}:
-                    return True
-                if lowered in {"false", "0", "no", "off"}:
-                    return False
-            return default
-
-        verify_ssl = _parse_bool(config.get("SEARXNG_VERIFY_SSL", True))
-
-        safesearch_value: int | None = None
-        if isinstance(safesearch, str):
-            safesearch_clean = safesearch.strip()
-
if safesearch_clean.isdigit(): - safesearch_value = int(safesearch_clean) - elif isinstance(safesearch, int | float): - safesearch_value = int(safesearch) - - if safesearch_value is not None and not (0 <= safesearch_value <= 2): - safesearch_value = None - - def _format_list(value: Any) -> str | None: - if value is None: - return None - if isinstance(value, str): - value = value.strip() - return value or None - if isinstance(value, list | tuple | set): - cleaned = [str(item).strip() for item in value if str(item).strip()] - return ",".join(cleaned) if cleaned else None - return str(value) - - params: dict[str, Any] = { - "q": user_query, - "format": "json", - "language": language or "", - "limit": max(1, min(top_k, 50)), - } - - engines_param = _format_list(engines) - if engines_param: - params["engines"] = engines_param - - categories_param = _format_list(categories) - if categories_param: - params["categories"] = categories_param - - if safesearch_value is not None: - params["safesearch"] = safesearch_value - - if not params.get("language"): - params.pop("language") - - headers = {"Accept": "application/json"} - if api_key: - headers["X-API-KEY"] = api_key - - searx_endpoint = urljoin(host if host.endswith("/") else f"{host}/", "search") - - try: - async with httpx.AsyncClient(timeout=20.0, verify=verify_ssl) as client: - response = await client.get( - searx_endpoint, - params=params, - headers=headers, - ) - response.raise_for_status() - except httpx.HTTPError as exc: - print(f"Error searching with SearxNG: {exc!s}") - return { - "id": 11, - "name": "SearxNG Search", - "type": "SEARXNG_API", - "sources": [], - }, [] - - try: - data = response.json() - except ValueError: - print("Failed to decode JSON response from SearxNG") - return { - "id": 11, - "name": "SearxNG Search", - "type": "SEARXNG_API", - "sources": [], - }, [] - - searx_results = data.get("results", []) - if not searx_results: - return { - "id": 11, - "name": "SearxNG Search", - "type": 
"SEARXNG_API", - "sources": [], - }, [] - - sources_list: list[dict[str, Any]] = [] - documents: list[dict[str, Any]] = [] - - async with self.counter_lock: - for result in searx_results: - description = result.get("content") or result.get("snippet") or "" - if len(description) > 160: - description = f"{description}" - - source = { - "id": self.source_id_counter, - "title": result.get("title", "SearxNG Result"), - "description": description, - "url": result.get("url", ""), - } - sources_list.append(source) - - metadata = { - "url": result.get("url", ""), - "engines": result.get("engines", []), - "category": result.get("category"), - "source": "SEARXNG_API", - } - - document = { - "chunk_id": self.source_id_counter, - "content": description or result.get("content", ""), - "score": result.get("score", 0.0), - "document": { - "id": self.source_id_counter, - "title": result.get("title", "SearxNG Result"), - "document_type": "SEARXNG_API", - "metadata": metadata, - }, - } - documents.append(document) - self.source_id_counter += 1 - - result_object = { - "id": 11, - "name": "SearxNG Search", - "type": "SEARXNG_API", - "sources": sources_list, - } - - return result_object, documents - async def search_baidu( self, user_query: str, diff --git a/surfsense_backend/app/services/web_search_service.py b/surfsense_backend/app/services/web_search_service.py new file mode 100644 index 000000000..70da23333 --- /dev/null +++ b/surfsense_backend/app/services/web_search_service.py @@ -0,0 +1,292 @@ +""" +Platform-level web search service backed by SearXNG. + +Provides caching via Redis, a circuit breaker for resilience, and a health +check endpoint. Configuration is read from environment variables rather +than per-search-space database rows — this service is a platform capability +that is always available when ``SEARXNG_DEFAULT_HOST`` is set. 
+""" + +from __future__ import annotations + +import hashlib +import json +import logging +import time +from typing import Any +from urllib.parse import urljoin + +import httpx +import redis + +from app.config import config + +logger = logging.getLogger(__name__) + +_EMPTY_RESULT: dict[str, Any] = { + "id": 11, + "name": "Web Search", + "type": "SEARXNG_API", + "sources": [], +} + +# --------------------------------------------------------------------------- +# Redis helpers +# --------------------------------------------------------------------------- + +_redis_client: redis.Redis | None = None + + +def _get_redis() -> redis.Redis: + global _redis_client + if _redis_client is None: + _redis_client = redis.from_url(config.REDIS_APP_URL, decode_responses=True) + return _redis_client + + +# --------------------------------------------------------------------------- +# Circuit Breaker +# --------------------------------------------------------------------------- + +_CB_FAILURES_KEY = "websearch:circuit:failures" +_CB_OPEN_KEY = "websearch:circuit:open" +_CB_FAILURE_THRESHOLD = 5 +_CB_FAILURE_WINDOW_SECONDS = 60 +_CB_COOLDOWN_SECONDS = 30 + + +def _circuit_is_open() -> bool: + try: + return _get_redis().exists(_CB_OPEN_KEY) == 1 + except redis.RedisError: + return False + + +def _record_failure() -> None: + try: + r = _get_redis() + pipe = r.pipeline() + pipe.incr(_CB_FAILURES_KEY) + pipe.expire(_CB_FAILURES_KEY, _CB_FAILURE_WINDOW_SECONDS) + pipe.execute() + + failures = int(r.get(_CB_FAILURES_KEY) or 0) + if failures >= _CB_FAILURE_THRESHOLD: + r.setex(_CB_OPEN_KEY, _CB_COOLDOWN_SECONDS, "1") + logger.warning( + "Circuit breaker OPENED after %d failures — " + "SearXNG calls paused for %ds", + failures, + _CB_COOLDOWN_SECONDS, + ) + except redis.RedisError: + pass + + +def _record_success() -> None: + try: + r = _get_redis() + r.delete(_CB_FAILURES_KEY, _CB_OPEN_KEY) + except redis.RedisError: + pass + + +# 
---------------------------------------------------------------------------
+# Result Caching
+# ---------------------------------------------------------------------------
+
+_CACHE_TTL_SECONDS = 300  # 5 minutes
+_CACHE_PREFIX = "websearch:cache:"
+
+
+def _cache_key(
+    query: str,
+    engines: str | None,
+    language: str | None,
+    safesearch: int | None = None,
+    limit: int | None = None,
+) -> str:
+    """Build the Redis key for one search request.
+
+    Every parameter that is sent to SearXNG is part of the key, so a space
+    with strict ``safesearch`` is never served results that were cached for
+    a space with filtering disabled. The fields are JSON-encoded before
+    hashing so that separator characters inside ``query`` cannot make two
+    different requests collide.
+    """
+    raw = json.dumps([query, engines or "", language or "", safesearch, limit])
+    digest = hashlib.sha256(raw.encode()).hexdigest()[:24]
+    return f"{_CACHE_PREFIX}{digest}"
+
+
+def _cache_get(key: str) -> dict | None:
+    """Return the cached payload for ``key``, or ``None`` on miss or error.
+
+    Redis and decoding errors are treated as a miss: the cache is an
+    optimisation and must never fail a search.
+    """
+    try:
+        data = _get_redis().get(key)
+        if data:
+            payload = json.loads(data)
+            # Ignore entries that do not have the shape ``search`` stores.
+            if (
+                isinstance(payload, dict)
+                and "result" in payload
+                and "documents" in payload
+            ):
+                return payload
+    except (redis.RedisError, json.JSONDecodeError):
+        pass
+    return None
+
+
+def _cache_set(key: str, value: dict) -> None:
+    """Store ``value`` under ``key`` for ``_CACHE_TTL_SECONDS`` (best effort)."""
+    try:
+        _get_redis().setex(key, _CACHE_TTL_SECONDS, json.dumps(value))
+    except (redis.RedisError, TypeError, ValueError):
+        # Redis being down or an unserialisable payload only costs a cache
+        # entry; the search result itself is still returned to the caller.
+        pass
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def _verify_tls() -> bool:
+    """Return whether HTTPS certificates of the SearXNG host are verified.
+
+    Verification is on by default. Set ``SEARXNG_VERIFY_SSL`` to
+    ``false``/``0``/``no``/``off`` to opt out, e.g. for a self-hosted
+    instance behind a self-signed certificate.
+    """
+    import os
+
+    flag = os.getenv("SEARXNG_VERIFY_SSL", "true").strip().lower()
+    return flag not in {"false", "0", "no", "off"}
+
+
+def _empty_result() -> dict[str, Any]:
+    """Return a fresh empty result whose ``sources`` list is not shared."""
+    return {**_EMPTY_RESULT, "sources": []}
+
+
+def is_available() -> bool:
+    """Return ``True`` when the platform SearXNG host is configured."""
+    return bool(config.SEARXNG_DEFAULT_HOST)
+
+
+async def health_check() -> dict[str, Any]:
+    """Ping the SearXNG ``/healthz`` endpoint and return status info.
+
+    Returns a dict with ``status`` (``unavailable``, ``healthy`` or
+    ``unhealthy``) and, once a request was attempted, ``response_time_ms``
+    and the current ``circuit_breaker`` state. ``error`` is present when the
+    service is not healthy.
+    """
+    import asyncio
+
+    host = config.SEARXNG_DEFAULT_HOST
+    if not host:
+        return {"status": "unavailable", "error": "SEARXNG_DEFAULT_HOST not set"}
+
+    healthz_url = urljoin(host if host.endswith("/") else f"{host}/", "healthz")
+    t0 = time.perf_counter()
+    status: dict[str, Any]
+    try:
+        async with httpx.AsyncClient(timeout=5.0, verify=_verify_tls()) as client:
+            resp = await client.get(healthz_url)
+            resp.raise_for_status()
+        status = {"status": "healthy"}
+    except Exception as exc:
+        status = {"status": "unhealthy", "error": str(exc)}
+    status["response_time_ms"] = round((time.perf_counter() - t0) * 1000)
+
+    # The Redis client is synchronous; run it in a worker thread so a slow
+    # Redis does not stall the event loop.
+    breaker_open = await asyncio.to_thread(_circuit_is_open)
+    status["circuit_breaker"] = "open" if breaker_open else "closed"
+    return status
+
+
+async def search(
+    query: str,
+    top_k: int = 20,
+    *,
+    engines: str | None = None,
+    language: str | None = None,
+    safesearch: int | None = None,
+) -> tuple[dict[str, Any], list[dict[str, Any]]]:
+    """Execute a web search against the platform SearXNG instance.
+
+    Args:
+        query: The search query text.
+        top_k: Requested number of results; clamped to ``1..50`` and sent
+            as the ``limit`` request parameter.
+        engines: Optional comma-separated engine list.
+        language: Optional language code.
+        safesearch: Optional safe-search level; values outside ``0..2`` are
+            ignored.
+
+    Returns:
+        The standard ``(result_object, documents)`` tuple expected by
+        ``ConnectorService.search_searxng``. On failure, or when the host
+        is not configured, ``sources`` and ``documents`` are empty.
+    """
+    import asyncio
+
+    host = config.SEARXNG_DEFAULT_HOST
+    if not host:
+        return _empty_result(), []
+
+    # Normalise up front so the cache key matches what is actually sent.
+    if safesearch is not None and not 0 <= safesearch <= 2:
+        safesearch = None
+    limit = max(1, min(top_k, 50))
+
+    # --- Circuit breaker ---
+    # All Redis helpers are synchronous, so they run in a worker thread.
+    if await asyncio.to_thread(_circuit_is_open):
+        logger.info("Web search skipped — circuit breaker is open")
+        result = _empty_result()
+        result["error"] = "Web search temporarily unavailable (circuit open)"
+        result["status"] = "degraded"
+        return result, []
+
+    # --- Cache lookup ---
+    ck = _cache_key(query, engines, language, safesearch, limit)
+    cached = await asyncio.to_thread(_cache_get, ck)
+    if cached is not None:
+        logger.debug("Web search cache HIT for query=%r", query[:60])
+        return cached["result"], cached["documents"]
+
+    # --- Build request ---
+    params: dict[str, Any] = {
+        "q": query,
+        "format": "json",
+        "limit": limit,
+    }
+    if engines:
+        params["engines"] = engines
+    if language:
+        params["language"] = language
+    if safesearch is not None:
+        params["safesearch"] = safesearch
+
+    searx_endpoint = urljoin(host if host.endswith("/") else f"{host}/", "search")
+    headers = {"Accept": "application/json"}
+
+    # --- HTTP call with one retry on transient errors ---
+    data: Any = None
+    last_error: Exception | None = None
+
+    async with httpx.AsyncClient(timeout=15.0, verify=_verify_tls()) as client:
+        for attempt in range(2):
+            try:
+                response = await client.get(
+                    searx_endpoint, params=params, headers=headers,
+                )
+                response.raise_for_status()
+                data = response.json()
+                break
+            except (httpx.HTTPStatusError, httpx.TimeoutException) as exc:
+                last_error = exc
+                # Only timeouts and 5xx responses are worth a second try.
+                retryable = (
+                    isinstance(exc, httpx.TimeoutException)
+                    or exc.response.status_code >= 500
+                )
+                if attempt == 0 and retryable:
+                    continue
+                break
+            except (httpx.HTTPError, ValueError) as exc:
+                # Other transport errors and undecodable JSON are final.
+                last_error = exc
+                break
+
+    if not isinstance(data, dict):
+        # Covers both a failed request and a JSON payload of the wrong shape.
+        await asyncio.to_thread(_record_failure)
+        logger.warning(
+            "Web search failed after retries: %s",
+            last_error or "unexpected response payload",
+        )
+        return _empty_result(), []
+
+    await asyncio.to_thread(_record_success)
+
+    searx_results = data.get("results") or []
+    if not searx_results:
+        return _empty_result(), []
+
+    # --- Format results ---
+    sources_list: list[dict[str, Any]] = []
+    documents: list[dict[str, Any]] = []
+
+    for idx, result in enumerate(searx_results):
+        # NOTE(review): ids restart at 200_000 for every call, so two
+        # searches in one conversation reuse the same ids — confirm that
+        # downstream citation handling tolerates this.
+        source_id = 200_000 + idx
+        description = result.get("content") or result.get("snippet") or ""
+
+        sources_list.append({
+            "id": source_id,
+            "title": result.get("title", "Web Search Result"),
+            "description": description,
+            "url": result.get("url", ""),
+        })
+
+        documents.append({
+            "chunk_id": source_id,
+            "content": description or result.get("content", ""),
+            "score": result.get("score", 0.0),
+            "document": {
+                "id": source_id,
+                "title": result.get("title", "Web Search Result"),
+                "document_type": "SEARXNG_API",
+                "metadata": {
+                    "url": result.get("url", ""),
+                    "engines": result.get("engines", []),
+                    "category": result.get("category"),
+                    "source": "SEARXNG_API",
+                },
+            },
+        })
+
+    result_object: dict[str, Any] = {
+        "id": 11,
+        "name": "Web Search",
+        "type": "SEARXNG_API",
+        "sources": sources_list,
+    }
+
+    # --- Cache store ---
+    await asyncio.to_thread(
+        _cache_set, ck, {"result": result_object, "documents": documents}
+    )
+
+    return result_object, documents
diff --git a/surfsense_web/app/dashboard/[search_space_id]/settings/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/settings/page.tsx
index e385e3983..ea82b29d7 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/settings/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/settings/page.tsx
@@ -1,6 +1,6 @@
 "use
client"; -import { Bot, Brain, FileText, Globe, ImageIcon, MessageSquare, Shield } from "lucide-react"; +import { Bot, Brain, FileText, Globe, ImageIcon, MessageSquare, Search, Shield } from "lucide-react"; import { useParams, useRouter, useSearchParams } from "next/navigation"; import { useTranslations } from "next-intl"; import { useCallback, useEffect } from "react"; @@ -11,6 +11,7 @@ import { LLMRoleManager } from "@/components/settings/llm-role-manager"; import { ModelConfigManager } from "@/components/settings/model-config-manager"; import { PromptConfigManager } from "@/components/settings/prompt-config-manager"; import { RolesManager } from "@/components/settings/roles-manager"; +import { WebSearchSettingsManager } from "@/components/settings/web-search-settings-manager"; import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/animated-tabs"; import { trackSettingsViewed } from "@/lib/posthog/events"; @@ -20,6 +21,7 @@ const VALID_TABS = [ "roles", "image-models", "prompts", + "web-search", "public-links", "team-roles", ] as const; @@ -80,6 +82,10 @@ export default function SettingsPage() { {t("nav_system_instructions")} + + + {t("nav_web_search")} + {t("nav_public_links")} @@ -100,6 +106,9 @@ export default function SettingsPage() { + + + diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/connector-benefits.ts b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/connector-benefits.ts index 392de4bc8..0dc093100 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/connector-benefits.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/connector-benefits.ts @@ -23,11 +23,6 @@ export function getConnectorBenefits(connectorType: string): string[] | null { "Real-time information from the web", "Enhanced search capabilities for your projects", ], - SEARXNG_API: [ - "Privacy-focused meta-search across multiple engines", - "Self-hosted search 
instance for full control", - "Real-time web search results from multiple sources", - ], LINKUP_API: [ "AI-powered search results tailored to your queries", "Real-time information from the web", diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/index.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/index.tsx index ffaeb1478..37d4ad5d8 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/index.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/index.tsx @@ -8,7 +8,6 @@ import { LinkupApiConnectForm } from "./components/linkup-api-connect-form"; import { LumaConnectForm } from "./components/luma-connect-form"; import { MCPConnectForm } from "./components/mcp-connect-form"; import { ObsidianConnectForm } from "./components/obsidian-connect-form"; -import { SearxngConnectForm } from "./components/searxng-connect-form"; import { TavilyApiConnectForm } from "./components/tavily-api-connect-form"; export interface ConnectFormProps { @@ -41,8 +40,6 @@ export function getConnectFormComponent(connectorType: string): ConnectFormCompo switch (connectorType) { case "TAVILY_API": return TavilyApiConnectForm; - case "SEARXNG_API": - return SearxngConnectForm; case "LINKUP_API": return LinkupApiConnectForm; case "BAIDU_SEARCH_API": diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx index 6b4d86b5a..cef0c99ac 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx @@ -19,7 +19,6 @@ import { LinkupApiConfig } from "./components/linkup-api-config"; import { LumaConfig } from "./components/luma-config"; import { MCPConfig } from "./components/mcp-config"; import { ObsidianConfig } from "./components/obsidian-config"; -import 
{ SearxngConfig } from "./components/searxng-config"; import { SlackConfig } from "./components/slack-config"; import { TavilyApiConfig } from "./components/tavily-api-config"; import { TeamsConfig } from "./components/teams-config"; @@ -45,8 +44,6 @@ export function getConnectorConfigComponent( return GoogleDriveConfig; case "TAVILY_API": return TavilyApiConfig; - case "SEARXNG_API": - return SearxngConfig; case "LINKUP_API": return LinkupApiConfig; case "BAIDU_SEARCH_API": diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-connect-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-connect-view.tsx index 423819363..596b98e93 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-connect-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-connect-view.tsx @@ -11,7 +11,6 @@ import { getConnectFormComponent } from "../../connect-forms"; const FORM_ID_MAP: Record = { TAVILY_API: "tavily-connect-form", - SEARXNG_API: "searxng-connect-form", LINKUP_API: "linkup-api-connect-form", BAIDU_SEARCH_API: "baidu-search-api-connect-form", ELASTICSEARCH_CONNECTOR: "elasticsearch-connect-form", diff --git a/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts b/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts index 9889708d7..a16989747 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts @@ -136,12 +136,6 @@ export const OTHER_CONNECTORS = [ description: "Search with Tavily", connectorType: EnumConnectorName.TAVILY_API, }, - { - id: "searxng", - title: "SearxNG", - description: "Search with SearxNG", - connectorType: EnumConnectorName.SEARXNG_API, - }, { id: "linkup-api", title: 
"Linkup API", diff --git a/surfsense_web/components/settings/web-search-settings-manager.tsx b/surfsense_web/components/settings/web-search-settings-manager.tsx new file mode 100644 index 000000000..d819eece9 --- /dev/null +++ b/surfsense_web/components/settings/web-search-settings-manager.tsx @@ -0,0 +1,258 @@ +"use client"; + +import { useQuery } from "@tanstack/react-query"; +import { useAtomValue } from "jotai"; +import { Globe, Loader2, Save } from "lucide-react"; +import { useTranslations } from "next-intl"; +import { useCallback, useEffect, useState } from "react"; +import { toast } from "sonner"; +import { updateSearchSpaceMutationAtom } from "@/atoms/search-spaces/search-space-mutation.atoms"; +import { Alert, AlertDescription } from "@/components/ui/alert"; +import { Button } from "@/components/ui/button"; +import { + Card, + CardContent, + CardDescription, + CardHeader, + CardTitle, +} from "@/components/ui/card"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import { Skeleton } from "@/components/ui/skeleton"; +import { Switch } from "@/components/ui/switch"; +import { searchSpacesApiService } from "@/lib/apis/search-spaces-api.service"; +import { baseApiService } from "@/lib/apis/base-api.service"; +import { cacheKeys } from "@/lib/query-client/cache-keys"; + +interface WebSearchSettingsManagerProps { + searchSpaceId: number; +} + +interface HealthStatus { + status: string; + response_time_ms?: number; + error?: string; + circuit_breaker?: string; +} + +export function WebSearchSettingsManager({ searchSpaceId }: WebSearchSettingsManagerProps) { + const t = useTranslations("searchSpaceSettings"); + const { + data: searchSpace, + isLoading, + refetch, + } = useQuery({ + queryKey: cacheKeys.searchSpaces.detail(searchSpaceId.toString()), + queryFn: () => 
searchSpacesApiService.getSearchSpace({ id: searchSpaceId }), + enabled: !!searchSpaceId, + }); + /* Service health: re-fetched every 30 s (refetchInterval) and considered fresh for 10 s (staleTime). */ + const { data: healthData } = useQuery({ + queryKey: ["web-search-health"], + queryFn: async () => { + const response = await baseApiService.get("/api/v1/platform/web-search/health"); + return response as HealthStatus; + }, + refetchInterval: 30000, + staleTime: 10000, + }); + + const { mutateAsync: updateSearchSpace } = useAtomValue(updateSearchSpaceMutationAtom); + /* Local form state; every field is kept as a string except the enabled and saving flags. */ + const [enabled, setEnabled] = useState(true); + const [engines, setEngines] = useState(""); + const [language, setLanguage] = useState(""); + const [safesearch, setSafesearch] = useState(""); + const [saving, setSaving] = useState(false); + /* Mirror the loaded search space into the form: a missing enabled flag falls back to true, missing config values to "". */ + useEffect(() => { + if (searchSpace) { + setEnabled(searchSpace.web_search_enabled ?? true); + const cfg = searchSpace.web_search_config as Record | null; + setEngines((cfg?.engines as string) ?? ""); + setLanguage((cfg?.language as string) ?? ""); + const ss = cfg?.safesearch; + setSafesearch(ss !== null && ss !== undefined ? String(ss) : ""); + } + }, [searchSpace]); + /* Persist the form: blank fields are left out of the config, and a config with no keys is sent as null. */ + const handleSave = useCallback(async () => { + try { + setSaving(true); + + const webSearchConfig: Record = {}; + if (engines.trim()) webSearchConfig.engines = engines.trim(); + if (language.trim()) webSearchConfig.language = language.trim(); + if (safesearch !== "") webSearchConfig.safesearch = Number(safesearch); + + await updateSearchSpace({ + id: searchSpaceId, + data: { + web_search_enabled: enabled, + web_search_config: Object.keys(webSearchConfig).length > 0 ? webSearchConfig : null, + }, + }); + + toast.success(t("web_search_saved")); + await refetch(); + } catch (error: unknown) { + console.error("Error saving web search settings:", error); + const message = error instanceof Error ? 
error.message : "Failed to save web search settings"; + toast.error(message); + } finally { + setSaving(false); /* runs after both success and failure, so the saving flag is always cleared */ + } + }, [searchSpaceId, enabled, engines, language, safesearch, updateSearchSpace, refetch, t]); + + if (isLoading) { + return ( +
+ + + + + + + + + + +
+ ); + } + + const isHealthy = healthData?.status === "healthy"; + const isUnavailable = healthData?.status === "unavailable"; + + return ( +
+ + + + {t("web_search_description")} + + + + {healthData && ( +
+ + + {isHealthy + ? `${t("web_search_status_healthy")}${typeof healthData.response_time_ms === "number" ? ` (${healthData.response_time_ms}ms)` : ""}` /* response_time_ms is optional on HealthStatus: omit the timing suffix rather than render "(undefinedms)" */ + : isUnavailable + ? t("web_search_status_not_configured") + : t("web_search_status_unhealthy")} + +
+ )} + + + + {t("web_search_title")} + + {t("web_search_enabled_description")} + + + +
+
+ +

+ {t("web_search_enabled_description")} +

+
+ +
+ + {enabled && ( +
+
+ + setEngines(e.target.value)} + className="text-sm md:text-base h-9 md:h-10" + /> +

+ {t("web_search_engines_description")} +

+
+ +
+
+ + setLanguage(e.target.value)} + className="text-sm md:text-base h-9 md:h-10" + /> +

+ {t("web_search_language_description")} +

+
+ +
+ + +

+ {t("web_search_safesearch_description")} +

+
+
+
+ )} +
+
+ +
+ +
+
+ ); +} diff --git a/surfsense_web/contracts/types/search-space.types.ts b/surfsense_web/contracts/types/search-space.types.ts index aed08710e..a71c1bbbb 100644 --- a/surfsense_web/contracts/types/search-space.types.ts +++ b/surfsense_web/contracts/types/search-space.types.ts @@ -9,6 +9,8 @@ export const searchSpace = z.object({ user_id: z.string(), citations_enabled: z.boolean(), qna_custom_instructions: z.string().nullable(), + web_search_enabled: z.boolean(), + web_search_config: z.record(z.unknown()).nullable(), member_count: z.number(), is_owner: z.boolean(), }); @@ -49,7 +51,14 @@ export const getSearchSpaceResponse = searchSpace.omit({ member_count: true, is_ export const updateSearchSpaceRequest = z.object({ id: z.number(), data: searchSpace - .pick({ name: true, description: true, citations_enabled: true, qna_custom_instructions: true }) + .pick({ + name: true, + description: true, + citations_enabled: true, + qna_custom_instructions: true, + web_search_enabled: true, + web_search_config: true, + }) .partial(), }); diff --git a/surfsense_web/messages/en.json b/surfsense_web/messages/en.json index d6ad50f29..a454ea5c9 100644 --- a/surfsense_web/messages/en.json +++ b/surfsense_web/messages/en.json @@ -757,7 +757,27 @@ "general_reset": "Reset Changes", "general_save": "Save Changes", "general_saving": "Saving", - "general_unsaved_changes": "You have unsaved changes. Click \"Save Changes\" to apply them." + "general_unsaved_changes": "You have unsaved changes. Click \"Save Changes\" to apply them.", + "nav_web_search": "Web Search", + "nav_web_search_desc": "Built-in web search settings", + "web_search_title": "Web Search", + "web_search_description": "Web search is powered by a built-in SearXNG instance. 
All queries are proxied through your server — upstream search engines receive only the query text, never the IP address or cookies of the person searching.", + "web_search_enabled_label": "Enable Web Search", + "web_search_enabled_description": "When enabled, the AI agent can search the web for real-time information like news, prices, and current events.", + "web_search_status_healthy": "Web search service is healthy", + "web_search_status_unhealthy": "Web search service is unavailable", + "web_search_status_not_configured": "Web search service is not configured", + "web_search_engines_label": "Search Engines", + "web_search_engines_placeholder": "google,brave,duckduckgo", + "web_search_engines_description": "Comma-separated list of SearXNG engines to use. Leave empty for defaults.", + "web_search_language_label": "Preferred Language", + "web_search_language_placeholder": "en", + "web_search_language_description": "IETF language tag (e.g. en, en-US). Leave empty for auto-detect.", + "web_search_safesearch_label": "SafeSearch Level", + "web_search_safesearch_description": "0 = off, 1 = moderate, 2 = strict", + "web_search_save": "Save Web Search Settings", + "web_search_saving": "Saving...", + "web_search_saved": "Web search settings saved" }, "homepage": { "hero_title_part1": "The AI Workspace",