feat: integrate SearXNG web search service with platform capabilities

- Added SearXNG service configuration to Docker setup, including environment variables and health checks.
- Introduced new settings management for web search in the frontend, allowing users to enable/disable and configure search engines and language preferences.
- Updated backend to support web search functionality, including database schema changes and service integration.
- Implemented health check endpoint for the web search service and integrated it into the application.
- Removed legacy SearXNG API connector references in favor of the new platform service approach.
This commit is contained in:
Anish Sarkar 2026-03-14 20:25:25 +05:30
parent 49d8f41b09
commit d40c6bf743
24 changed files with 853 additions and 202 deletions

View file

@ -36,6 +36,7 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
# BACKEND_PORT=8929
# FRONTEND_PORT=3929
# ELECTRIC_PORT=5929
# SEARXNG_PORT=8888
# FLOWER_PORT=5555
# ==============================================================================
@ -199,6 +200,16 @@ STT_SERVICE=local/base
# COMPOSIO_ENABLED=TRUE
# COMPOSIO_REDIRECT_URI=http://localhost:8000/api/v1/auth/composio/connector/callback
# ------------------------------------------------------------------------------
# SearXNG (bundled web search — works out of the box, no config needed)
# ------------------------------------------------------------------------------
# SearXNG provides web search to all search spaces automatically.
# To access the SearXNG UI directly: http://localhost:8888
# To disable the service entirely: docker compose up --scale searxng=0
# To point at your own SearXNG instance instead of the bundled one:
# SEARXNG_DEFAULT_HOST=http://your-searxng:8080
# SEARXNG_SECRET=surfsense-searxng-secret
# ------------------------------------------------------------------------------
# Daytona Sandbox (optional — cloud code execution for the deep agent)
# ------------------------------------------------------------------------------

View file

@ -57,6 +57,23 @@ services:
timeout: 5s
retries: 5
searxng:
image: searxng/searxng:2024.12.23
ports:
- "${SEARXNG_PORT:-8888}:8080"
volumes:
- ./searxng/settings.yml:/etc/searxng/settings.yml:ro
- ./searxng/limiter.toml:/etc/searxng/limiter.toml:ro
- searxng_data:/etc/searxng
environment:
- SEARXNG_BASE_URL=http://localhost:${SEARXNG_PORT:-8888}/
- SEARXNG_SECRET=${SEARXNG_SECRET:-surfsense-searxng-secret}
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080/healthz"]
interval: 10s
timeout: 5s
retries: 5
backend:
build: ../surfsense_backend
ports:
@ -81,6 +98,7 @@ services:
- ELECTRIC_DB_PASSWORD=${ELECTRIC_DB_PASSWORD:-electric_password}
- AUTH_TYPE=${AUTH_TYPE:-LOCAL}
- NEXT_FRONTEND_URL=${NEXT_FRONTEND_URL:-http://localhost:3000}
- SEARXNG_DEFAULT_HOST=${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
# Daytona Sandbox uncomment and set credentials to enable cloud code execution
# - DAYTONA_SANDBOX_ENABLED=TRUE
# - DAYTONA_API_KEY=${DAYTONA_API_KEY:-}
@ -92,6 +110,8 @@ services:
condition: service_healthy
redis:
condition: service_healthy
searxng:
condition: service_healthy
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
interval: 15s
@ -115,6 +135,7 @@ services:
- PYTHONPATH=/app
- ELECTRIC_DB_USER=${ELECTRIC_DB_USER:-electric}
- ELECTRIC_DB_PASSWORD=${ELECTRIC_DB_PASSWORD:-electric_password}
- SEARXNG_DEFAULT_HOST=${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
- SERVICE_ROLE=worker
depends_on:
db:
@ -205,3 +226,5 @@ volumes:
name: surfsense-dev-redis
shared_temp:
name: surfsense-dev-shared-temp
searxng_data:
name: surfsense-dev-searxng

View file

@ -42,6 +42,24 @@ services:
timeout: 5s
retries: 5
searxng:
image: searxng/searxng:2024.12.23
volumes:
- ./searxng/settings.yml:/etc/searxng/settings.yml:ro
- ./searxng/limiter.toml:/etc/searxng/limiter.toml:ro
- searxng_data:/etc/searxng
environment:
SEARXNG_BASE_URL: http://localhost:${SEARXNG_PORT:-8888}/
SEARXNG_SECRET: ${SEARXNG_SECRET:-surfsense-searxng-secret}
ports:
- "${SEARXNG_PORT:-8888}:8080"
restart: unless-stopped
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080/healthz"]
interval: 10s
timeout: 5s
retries: 5
backend:
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}
ports:
@ -62,6 +80,7 @@ services:
ELECTRIC_DB_USER: ${ELECTRIC_DB_USER:-electric}
ELECTRIC_DB_PASSWORD: ${ELECTRIC_DB_PASSWORD:-electric_password}
NEXT_FRONTEND_URL: ${NEXT_FRONTEND_URL:-http://localhost:${FRONTEND_PORT:-3929}}
SEARXNG_DEFAULT_HOST: ${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
# Daytona Sandbox uncomment and set credentials to enable cloud code execution
# DAYTONA_SANDBOX_ENABLED: "TRUE"
# DAYTONA_API_KEY: ${DAYTONA_API_KEY:-}
@ -75,6 +94,8 @@ services:
condition: service_healthy
redis:
condition: service_healthy
searxng:
condition: service_healthy
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
@ -98,6 +119,7 @@ services:
PYTHONPATH: /app
ELECTRIC_DB_USER: ${ELECTRIC_DB_USER:-electric}
ELECTRIC_DB_PASSWORD: ${ELECTRIC_DB_PASSWORD:-electric_password}
SEARXNG_DEFAULT_HOST: ${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
SERVICE_ROLE: worker
depends_on:
db:
@ -193,3 +215,5 @@ volumes:
name: surfsense-redis
shared_temp:
name: surfsense-shared-temp
searxng_data:
name: surfsense-searxng

View file

@ -0,0 +1,5 @@
# SearXNG bot-detection (limiter) configuration for the bundled instance.
# NOTE(review): these settings relax bot detection entirely — assumed safe only
# while the service is reachable solely from the internal Docker network; confirm
# before exposing the SearXNG port publicly.
[botdetection.ip_limit]
# Do not require the JS-generated link token; the backend calls the JSON API directly.
link_token = false
[botdetection.ip_lists]
# Allow every client IP through the limiter.
pass_ip = ["0.0.0.0/0"]

View file

@ -0,0 +1,60 @@
# SearXNG configuration for the bundled SurfSense web-search service.
# Values here extend the image defaults (use_default_settings: true); the
# SEARXNG_SECRET / SEARXNG_BASE_URL environment variables override the
# matching keys at container start.
use_default_settings: true
server:
  # Placeholder only — the real secret is injected via the SEARXNG_SECRET env var.
  secret_key: "override-me-via-env"
  # Built-in limiter off; see limiter.toml for the relaxed bot-detection config.
  limiter: false
  image_proxy: false
  method: "GET"
  default_http_headers:
    # Keep the bundled instance out of search-engine indexes.
    X-Robots-Tag: "noindex, nofollow"
search:
  # "json" must stay enabled — the backend consumes the JSON API.
  formats:
    - html
    - json
  default_lang: "auto"
  autocomplete: ""
  safe_search: 0
ui:
  static_use_hash: true
outgoing:
  request_timeout: 12.0
  pool_connections: 100
  pool_maxsize: 20
  enable_http2: true
# Engine selection: weights bias result ranking toward the major general-purpose
# engines; bing/yahoo are explicitly disabled.
engines:
  - name: google
    disabled: false
    weight: 1.2
  - name: duckduckgo
    disabled: false
    weight: 1.1
  - name: brave
    disabled: false
    weight: 1.0
  - name: qwant
    disabled: false
    weight: 0.9
  - name: mojeek
    disabled: false
    weight: 0.7
  - name: wikipedia
    disabled: false
    weight: 0.8
  - name: wikidata
    disabled: false
    weight: 0.6
  - name: currency_convert
    disabled: false
  - name: duckduckgo definitions
    disabled: false
  - name: stackoverflow
    disabled: false
    weight: 0.7
  - name: bing
    disabled: true
  - name: yahoo
    disabled: true

View file

@ -0,0 +1,51 @@
"""106_add_platform_web_search
Revision ID: 106
Revises: 105
Create Date: 2026-03-14
Adds web_search_enabled and web_search_config columns to searchspaces for
per-space control over the platform web search capability.
Also removes legacy SEARXNG_API connector rows web search is now a platform
service, not a per-user connector.
"""
from __future__ import annotations
from collections.abc import Sequence
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import JSONB
from alembic import op
revision: str = "106"
down_revision: str | None = "105"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
op.add_column(
"searchspaces",
sa.Column(
"web_search_enabled",
sa.Boolean(),
nullable=False,
server_default=sa.text("true"),
),
)
op.add_column(
"searchspaces",
sa.Column("web_search_config", JSONB, nullable=True),
)
op.execute(
"DELETE FROM search_source_connectors WHERE connector_type = 'SEARXNG_API'"
)
def downgrade() -> None:
op.drop_column("searchspaces", "web_search_config")
op.drop_column("searchspaces", "web_search_enabled")

View file

@ -233,6 +233,28 @@ async def create_surfsense_deep_agent(
available_document_types = await connector_service.get_available_document_types(
search_space_id
)
# Platform web search: inject SEARXNG_API when the service is available
# and the search space hasn't disabled web search.
from app.db import SearchSpace
from app.services import web_search_service
_LIVE_SEARCH_CONNECTORS = {"TAVILY_API", "SEARXNG_API", "LINKUP_API", "BAIDU_SEARCH_API"}
space = await db_session.get(SearchSpace, search_space_id)
web_search_enabled = space.web_search_enabled if space else True
if web_search_enabled and web_search_service.is_available():
if available_connectors is None:
available_connectors = list(_ALWAYS_AVAILABLE_DOC_TYPES)
if "SEARXNG_API" not in available_connectors:
available_connectors.append("SEARXNG_API")
if not web_search_enabled and available_connectors:
available_connectors = [
c for c in available_connectors if c not in _LIVE_SEARCH_CONNECTORS
]
except Exception as e:
logging.warning(f"Failed to discover available connectors/document types: {e}")
_perf_log.info(

View file

@ -101,8 +101,9 @@ _TOOL_INSTRUCTIONS["search_knowledge_base"] = """
including calendar apps, note-taking apps (Obsidian, Notion), chat apps (Slack, Discord), and more.
- IMPORTANT (REAL-TIME / PUBLIC WEB QUERIES): For questions that require current public web data
(e.g., live exchange rates, stock prices, breaking news, weather, current events), you MUST call
`search_knowledge_base` using live web connectors via `connectors_to_search`:
["LINKUP_API", "TAVILY_API", "SEARXNG_API", "BAIDU_SEARCH_API"].
`search_knowledge_base` using live web connectors via `connectors_to_search`.
Use whichever of these live connectors are available: ["LINKUP_API", "TAVILY_API", "SEARXNG_API", "BAIDU_SEARCH_API"].
Only connectors listed in the tool's available connector enums section will actually return results.
- For these real-time/public web queries, DO NOT answer from memory and DO NOT say you lack internet
access before attempting a live connector search.
- If the live connectors return no relevant results, explain that live web sources did not return enough

View file

@ -513,6 +513,15 @@ async def health_check():
return {"status": "ok"}
@app.get("/api/v1/platform/web-search/health", tags=["platform"])
@limiter.exempt
async def web_search_health(user: User = Depends(current_active_user)):
"""Return the health status of the platform web search (SearXNG) service."""
from app.services import web_search_service
return await web_search_service.health_check()
@app.get("/verify-token")
async def authenticated_route(
user: User = Depends(current_active_user),

View file

@ -224,6 +224,9 @@ class Config:
os.getenv("CONNECTOR_INDEXING_LOCK_TTL_SECONDS", str(8 * 60 * 60))
)
# Platform web search (SearXNG)
SEARXNG_DEFAULT_HOST = os.getenv("SEARXNG_DEFAULT_HOST")
NEXT_FRONTEND_URL = os.getenv("NEXT_FRONTEND_URL")
# Backend URL to override the http to https in the OAuth redirect URI
BACKEND_URL = os.getenv("BACKEND_URL")

View file

@ -1204,6 +1204,12 @@ class SearchSpace(BaseModel, TimestampMixin):
Integer, nullable=True, default=0
) # For image generation, defaults to Auto mode
# Platform web search capability (opt-out via toggle)
web_search_enabled = Column(
Boolean, nullable=False, default=True, server_default=text("true")
)
web_search_config = Column(JSONB, nullable=True)
user_id = Column(
UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
)

View file

@ -180,6 +180,8 @@ async def read_search_spaces(
user_id=space.user_id,
citations_enabled=space.citations_enabled,
qna_custom_instructions=space.qna_custom_instructions,
web_search_enabled=space.web_search_enabled,
web_search_config=space.web_search_config,
member_count=member_count,
is_owner=is_owner,
)

View file

@ -1,5 +1,6 @@
import uuid
from datetime import datetime
from typing import Any
from pydantic import BaseModel, ConfigDict
@ -12,26 +13,29 @@ class SearchSpaceBase(BaseModel):
class SearchSpaceCreate(SearchSpaceBase):
    """Payload for creating a search space; omitted fields fall back to defaults."""

    # Optional on create, will use defaults if not provided
    citations_enabled: bool = True
    qna_custom_instructions: str | None = None
    # Platform web search: on by default; config holds optional per-space overrides.
    web_search_enabled: bool = True
    web_search_config: dict[str, Any] | None = None
class SearchSpaceUpdate(BaseModel):
    """Partial-update payload — every field optional; send only what changes."""

    # All fields optional on update - only send what you want to change
    name: str | None = None
    description: str | None = None
    citations_enabled: bool | None = None
    qna_custom_instructions: str | None = None
    web_search_enabled: bool | None = None
    web_search_config: dict[str, Any] | None = None
class SearchSpaceRead(SearchSpaceBase, IDModel, TimestampModel):
    """API representation of a search space row (read path)."""

    id: int
    created_at: datetime
    user_id: uuid.UUID
    # QnA configuration
    citations_enabled: bool
    qna_custom_instructions: str | None = None
    # Platform web search settings (config of None means platform defaults).
    web_search_enabled: bool
    web_search_config: dict[str, Any] | None = None

    model_config = ConfigDict(from_attributes=True)

View file

@ -16,6 +16,7 @@ from app.db import (
Document,
SearchSourceConnector,
SearchSourceConnectorType,
SearchSpace,
async_session_maker,
)
from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever
@ -577,185 +578,44 @@ class ConnectorService:
search_space_id: int,
top_k: int = 20,
) -> tuple:
"""Search using the platform SearXNG instance.
Delegates to ``WebSearchService`` which handles caching, circuit
breaking, and retries. Per-search-space overrides are read from the
``SearchSpace.web_search_config`` JSONB column.
"""
Search using a configured SearxNG instance and return both sources and documents.
"""
searx_connector = await self.get_connector_by_type(
SearchSourceConnectorType.SEARXNG_API, search_space_id
from app.services import web_search_service
if not web_search_service.is_available():
return {
"id": 11,
"name": "Web Search",
"type": "SEARXNG_API",
"sources": [],
}, []
# Fetch optional per-space overrides
engines: str | None = None
language: str | None = None
safesearch: int | None = None
space = await self.session.get(SearchSpace, search_space_id)
if space and space.web_search_config:
cfg = space.web_search_config
engines = cfg.get("engines")
language = cfg.get("language")
raw_ss = cfg.get("safesearch")
if isinstance(raw_ss, int) and 0 <= raw_ss <= 2:
safesearch = raw_ss
return await web_search_service.search(
query=user_query,
top_k=top_k,
engines=engines,
language=language,
safesearch=safesearch,
)
if not searx_connector:
return {
"id": 11,
"name": "SearxNG Search",
"type": "SEARXNG_API",
"sources": [],
}, []
config = searx_connector.config or {}
host = config.get("SEARXNG_HOST")
if not host:
print("SearxNG connector is missing SEARXNG_HOST configuration")
return {
"id": 11,
"name": "SearxNG Search",
"type": "SEARXNG_API",
"sources": [],
}, []
api_key = config.get("SEARXNG_API_KEY")
engines = config.get("SEARXNG_ENGINES")
categories = config.get("SEARXNG_CATEGORIES")
language = config.get("SEARXNG_LANGUAGE")
safesearch = config.get("SEARXNG_SAFESEARCH")
def _parse_bool(value: Any, default: bool = True) -> bool:
if isinstance(value, bool):
return value
if isinstance(value, str):
lowered = value.strip().lower()
if lowered in {"true", "1", "yes", "on"}:
return True
if lowered in {"false", "0", "no", "off"}:
return False
return default
verify_ssl = _parse_bool(config.get("SEARXNG_VERIFY_SSL", True))
safesearch_value: int | None = None
if isinstance(safesearch, str):
safesearch_clean = safesearch.strip()
if safesearch_clean.isdigit():
safesearch_value = int(safesearch_clean)
elif isinstance(safesearch, int | float):
safesearch_value = int(safesearch)
if safesearch_value is not None and not (0 <= safesearch_value <= 2):
safesearch_value = None
def _format_list(value: Any) -> str | None:
if value is None:
return None
if isinstance(value, str):
value = value.strip()
return value or None
if isinstance(value, list | tuple | set):
cleaned = [str(item).strip() for item in value if str(item).strip()]
return ",".join(cleaned) if cleaned else None
return str(value)
params: dict[str, Any] = {
"q": user_query,
"format": "json",
"language": language or "",
"limit": max(1, min(top_k, 50)),
}
engines_param = _format_list(engines)
if engines_param:
params["engines"] = engines_param
categories_param = _format_list(categories)
if categories_param:
params["categories"] = categories_param
if safesearch_value is not None:
params["safesearch"] = safesearch_value
if not params.get("language"):
params.pop("language")
headers = {"Accept": "application/json"}
if api_key:
headers["X-API-KEY"] = api_key
searx_endpoint = urljoin(host if host.endswith("/") else f"{host}/", "search")
try:
async with httpx.AsyncClient(timeout=20.0, verify=verify_ssl) as client:
response = await client.get(
searx_endpoint,
params=params,
headers=headers,
)
response.raise_for_status()
except httpx.HTTPError as exc:
print(f"Error searching with SearxNG: {exc!s}")
return {
"id": 11,
"name": "SearxNG Search",
"type": "SEARXNG_API",
"sources": [],
}, []
try:
data = response.json()
except ValueError:
print("Failed to decode JSON response from SearxNG")
return {
"id": 11,
"name": "SearxNG Search",
"type": "SEARXNG_API",
"sources": [],
}, []
searx_results = data.get("results", [])
if not searx_results:
return {
"id": 11,
"name": "SearxNG Search",
"type": "SEARXNG_API",
"sources": [],
}, []
sources_list: list[dict[str, Any]] = []
documents: list[dict[str, Any]] = []
async with self.counter_lock:
for result in searx_results:
description = result.get("content") or result.get("snippet") or ""
if len(description) > 160:
description = f"{description}"
source = {
"id": self.source_id_counter,
"title": result.get("title", "SearxNG Result"),
"description": description,
"url": result.get("url", ""),
}
sources_list.append(source)
metadata = {
"url": result.get("url", ""),
"engines": result.get("engines", []),
"category": result.get("category"),
"source": "SEARXNG_API",
}
document = {
"chunk_id": self.source_id_counter,
"content": description or result.get("content", ""),
"score": result.get("score", 0.0),
"document": {
"id": self.source_id_counter,
"title": result.get("title", "SearxNG Result"),
"document_type": "SEARXNG_API",
"metadata": metadata,
},
}
documents.append(document)
self.source_id_counter += 1
result_object = {
"id": 11,
"name": "SearxNG Search",
"type": "SEARXNG_API",
"sources": sources_list,
}
return result_object, documents
async def search_baidu(
self,
user_query: str,

View file

@ -0,0 +1,292 @@
"""
Platform-level web search service backed by SearXNG.
Provides caching via Redis, a circuit breaker for resilience, and a health
check endpoint. Configuration is read from environment variables rather
than per-search-space database rows — this service is a platform capability
that is always available when ``SEARXNG_DEFAULT_HOST`` is set.
"""
from __future__ import annotations
import hashlib
import json
import logging
import time
from typing import Any
from urllib.parse import urljoin
import httpx
import redis
from app.config import config
logger = logging.getLogger(__name__)
# Canonical empty payload returned when no results are available.  NOTE(review):
# callers receive shallow copies via ``dict(_EMPTY_RESULT)``, so the nested
# "sources" list object is shared — treat it as read-only.
_EMPTY_RESULT: dict[str, Any] = {
    "id": 11,
    "name": "Web Search",
    "type": "SEARXNG_API",
    "sources": [],
}

# ---------------------------------------------------------------------------
# Redis helpers
# ---------------------------------------------------------------------------

# Lazily-created module-level Redis client, reused across calls.
_redis_client: redis.Redis | None = None


def _get_redis() -> redis.Redis:
    """Return the shared Redis client, creating it on first use."""
    global _redis_client
    if _redis_client is None:
        # decode_responses=True so cached JSON comes back as str, not bytes.
        _redis_client = redis.from_url(config.REDIS_APP_URL, decode_responses=True)
    return _redis_client
# ---------------------------------------------------------------------------
# Circuit Breaker
# ---------------------------------------------------------------------------
_CB_FAILURES_KEY = "websearch:circuit:failures"
_CB_OPEN_KEY = "websearch:circuit:open"
_CB_FAILURE_THRESHOLD = 5
_CB_FAILURE_WINDOW_SECONDS = 60
_CB_COOLDOWN_SECONDS = 30
def _circuit_is_open() -> bool:
try:
return _get_redis().exists(_CB_OPEN_KEY) == 1
except redis.RedisError:
return False
def _record_failure() -> None:
    """Record one SearXNG failure; open the circuit after the threshold.

    Failures are counted in a rolling window (the counter key's TTL is
    refreshed on every failure).  Once the count reaches
    ``_CB_FAILURE_THRESHOLD``, the open flag is set with a cooldown TTL.
    Redis errors are swallowed — the breaker is best-effort and must never
    break the search path itself.
    """
    try:
        r = _get_redis()
        pipe = r.pipeline()
        pipe.incr(_CB_FAILURES_KEY)
        pipe.expire(_CB_FAILURES_KEY, _CB_FAILURE_WINDOW_SECONDS)
        # Use the INCR result returned by the pipeline instead of a separate
        # GET: one fewer round-trip and no race with concurrent increments or
        # key expiry between the pipeline and a follow-up read.
        failures = int(pipe.execute()[0])
        if failures >= _CB_FAILURE_THRESHOLD:
            r.setex(_CB_OPEN_KEY, _CB_COOLDOWN_SECONDS, "1")
            logger.warning(
                "Circuit breaker OPENED after %d failures — "
                "SearXNG calls paused for %ds",
                failures,
                _CB_COOLDOWN_SECONDS,
            )
    except redis.RedisError:
        # Best-effort bookkeeping only.
        pass
def _record_success() -> None:
    """Reset the circuit breaker state after a successful SearXNG call."""
    try:
        # Clear both the rolling failure counter and the open flag.
        _get_redis().delete(_CB_FAILURES_KEY, _CB_OPEN_KEY)
    except redis.RedisError:
        # Best-effort: a Redis hiccup here must not fail the search.
        pass
# ---------------------------------------------------------------------------
# Result Caching
# ---------------------------------------------------------------------------
_CACHE_TTL_SECONDS = 300 # 5 minutes
_CACHE_PREFIX = "websearch:cache:"
def _cache_key(query: str, engines: str | None, language: str | None) -> str:
raw = f"{query}|{engines or ''}|{language or ''}"
digest = hashlib.sha256(raw.encode()).hexdigest()[:24]
return f"{_CACHE_PREFIX}{digest}"
def _cache_get(key: str) -> dict | None:
    """Return the cached payload stored under ``key``, or ``None``.

    ``None`` is returned on a cache miss, on a Redis error, or when the
    stored value is not valid JSON — all failures are silent by design.
    """
    try:
        raw = _get_redis().get(key)
    except redis.RedisError:
        return None
    if not raw:
        return None
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return None
def _cache_set(key: str, value: dict) -> None:
    """Store ``value`` as JSON under ``key`` with the standard TTL (best-effort)."""
    payload = json.dumps(value)
    try:
        _get_redis().setex(key, _CACHE_TTL_SECONDS, payload)
    except redis.RedisError:
        # Caching is an optimisation; never let it break the search path.
        pass
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def is_available() -> bool:
    """Return ``True`` when the platform SearXNG host is configured.

    Availability is purely a configuration check (``SEARXNG_DEFAULT_HOST``
    set and non-empty); it does not probe the service itself.
    """
    host = config.SEARXNG_DEFAULT_HOST
    return host is not None and host != ""
async def health_check() -> dict[str, Any]:
    """Ping the SearXNG ``/healthz`` endpoint and return status info.

    Returns a dict with ``status`` ("healthy"/"unhealthy"/"unavailable"),
    the measured ``response_time_ms``, and the circuit-breaker state.
    Never raises — every failure is reported in the payload instead.
    """
    host = config.SEARXNG_DEFAULT_HOST
    if not host:
        return {"status": "unavailable", "error": "SEARXNG_DEFAULT_HOST not set"}
    # Ensure a trailing slash so urljoin appends rather than replaces the path.
    healthz_url = urljoin(host if host.endswith("/") else f"{host}/", "healthz")
    t0 = time.perf_counter()
    try:
        # NOTE(review): verify=False disables TLS certificate checks — assumed
        # safe for the bundled in-network instance; confirm before pointing
        # SEARXNG_DEFAULT_HOST at an external HTTPS host.
        async with httpx.AsyncClient(timeout=5.0, verify=False) as client:
            resp = await client.get(healthz_url)
            resp.raise_for_status()
        elapsed_ms = round((time.perf_counter() - t0) * 1000)
        return {
            "status": "healthy",
            "response_time_ms": elapsed_ms,
            "circuit_breaker": "open" if _circuit_is_open() else "closed",
        }
    except Exception as exc:
        # Broad catch is deliberate: a health probe must report, not raise.
        elapsed_ms = round((time.perf_counter() - t0) * 1000)
        return {
            "status": "unhealthy",
            "error": str(exc),
            "response_time_ms": elapsed_ms,
            "circuit_breaker": "open" if _circuit_is_open() else "closed",
        }
async def search(
    query: str,
    top_k: int = 20,
    *,
    engines: str | None = None,
    language: str | None = None,
    safesearch: int | None = None,
) -> tuple[dict[str, Any], list[dict[str, Any]]]:
    """Execute a web search against the platform SearXNG instance.

    Args:
        query: Free-text search query.
        top_k: Maximum number of results to return (clamped to 1..50).
        engines: Optional comma-separated SearXNG engine names.
        language: Optional language code forwarded to SearXNG.
        safesearch: Optional safe-search level; only 0..2 is forwarded.

    Returns the standard ``(result_object, documents)`` tuple expected by
    ``ConnectorService.search_searxng``.  Never raises — failures return an
    empty result and feed the circuit breaker.
    """

    def _empty() -> dict[str, Any]:
        # Fresh copy with its own "sources" list — ``dict(_EMPTY_RESULT)``
        # would share the nested list object between callers.
        return {**_EMPTY_RESULT, "sources": []}

    host = config.SEARXNG_DEFAULT_HOST
    if not host:
        return _empty(), []

    limit = max(1, min(top_k, 50))

    # --- Circuit breaker ---
    if _circuit_is_open():
        logger.info("Web search skipped — circuit breaker is open")
        result = _empty()
        result["error"] = "Web search temporarily unavailable (circuit open)"
        result["status"] = "degraded"
        return result, []

    # --- Cache lookup ---
    ck = _cache_key(query, engines, language)
    cached = _cache_get(ck)
    if cached is not None:
        logger.debug("Web search cache HIT for query=%r", query[:60])
        return cached["result"], cached["documents"]

    # --- Build request ---
    params: dict[str, Any] = {
        "q": query,
        "format": "json",
        # NOTE(review): "limit" may be ignored by some SearXNG versions, so
        # the result list is also truncated client-side below.
        "limit": limit,
    }
    if engines:
        params["engines"] = engines
    if language:
        params["language"] = language
    if safesearch is not None and 0 <= safesearch <= 2:
        params["safesearch"] = safesearch
    searx_endpoint = urljoin(host if host.endswith("/") else f"{host}/", "search")
    headers = {"Accept": "application/json"}

    # --- HTTP call with one retry on transient errors (timeout / 5xx) ---
    data: dict[str, Any] | None = None
    last_error: Exception | None = None
    for attempt in range(2):
        try:
            # NOTE(review): verify=False disables TLS verification — assumed
            # safe for the bundled in-network instance; confirm for external hosts.
            async with httpx.AsyncClient(timeout=15.0, verify=False) as client:
                response = await client.get(
                    searx_endpoint, params=params, headers=headers,
                )
                response.raise_for_status()
                data = response.json()
            break
        except (httpx.HTTPStatusError, httpx.TimeoutException) as exc:
            last_error = exc
            retryable = isinstance(exc, httpx.TimeoutException) or (
                isinstance(exc, httpx.HTTPStatusError)
                and exc.response.status_code >= 500
            )
            if attempt == 0 and retryable:
                continue
            break
        except httpx.HTTPError as exc:
            # Non-status transport errors (connection refused, protocol errors).
            last_error = exc
            break
        except ValueError as exc:
            # response.json() failed — body was not valid JSON.
            last_error = exc
            break

    if data is None:
        _record_failure()
        logger.warning("Web search failed after retries: %s", last_error)
        return _empty(), []

    _record_success()
    # Truncate defensively in case the server ignored the "limit" parameter.
    searx_results = data.get("results", [])[:limit]
    if not searx_results:
        return _empty(), []

    # --- Format results ---
    sources_list: list[dict[str, Any]] = []
    documents: list[dict[str, Any]] = []
    for idx, result in enumerate(searx_results):
        # Large offset keeps these synthetic IDs away from small counter
        # ranges used elsewhere — presumably to avoid collisions; verify.
        source_id = 200_000 + idx
        description = result.get("content") or result.get("snippet") or ""
        sources_list.append({
            "id": source_id,
            "title": result.get("title", "Web Search Result"),
            "description": description,
            "url": result.get("url", ""),
        })
        documents.append({
            "chunk_id": source_id,
            "content": description or result.get("content", ""),
            "score": result.get("score", 0.0),
            "document": {
                "id": source_id,
                "title": result.get("title", "Web Search Result"),
                "document_type": "SEARXNG_API",
                "metadata": {
                    "url": result.get("url", ""),
                    "engines": result.get("engines", []),
                    "category": result.get("category"),
                    "source": "SEARXNG_API",
                },
            },
        })
    result_object: dict[str, Any] = {
        "id": 11,
        "name": "Web Search",
        "type": "SEARXNG_API",
        "sources": sources_list,
    }
    # --- Cache store ---
    _cache_set(ck, {"result": result_object, "documents": documents})
    return result_object, documents

View file

@ -1,6 +1,6 @@
"use client";
import { Bot, Brain, FileText, Globe, ImageIcon, MessageSquare, Shield } from "lucide-react";
import { Bot, Brain, FileText, Globe, ImageIcon, MessageSquare, Search, Shield } from "lucide-react";
import { useParams, useRouter, useSearchParams } from "next/navigation";
import { useTranslations } from "next-intl";
import { useCallback, useEffect } from "react";
@ -11,6 +11,7 @@ import { LLMRoleManager } from "@/components/settings/llm-role-manager";
import { ModelConfigManager } from "@/components/settings/model-config-manager";
import { PromptConfigManager } from "@/components/settings/prompt-config-manager";
import { RolesManager } from "@/components/settings/roles-manager";
import { WebSearchSettingsManager } from "@/components/settings/web-search-settings-manager";
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/animated-tabs";
import { trackSettingsViewed } from "@/lib/posthog/events";
@ -20,6 +21,7 @@ const VALID_TABS = [
"roles",
"image-models",
"prompts",
"web-search",
"public-links",
"team-roles",
] as const;
@ -80,6 +82,10 @@ export default function SettingsPage() {
<MessageSquare className="mr-2 h-4 w-4" />
{t("nav_system_instructions")}
</TabsTrigger>
<TabsTrigger value="web-search">
<Search className="mr-2 h-4 w-4" />
{t("nav_web_search")}
</TabsTrigger>
<TabsTrigger value="public-links">
<Globe className="mr-2 h-4 w-4" />
{t("nav_public_links")}
@ -100,6 +106,9 @@ export default function SettingsPage() {
<TabsContent value="prompts" className="mt-6">
<PromptConfigManager searchSpaceId={searchSpaceId} />
</TabsContent>
<TabsContent value="web-search" className="mt-6">
<WebSearchSettingsManager searchSpaceId={searchSpaceId} />
</TabsContent>
<TabsContent value="public-links" className="mt-6">
<PublicChatSnapshotsManager searchSpaceId={searchSpaceId} />
</TabsContent>

View file

@ -23,11 +23,6 @@ export function getConnectorBenefits(connectorType: string): string[] | null {
"Real-time information from the web",
"Enhanced search capabilities for your projects",
],
SEARXNG_API: [
"Privacy-focused meta-search across multiple engines",
"Self-hosted search instance for full control",
"Real-time web search results from multiple sources",
],
LINKUP_API: [
"AI-powered search results tailored to your queries",
"Real-time information from the web",

View file

@ -8,7 +8,6 @@ import { LinkupApiConnectForm } from "./components/linkup-api-connect-form";
import { LumaConnectForm } from "./components/luma-connect-form";
import { MCPConnectForm } from "./components/mcp-connect-form";
import { ObsidianConnectForm } from "./components/obsidian-connect-form";
import { SearxngConnectForm } from "./components/searxng-connect-form";
import { TavilyApiConnectForm } from "./components/tavily-api-connect-form";
export interface ConnectFormProps {
@ -41,8 +40,6 @@ export function getConnectFormComponent(connectorType: string): ConnectFormCompo
switch (connectorType) {
case "TAVILY_API":
return TavilyApiConnectForm;
case "SEARXNG_API":
return SearxngConnectForm;
case "LINKUP_API":
return LinkupApiConnectForm;
case "BAIDU_SEARCH_API":

View file

@ -19,7 +19,6 @@ import { LinkupApiConfig } from "./components/linkup-api-config";
import { LumaConfig } from "./components/luma-config";
import { MCPConfig } from "./components/mcp-config";
import { ObsidianConfig } from "./components/obsidian-config";
import { SearxngConfig } from "./components/searxng-config";
import { SlackConfig } from "./components/slack-config";
import { TavilyApiConfig } from "./components/tavily-api-config";
import { TeamsConfig } from "./components/teams-config";
@ -45,8 +44,6 @@ export function getConnectorConfigComponent(
return GoogleDriveConfig;
case "TAVILY_API":
return TavilyApiConfig;
case "SEARXNG_API":
return SearxngConfig;
case "LINKUP_API":
return LinkupApiConfig;
case "BAIDU_SEARCH_API":

View file

@ -11,7 +11,6 @@ import { getConnectFormComponent } from "../../connect-forms";
const FORM_ID_MAP: Record<string, string> = {
TAVILY_API: "tavily-connect-form",
SEARXNG_API: "searxng-connect-form",
LINKUP_API: "linkup-api-connect-form",
BAIDU_SEARCH_API: "baidu-search-api-connect-form",
ELASTICSEARCH_CONNECTOR: "elasticsearch-connect-form",

View file

@ -136,12 +136,6 @@ export const OTHER_CONNECTORS = [
description: "Search with Tavily",
connectorType: EnumConnectorName.TAVILY_API,
},
{
id: "searxng",
title: "SearxNG",
description: "Search with SearxNG",
connectorType: EnumConnectorName.SEARXNG_API,
},
{
id: "linkup-api",
title: "Linkup API",

View file

@ -0,0 +1,258 @@
"use client";
import { useQuery } from "@tanstack/react-query";
import { useAtomValue } from "jotai";
import { Globe, Loader2, Save } from "lucide-react";
import { useTranslations } from "next-intl";
import { useCallback, useEffect, useState } from "react";
import { toast } from "sonner";
import { updateSearchSpaceMutationAtom } from "@/atoms/search-spaces/search-space-mutation.atoms";
import { Alert, AlertDescription } from "@/components/ui/alert";
import { Button } from "@/components/ui/button";
import {
Card,
CardContent,
CardDescription,
CardHeader,
CardTitle,
} from "@/components/ui/card";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from "@/components/ui/select";
import { Skeleton } from "@/components/ui/skeleton";
import { Switch } from "@/components/ui/switch";
import { searchSpacesApiService } from "@/lib/apis/search-spaces-api.service";
import { baseApiService } from "@/lib/apis/base-api.service";
import { cacheKeys } from "@/lib/query-client/cache-keys";
interface WebSearchSettingsManagerProps {
searchSpaceId: number;
}
interface HealthStatus {
status: string;
response_time_ms?: number;
error?: string;
circuit_breaker?: string;
}
export function WebSearchSettingsManager({ searchSpaceId }: WebSearchSettingsManagerProps) {
const t = useTranslations("searchSpaceSettings");
const {
data: searchSpace,
isLoading,
refetch,
} = useQuery({
queryKey: cacheKeys.searchSpaces.detail(searchSpaceId.toString()),
queryFn: () => searchSpacesApiService.getSearchSpace({ id: searchSpaceId }),
enabled: !!searchSpaceId,
});
const { data: healthData } = useQuery<HealthStatus>({
queryKey: ["web-search-health"],
queryFn: async () => {
const response = await baseApiService.get("/api/v1/platform/web-search/health");
return response as HealthStatus;
},
refetchInterval: 30000,
staleTime: 10000,
});
const { mutateAsync: updateSearchSpace } = useAtomValue(updateSearchSpaceMutationAtom);
const [enabled, setEnabled] = useState(true);
const [engines, setEngines] = useState("");
const [language, setLanguage] = useState("");
const [safesearch, setSafesearch] = useState<string>("");
const [saving, setSaving] = useState(false);
useEffect(() => {
if (searchSpace) {
setEnabled(searchSpace.web_search_enabled ?? true);
const cfg = searchSpace.web_search_config as Record<string, unknown> | null;
setEngines((cfg?.engines as string) ?? "");
setLanguage((cfg?.language as string) ?? "");
const ss = cfg?.safesearch;
setSafesearch(ss !== null && ss !== undefined ? String(ss) : "");
}
}, [searchSpace]);
const handleSave = useCallback(async () => {
try {
setSaving(true);
const webSearchConfig: Record<string, unknown> = {};
if (engines.trim()) webSearchConfig.engines = engines.trim();
if (language.trim()) webSearchConfig.language = language.trim();
if (safesearch !== "") webSearchConfig.safesearch = Number(safesearch);
await updateSearchSpace({
id: searchSpaceId,
data: {
web_search_enabled: enabled,
web_search_config: Object.keys(webSearchConfig).length > 0 ? webSearchConfig : null,
},
});
toast.success(t("web_search_saved"));
await refetch();
} catch (error: unknown) {
console.error("Error saving web search settings:", error);
const message = error instanceof Error ? error.message : "Failed to save web search settings";
toast.error(message);
} finally {
setSaving(false);
}
}, [searchSpaceId, enabled, engines, language, safesearch, updateSearchSpace, refetch, t]);
if (isLoading) {
return (
<div className="space-y-4 md:space-y-6">
<Card>
<CardHeader className="px-3 md:px-6 pt-3 md:pt-6 pb-2 md:pb-3">
<Skeleton className="h-5 md:h-6 w-36 md:w-48" />
<Skeleton className="h-3 md:h-4 w-full max-w-md mt-2" />
</CardHeader>
<CardContent className="space-y-3 md:space-y-4 px-3 md:px-6 pb-3 md:pb-6">
<Skeleton className="h-10 md:h-12 w-full" />
<Skeleton className="h-10 md:h-12 w-full" />
</CardContent>
</Card>
</div>
);
}
const isHealthy = healthData?.status === "healthy";
const isUnavailable = healthData?.status === "unavailable";
return (
<div className="space-y-4 md:space-y-6">
<Alert className="bg-muted/50 py-3 md:py-4">
<Globe className="h-3 w-3 md:h-4 md:w-4 shrink-0" />
<AlertDescription className="text-xs md:text-sm">
{t("web_search_description")}
</AlertDescription>
</Alert>
{healthData && (
<div className="flex items-center gap-2 text-xs md:text-sm">
<span
className={`inline-block h-2 w-2 rounded-full ${
isHealthy
? "bg-green-500"
: isUnavailable
? "bg-gray-400"
: "bg-red-500"
}`}
/>
<span className="text-muted-foreground">
{isHealthy
? `${t("web_search_status_healthy")} (${healthData.response_time_ms}ms)`
: isUnavailable
? t("web_search_status_not_configured")
: t("web_search_status_unhealthy")}
</span>
</div>
)}
<Card>
<CardHeader className="px-3 md:px-6 pt-3 md:pt-6 pb-2 md:pb-3">
<CardTitle className="text-base md:text-lg">{t("web_search_title")}</CardTitle>
<CardDescription className="text-xs md:text-sm">
{t("web_search_enabled_description")}
</CardDescription>
</CardHeader>
<CardContent className="space-y-5 md:space-y-6 px-3 md:px-6 pb-3 md:pb-6">
<div className="flex items-center justify-between rounded-lg border p-3 md:p-4">
<div className="space-y-0.5">
<Label className="text-sm md:text-base font-medium">
{t("web_search_enabled_label")}
</Label>
<p className="text-[10px] md:text-xs text-muted-foreground">
{t("web_search_enabled_description")}
</p>
</div>
<Switch checked={enabled} onCheckedChange={setEnabled} />
</div>
{enabled && (
<div className="space-y-4 md:space-y-5">
<div className="space-y-1.5 md:space-y-2">
<Label className="text-sm md:text-base font-medium">
{t("web_search_engines_label")}
</Label>
<Input
placeholder={t("web_search_engines_placeholder")}
value={engines}
onChange={(e) => setEngines(e.target.value)}
className="text-sm md:text-base h-9 md:h-10"
/>
<p className="text-[10px] md:text-xs text-muted-foreground">
{t("web_search_engines_description")}
</p>
</div>
<div className="grid grid-cols-1 gap-4 sm:grid-cols-2">
<div className="space-y-1.5 md:space-y-2">
<Label className="text-sm md:text-base font-medium">
{t("web_search_language_label")}
</Label>
<Input
placeholder={t("web_search_language_placeholder")}
value={language}
onChange={(e) => setLanguage(e.target.value)}
className="text-sm md:text-base h-9 md:h-10"
/>
<p className="text-[10px] md:text-xs text-muted-foreground">
{t("web_search_language_description")}
</p>
</div>
<div className="space-y-1.5 md:space-y-2">
<Label className="text-sm md:text-base font-medium">
{t("web_search_safesearch_label")}
</Label>
<Select value={safesearch} onValueChange={setSafesearch}>
<SelectTrigger className="h-9 md:h-10 text-sm md:text-base">
<SelectValue placeholder="Default" />
</SelectTrigger>
<SelectContent>
<SelectItem value="">Default</SelectItem>
<SelectItem value="0">Off (0)</SelectItem>
<SelectItem value="1">Moderate (1)</SelectItem>
<SelectItem value="2">Strict (2)</SelectItem>
</SelectContent>
</Select>
<p className="text-[10px] md:text-xs text-muted-foreground">
{t("web_search_safesearch_description")}
</p>
</div>
</div>
</div>
)}
</CardContent>
</Card>
<div className="flex justify-end pt-3 md:pt-4">
<Button
onClick={handleSave}
disabled={saving}
className="flex items-center gap-2 text-xs md:text-sm h-9 md:h-10"
>
{saving ? (
<Loader2 className="h-3.5 w-3.5 md:h-4 md:w-4 animate-spin" />
) : (
<Save className="h-3.5 w-3.5 md:h-4 md:w-4" />
)}
{saving ? t("web_search_saving") : t("web_search_save")}
</Button>
</div>
</div>
);
}

View file

@ -9,6 +9,8 @@ export const searchSpace = z.object({
user_id: z.string(),
citations_enabled: z.boolean(),
qna_custom_instructions: z.string().nullable(),
web_search_enabled: z.boolean(),
web_search_config: z.record(z.unknown()).nullable(),
member_count: z.number(),
is_owner: z.boolean(),
});
@ -49,7 +51,14 @@ export const getSearchSpaceResponse = searchSpace.omit({ member_count: true, is_
/**
 * Request schema for updating a search space: a numeric `id` plus a partial
 * `data` payload limited to the user-editable fields (name, description,
 * citations toggle, Q&A instructions, and the web-search toggle/config).
 */
export const updateSearchSpaceRequest = z.object({
id: z.number(),
data: searchSpace
.pick({
name: true,
description: true,
citations_enabled: true,
qna_custom_instructions: true,
web_search_enabled: true,
web_search_config: true,
})
.partial(),
});

View file

@ -757,7 +757,27 @@
"general_reset": "Reset Changes",
"general_save": "Save Changes",
"general_saving": "Saving",
"general_unsaved_changes": "You have unsaved changes. Click \"Save Changes\" to apply them."
"general_unsaved_changes": "You have unsaved changes. Click \"Save Changes\" to apply them.",
"nav_web_search": "Web Search",
"nav_web_search_desc": "Built-in web search settings",
"web_search_title": "Web Search",
"web_search_description": "Web search is powered by a built-in SearXNG instance. All queries are proxied through your server — no data is sent to third parties.",
"web_search_enabled_label": "Enable Web Search",
"web_search_enabled_description": "When enabled, the AI agent can search the web for real-time information like news, prices, and current events.",
"web_search_status_healthy": "Web search service is healthy",
"web_search_status_unhealthy": "Web search service is unhealthy",
"web_search_status_not_configured": "Web search service is not configured",
"web_search_engines_label": "Search Engines",
"web_search_engines_placeholder": "google,brave,duckduckgo",
"web_search_engines_description": "Comma-separated list of SearXNG engines to use. Leave empty for defaults.",
"web_search_language_label": "Preferred Language",
"web_search_language_placeholder": "en",
"web_search_language_description": "IETF language tag (e.g. en, en-US). Leave empty for auto-detect.",
"web_search_safesearch_label": "SafeSearch Level",
"web_search_safesearch_description": "0 = off, 1 = moderate, 2 = strict",
"web_search_save": "Save Web Search Settings",
"web_search_saving": "Saving...",
"web_search_saved": "Web search settings saved"
},
"homepage": {
"hero_title_part1": "The AI Workspace",