refactor: simplify SearXNG service configuration in Docker setup

- Consolidated volume mappings for SearXNG to use a single directory.
- Removed unnecessary port mappings and legacy data volume definitions.
- Updated web search service documentation to clarify Redis usage and circuit breaker implementation, eliminating Redis dependency for circuit breaker logic.
This commit is contained in:
Anish Sarkar 2026-03-14 20:34:34 +05:30
parent d40c6bf743
commit d9e312ddaa
3 changed files with 29 additions and 53 deletions

View file

@ -59,14 +59,9 @@ services:
searxng: searxng:
image: searxng/searxng:2024.12.23 image: searxng/searxng:2024.12.23
ports:
- "${SEARXNG_PORT:-8888}:8080"
volumes: volumes:
- ./searxng/settings.yml:/etc/searxng/settings.yml:ro - ./searxng:/etc/searxng
- ./searxng/limiter.toml:/etc/searxng/limiter.toml:ro
- searxng_data:/etc/searxng
environment: environment:
- SEARXNG_BASE_URL=http://localhost:${SEARXNG_PORT:-8888}/
- SEARXNG_SECRET=${SEARXNG_SECRET:-surfsense-searxng-secret} - SEARXNG_SECRET=${SEARXNG_SECRET:-surfsense-searxng-secret}
healthcheck: healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080/healthz"] test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080/healthz"]
@ -226,5 +221,3 @@ volumes:
name: surfsense-dev-redis name: surfsense-dev-redis
shared_temp: shared_temp:
name: surfsense-dev-shared-temp name: surfsense-dev-shared-temp
searxng_data:
name: surfsense-dev-searxng

View file

@ -45,14 +45,9 @@ services:
searxng: searxng:
image: searxng/searxng:2024.12.23 image: searxng/searxng:2024.12.23
volumes: volumes:
- ./searxng/settings.yml:/etc/searxng/settings.yml:ro - ./searxng:/etc/searxng
- ./searxng/limiter.toml:/etc/searxng/limiter.toml:ro
- searxng_data:/etc/searxng
environment: environment:
SEARXNG_BASE_URL: http://localhost:${SEARXNG_PORT:-8888}/
SEARXNG_SECRET: ${SEARXNG_SECRET:-surfsense-searxng-secret} SEARXNG_SECRET: ${SEARXNG_SECRET:-surfsense-searxng-secret}
ports:
- "${SEARXNG_PORT:-8888}:8080"
restart: unless-stopped restart: unless-stopped
healthcheck: healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080/healthz"] test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080/healthz"]
@ -215,5 +210,3 @@ volumes:
name: surfsense-redis name: surfsense-redis
shared_temp: shared_temp:
name: surfsense-shared-temp name: surfsense-shared-temp
searxng_data:
name: surfsense-searxng

View file

@ -1,10 +1,9 @@
""" """
Platform-level web search service backed by SearXNG. Platform-level web search service backed by SearXNG.
Provides caching via Redis, a circuit breaker for resilience, and a health Redis is used only for result caching (graceful degradation if unavailable).
check endpoint. Configuration is read from environment variables rather The circuit breaker is fully in-process — no external dependency, zero
than per-search-space database rows — this service is a platform capability latency overhead.
that is always available when ``SEARXNG_DEFAULT_HOST`` is set.
""" """
from __future__ import annotations from __future__ import annotations
@ -12,6 +11,7 @@ from __future__ import annotations
import hashlib import hashlib
import json import json
import logging import logging
import threading
import time import time
from typing import Any from typing import Any
from urllib.parse import urljoin from urllib.parse import urljoin
@ -31,7 +31,7 @@ _EMPTY_RESULT: dict[str, Any] = {
} }
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Redis helpers # Redis — used only for result caching
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
_redis_client: redis.Redis | None = None _redis_client: redis.Redis | None = None
@ -45,54 +45,50 @@ def _get_redis() -> redis.Redis:
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Circuit Breaker # In-process Circuit Breaker (no Redis dependency)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
_CB_FAILURES_KEY = "websearch:circuit:failures"
_CB_OPEN_KEY = "websearch:circuit:open"
_CB_FAILURE_THRESHOLD = 5 _CB_FAILURE_THRESHOLD = 5
_CB_FAILURE_WINDOW_SECONDS = 60 _CB_FAILURE_WINDOW_SECONDS = 60
_CB_COOLDOWN_SECONDS = 30 _CB_COOLDOWN_SECONDS = 30
_cb_lock = threading.Lock()
_cb_failure_count: int = 0
_cb_last_failure_time: float = 0.0
_cb_open_until: float = 0.0
def _circuit_is_open() -> bool: def _circuit_is_open() -> bool:
try: return time.monotonic() < _cb_open_until
return _get_redis().exists(_CB_OPEN_KEY) == 1
except redis.RedisError:
return False
def _record_failure() -> None: def _record_failure() -> None:
try: global _cb_failure_count, _cb_last_failure_time, _cb_open_until
r = _get_redis() now = time.monotonic()
pipe = r.pipeline() with _cb_lock:
pipe.incr(_CB_FAILURES_KEY) if now - _cb_last_failure_time > _CB_FAILURE_WINDOW_SECONDS:
pipe.expire(_CB_FAILURES_KEY, _CB_FAILURE_WINDOW_SECONDS) _cb_failure_count = 0
pipe.execute() _cb_failure_count += 1
_cb_last_failure_time = now
failures = int(r.get(_CB_FAILURES_KEY) or 0) if _cb_failure_count >= _CB_FAILURE_THRESHOLD:
if failures >= _CB_FAILURE_THRESHOLD: _cb_open_until = now + _CB_COOLDOWN_SECONDS
r.setex(_CB_OPEN_KEY, _CB_COOLDOWN_SECONDS, "1")
logger.warning( logger.warning(
"Circuit breaker OPENED after %d failures — " "Circuit breaker OPENED after %d failures — "
"SearXNG calls paused for %ds", "SearXNG calls paused for %ds",
failures, _cb_failure_count,
_CB_COOLDOWN_SECONDS, _CB_COOLDOWN_SECONDS,
) )
except redis.RedisError:
pass
def _record_success() -> None: def _record_success() -> None:
try: global _cb_failure_count, _cb_open_until
r = _get_redis() with _cb_lock:
r.delete(_CB_FAILURES_KEY, _CB_OPEN_KEY) _cb_failure_count = 0
except redis.RedisError: _cb_open_until = 0.0
pass
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Result Caching # Result Caching (Redis, graceful degradation)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
_CACHE_TTL_SECONDS = 300 # 5 minutes _CACHE_TTL_SECONDS = 300 # 5 minutes
@ -177,7 +173,6 @@ async def search(
if not host: if not host:
return dict(_EMPTY_RESULT), [] return dict(_EMPTY_RESULT), []
# --- Circuit breaker ---
if _circuit_is_open(): if _circuit_is_open():
logger.info("Web search skipped — circuit breaker is open") logger.info("Web search skipped — circuit breaker is open")
result = dict(_EMPTY_RESULT) result = dict(_EMPTY_RESULT)
@ -185,14 +180,12 @@ async def search(
result["status"] = "degraded" result["status"] = "degraded"
return result, [] return result, []
# --- Cache lookup ---
ck = _cache_key(query, engines, language) ck = _cache_key(query, engines, language)
cached = _cache_get(ck) cached = _cache_get(ck)
if cached is not None: if cached is not None:
logger.debug("Web search cache HIT for query=%r", query[:60]) logger.debug("Web search cache HIT for query=%r", query[:60])
return cached["result"], cached["documents"] return cached["result"], cached["documents"]
# --- Build request ---
params: dict[str, Any] = { params: dict[str, Any] = {
"q": query, "q": query,
"format": "json", "format": "json",
@ -208,7 +201,6 @@ async def search(
searx_endpoint = urljoin(host if host.endswith("/") else f"{host}/", "search") searx_endpoint = urljoin(host if host.endswith("/") else f"{host}/", "search")
headers = {"Accept": "application/json"} headers = {"Accept": "application/json"}
# --- HTTP call with one retry on transient errors ---
data: dict[str, Any] | None = None data: dict[str, Any] | None = None
last_error: Exception | None = None last_error: Exception | None = None
@ -247,7 +239,6 @@ async def search(
if not searx_results: if not searx_results:
return dict(_EMPTY_RESULT), [] return dict(_EMPTY_RESULT), []
# --- Format results ---
sources_list: list[dict[str, Any]] = [] sources_list: list[dict[str, Any]] = []
documents: list[dict[str, Any]] = [] documents: list[dict[str, Any]] = []
@ -286,7 +277,6 @@ async def search(
"sources": sources_list, "sources": sources_list,
} }
# --- Cache store ---
_cache_set(ck, {"result": result_object, "documents": documents}) _cache_set(ck, {"result": result_object, "documents": documents})
return result_object, documents return result_object, documents