From d9e312ddaa5f2d7dfef92fe8b9a99f4c19094a2a Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Sat, 14 Mar 2026 20:34:34 +0530 Subject: [PATCH] refactor: simplify SearXNG service configuration in Docker setup - Consolidated volume mappings for SearXNG to use a single directory. - Removed unnecessary port mappings and legacy data volume definitions. - Updated web search service documentation to clarify Redis usage and circuit breaker implementation, eliminating Redis dependency for circuit breaker logic. --- docker/docker-compose.dev.yml | 9 +-- docker/docker-compose.yml | 9 +-- .../app/services/web_search_service.py | 64 ++++++++----------- 3 files changed, 29 insertions(+), 53 deletions(-) diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index 30f758b61..7c0c4ff79 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -59,14 +59,9 @@ services: searxng: image: searxng/searxng:2024.12.23 - ports: - - "${SEARXNG_PORT:-8888}:8080" volumes: - - ./searxng/settings.yml:/etc/searxng/settings.yml:ro - - ./searxng/limiter.toml:/etc/searxng/limiter.toml:ro - - searxng_data:/etc/searxng + - ./searxng:/etc/searxng environment: - - SEARXNG_BASE_URL=http://localhost:${SEARXNG_PORT:-8888}/ - SEARXNG_SECRET=${SEARXNG_SECRET:-surfsense-searxng-secret} healthcheck: test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080/healthz"] @@ -226,5 +221,3 @@ volumes: name: surfsense-dev-redis shared_temp: name: surfsense-dev-shared-temp - searxng_data: - name: surfsense-dev-searxng diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index a86874b52..2541545ec 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -45,14 +45,9 @@ services: searxng: image: searxng/searxng:2024.12.23 volumes: - - ./searxng/settings.yml:/etc/searxng/settings.yml:ro - - ./searxng/limiter.toml:/etc/searxng/limiter.toml:ro - - searxng_data:/etc/searxng + - ./searxng:/etc/searxng environment: - SEARXNG_BASE_URL: http://localhost:${SEARXNG_PORT:-8888}/ SEARXNG_SECRET: ${SEARXNG_SECRET:-surfsense-searxng-secret} - ports: - - "${SEARXNG_PORT:-8888}:8080" restart: unless-stopped healthcheck: test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080/healthz"] @@ -215,5 +210,3 @@ volumes: name: surfsense-redis shared_temp: name: surfsense-shared-temp - searxng_data: - name: surfsense-searxng diff --git a/surfsense_backend/app/services/web_search_service.py b/surfsense_backend/app/services/web_search_service.py index 70da23333..6cf0f4566 100644 --- a/surfsense_backend/app/services/web_search_service.py +++ b/surfsense_backend/app/services/web_search_service.py @@ -1,10 +1,9 @@ """ Platform-level web search service backed by SearXNG. -Provides caching via Redis, a circuit breaker for resilience, and a health -check endpoint. Configuration is read from environment variables rather -than per-search-space database rows — this service is a platform capability -that is always available when ``SEARXNG_DEFAULT_HOST`` is set. +Redis is used only for result caching (graceful degradation if unavailable). +The circuit breaker is fully in-process — no external dependency, zero +latency overhead. """ from __future__ import annotations @@ -12,6 +11,7 @@ from __future__ import annotations import hashlib import json import logging +import threading import time from typing import Any from urllib.parse import urljoin @@ -31,7 +31,7 @@ _EMPTY_RESULT: dict[str, Any] = { } # --------------------------------------------------------------------------- -# Redis helpers +# Redis — used only for result caching # --------------------------------------------------------------------------- _redis_client: redis.Redis | None = None @@ -45,54 +45,50 @@ def _get_redis() -> redis.Redis: # --------------------------------------------------------------------------- -# Circuit Breaker +# In-process Circuit Breaker (no Redis dependency) # --------------------------------------------------------------------------- -_CB_FAILURES_KEY = "websearch:circuit:failures" -_CB_OPEN_KEY = "websearch:circuit:open" _CB_FAILURE_THRESHOLD = 5 _CB_FAILURE_WINDOW_SECONDS = 60 _CB_COOLDOWN_SECONDS = 30 +_cb_lock = threading.Lock() +_cb_failure_count: int = 0 +_cb_last_failure_time: float = 0.0 +_cb_open_until: float = 0.0 + def _circuit_is_open() -> bool: - try: - return _get_redis().exists(_CB_OPEN_KEY) == 1 - except redis.RedisError: - return False + return time.monotonic() < _cb_open_until def _record_failure() -> None: - try: - r = _get_redis() - pipe = r.pipeline() - pipe.incr(_CB_FAILURES_KEY) - pipe.expire(_CB_FAILURES_KEY, _CB_FAILURE_WINDOW_SECONDS) - pipe.execute() - - failures = int(r.get(_CB_FAILURES_KEY) or 0) - if failures >= _CB_FAILURE_THRESHOLD: - r.setex(_CB_OPEN_KEY, _CB_COOLDOWN_SECONDS, "1") + global _cb_failure_count, _cb_last_failure_time, _cb_open_until + now = time.monotonic() + with _cb_lock: + if now - _cb_last_failure_time > _CB_FAILURE_WINDOW_SECONDS: + _cb_failure_count = 0 + _cb_failure_count += 1 + _cb_last_failure_time = now + if _cb_failure_count >= _CB_FAILURE_THRESHOLD: + _cb_open_until = now + _CB_COOLDOWN_SECONDS logger.warning( "Circuit breaker OPENED after %d failures — " "SearXNG calls paused for %ds", - failures, + _cb_failure_count, _CB_COOLDOWN_SECONDS, ) - except redis.RedisError: - pass def _record_success() -> None: - try: - r = _get_redis() - r.delete(_CB_FAILURES_KEY, _CB_OPEN_KEY) - except redis.RedisError: - pass + global _cb_failure_count, _cb_open_until + with _cb_lock: + _cb_failure_count = 0 + _cb_open_until = 0.0 # --------------------------------------------------------------------------- -# Result Caching +# Result Caching (Redis, graceful degradation) # --------------------------------------------------------------------------- _CACHE_TTL_SECONDS = 300 # 5 minutes @@ -177,7 +173,6 @@ async def search( if not host: return dict(_EMPTY_RESULT), [] - # --- Circuit breaker --- if _circuit_is_open(): logger.info("Web search skipped — circuit breaker is open") result = dict(_EMPTY_RESULT) @@ -185,14 +180,12 @@ async def search( result["status"] = "degraded" return result, [] - # --- Cache lookup --- ck = _cache_key(query, engines, language) cached = _cache_get(ck) if cached is not None: logger.debug("Web search cache HIT for query=%r", query[:60]) return cached["result"], cached["documents"] - # --- Build request --- params: dict[str, Any] = { "q": query, "format": "json", @@ -208,7 +201,6 @@ async def search( searx_endpoint = urljoin(host if host.endswith("/") else f"{host}/", "search") headers = {"Accept": "application/json"} - # --- HTTP call with one retry on transient errors --- data: dict[str, Any] | None = None last_error: Exception | None = None @@ -247,7 +239,6 @@ async def search( if not searx_results: return dict(_EMPTY_RESULT), [] - # --- Format results --- sources_list: list[dict[str, Any]] = [] documents: list[dict[str, Any]] = [] @@ -286,7 +277,6 @@ async def search( "sources": sources_list, } - # --- Cache store --- _cache_set(ck, {"result": result_object, "documents": documents}) return result_object, documents