From c2beaf1e5a21a4adcd504aabedafa3286ee65351 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Mon, 8 Jun 2026 13:50:16 -0700 Subject: [PATCH] refactor(config): centralize configuration management across modules - Replaced environment variable usage with a centralized configuration system in multiple modules, including `celery_app`, `agent_cache_store`, `sandbox`, `file_storage`, and `connector_service`. - Enhanced maintainability and readability by sourcing configuration values from the `config` module instead of directly from environment variables. - Updated relevant settings to ensure consistent access to configuration values across the application. --- .../main_agent/runtime/agent_cache_store.py | 6 +- .../shared/middleware/filesystem/sandbox.py | 17 +- surfsense_backend/app/celery_app.py | 17 +- surfsense_backend/app/config/__init__.py | 233 +++++++++++------- .../app/file_storage/settings.py | 20 +- .../app/services/connector_service.py | 6 +- 6 files changed, 176 insertions(+), 123 deletions(-) diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/agent_cache_store.py b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/agent_cache_store.py index ee51b4176..00866adf9 100644 --- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/agent_cache_store.py +++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/runtime/agent_cache_store.py @@ -67,13 +67,13 @@ from __future__ import annotations import asyncio import hashlib import logging -import os import time from collections import OrderedDict from collections.abc import Awaitable, Callable from dataclasses import dataclass from typing import Any +from app.config import config from app.utils.perf import get_perf_logger logger = logging.getLogger(__name__) @@ -328,8 +328,8 @@ def _short(key: str, n: int = 16) -> str: # Module-level singleton # --------------------------------------------------------------------------- 
-_DEFAULT_MAXSIZE = int(os.getenv("SURFSENSE_AGENT_CACHE_MAXSIZE", "256")) -_DEFAULT_TTL = float(os.getenv("SURFSENSE_AGENT_CACHE_TTL_SECONDS", "1800")) +_DEFAULT_MAXSIZE = config.AGENT_CACHE_MAXSIZE +_DEFAULT_TTL = config.AGENT_CACHE_TTL_SECONDS _cache: _AgentCache = _AgentCache(maxsize=_DEFAULT_MAXSIZE, ttl_seconds=_DEFAULT_TTL) diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/sandbox.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/sandbox.py index efac7aae8..338a188d6 100644 --- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/sandbox.py +++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/sandbox.py @@ -14,7 +14,6 @@ from __future__ import annotations import asyncio import contextlib import logging -import os import shutil import threading from pathlib import Path @@ -29,6 +28,10 @@ from daytona.common.errors import DaytonaError from deepagents.backends.protocol import ExecuteResponse from langchain_daytona import DaytonaSandbox +# Aliased to avoid clashing with the local ``config = DaytonaConfig(...)`` +# variable used inside ``_get_client``. 
+from app.config import config as app_config + logger = logging.getLogger(__name__) @@ -73,7 +76,7 @@ SANDBOX_DOCUMENTS_ROOT = "/home/daytona/documents" def is_sandbox_enabled() -> bool: - return os.environ.get("DAYTONA_SANDBOX_ENABLED", "FALSE").upper() == "TRUE" + return app_config.DAYTONA_SANDBOX_ENABLED def _get_client() -> Daytona: @@ -81,9 +84,9 @@ def _get_client() -> Daytona: with _client_lock: if _daytona_client is None: config = DaytonaConfig( - api_key=os.environ.get("DAYTONA_API_KEY", ""), - api_url=os.environ.get("DAYTONA_API_URL", "https://app.daytona.io/api"), - target=os.environ.get("DAYTONA_TARGET", "us"), + api_key=app_config.DAYTONA_API_KEY, + api_url=app_config.DAYTONA_API_URL, + target=app_config.DAYTONA_TARGET, ) _daytona_client = Daytona(config) return _daytona_client @@ -92,7 +95,7 @@ def _get_client() -> Daytona: def _sandbox_create_params( labels: dict[str, str], ) -> CreateSandboxFromSnapshotParams: - snapshot_id = os.environ.get("DAYTONA_SNAPSHOT_ID") or None + snapshot_id = app_config.DAYTONA_SNAPSHOT_ID return CreateSandboxFromSnapshotParams( language="python", labels=labels, @@ -302,7 +305,7 @@ async def delete_sandbox(thread_id: int | str) -> None: def _get_sandbox_files_dir() -> Path: - return Path(os.environ.get("SANDBOX_FILES_DIR", "sandbox_files")) + return Path(app_config.SANDBOX_FILES_DIR) def _local_path_for(thread_id: int | str, sandbox_path: str) -> Path: diff --git a/surfsense_backend/app/celery_app.py b/surfsense_backend/app/celery_app.py index c7ee72667..0e852b801 100644 --- a/surfsense_backend/app/celery_app.py +++ b/surfsense_backend/app/celery_app.py @@ -1,7 +1,6 @@ """Celery application configuration and setup.""" import contextlib -import os import time from celery import Celery @@ -19,6 +18,8 @@ try: except ImportError: # pragma: no cover - optional OTel dependency trace = None # type: ignore[assignment] +from app.config import config + # Load environment variables load_dotenv() @@ -124,16 +125,16 @@ def 
init_worker(**kwargs): initialize_vision_llm_router() -# Get Celery configuration from environment -CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0") -CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND", "redis://localhost:6379/0") -CELERY_TASK_DEFAULT_QUEUE = os.getenv("CELERY_TASK_DEFAULT_QUEUE", "surfsense") +# Celery configuration, sourced from the central Config singleton +CELERY_BROKER_URL = config.CELERY_BROKER_URL +CELERY_RESULT_BACKEND = config.CELERY_RESULT_BACKEND +CELERY_TASK_DEFAULT_QUEUE = config.CELERY_TASK_DEFAULT_QUEUE -# Get schedule checker interval from environment +# Schedule checker interval # Format: "<number><unit>" where unit is 'm' (minutes) or 'h' (hours) # Examples: "1m" (every minute), "5m" (every 5 minutes), "1h" (every hour) -SCHEDULE_CHECKER_INTERVAL = os.getenv("SCHEDULE_CHECKER_INTERVAL", "2m") -STRIPE_RECONCILIATION_INTERVAL = os.getenv("STRIPE_RECONCILIATION_INTERVAL", "10m") +SCHEDULE_CHECKER_INTERVAL = config.SCHEDULE_CHECKER_INTERVAL +STRIPE_RECONCILIATION_INTERVAL = config.STRIPE_RECONCILIATION_INTERVAL def parse_schedule_interval(interval: str) -> dict: diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py index b2650e87c..eb832c69e 100644 --- a/surfsense_backend/app/config/__init__.py +++ b/surfsense_backend/app/config/__init__.py @@ -1,5 +1,7 @@ +import copy import os import shutil +from functools import lru_cache from pathlib import Path import yaml @@ -17,6 +19,37 @@ os.environ.setdefault("OR_APP_NAME", "SurfSense") os.environ.setdefault("OR_SITE_URL", "https://surfsense.com") +@lru_cache(maxsize=8) +def _read_global_config_yaml(path_str: str) -> dict: + """Read and parse ``global_llm_config.yaml`` once per resolved path. + + Cached so the seven ``load_*`` helpers (and their re-invocations during + startup) don't re-open and re-parse the same file repeatedly. 
Keyed on the + resolved path string so tests that monkeypatch ``BASE_DIR`` to a unique + ``tmp_path`` still get a fresh parse. Callers MUST treat the returned dict + as read-only and deep-copy any section they intend to mutate. + """ + f = Path(path_str) + if not f.exists(): + return {} + try: + with open(f, encoding="utf-8") as fh: + return yaml.safe_load(fh) or {} + except Exception as e: + print(f"Warning: Failed to read global_llm_config.yaml: {e}") + return {} + + +def _global_config_data() -> dict: + """Return the parsed global config YAML for the current ``BASE_DIR``. + + ``BASE_DIR`` is read at call time (not bound at import) so a + ``monkeypatch.setattr(config, "BASE_DIR", tmp_path)`` is honored. + """ + path = BASE_DIR / "app" / "config" / "global_llm_config.yaml" + return _read_global_config_yaml(str(path)) + + def is_ffmpeg_installed(): """ Check if ffmpeg is installed on the current system. @@ -35,17 +68,15 @@ def load_global_llm_configs(): Returns: list: List of global LLM config dictionaries, or empty list if file doesn't exist """ - # Try main config file first - global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml" - - if not global_config_file.exists(): + data = _global_config_data() + if not data: # No global configs available return [] try: - with open(global_config_file, encoding="utf-8") as f: - data = yaml.safe_load(f) - configs = data.get("global_llm_configs", []) + # Deep-copy so the in-place mutations below (setdefault, scoring + # stamps) never leak into the cached YAML structure. + configs = copy.deepcopy(data.get("global_llm_configs", [])) # Lazy import keeps the `app.config` -> `app.services` edge one-way # and matches the `provider_api_base` pattern used elsewhere. 
@@ -145,18 +176,14 @@ def load_router_settings(): "cooldown_time": 60, } - # Try main config file first - global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml" - - if not global_config_file.exists(): + data = _global_config_data() + if not data: return default_settings try: - with open(global_config_file, encoding="utf-8") as f: - data = yaml.safe_load(f) - settings = data.get("router_settings", {}) - # Merge with defaults - return {**default_settings, **settings} + settings = data.get("router_settings", {}) + # Merge with defaults + return {**default_settings, **settings} except Exception as e: print(f"Warning: Failed to load router settings: {e}") return default_settings @@ -169,38 +196,32 @@ def load_global_image_gen_configs(): Returns: list: List of global image generation config dictionaries, or empty list """ - global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml" - - if not global_config_file.exists(): + data = _global_config_data() + if not data: return [] try: - with open(global_config_file, encoding="utf-8") as f: - data = yaml.safe_load(f) - configs = data.get("global_image_generation_configs", []) or [] - for cfg in configs: - if isinstance(cfg, dict): - cfg.setdefault("billing_tier", "free") - return configs + configs = copy.deepcopy(data.get("global_image_generation_configs", []) or []) + for cfg in configs: + if isinstance(cfg, dict): + cfg.setdefault("billing_tier", "free") + return configs except Exception as e: print(f"Warning: Failed to load global image generation configs: {e}") return [] def load_global_vision_llm_configs(): - global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml" - - if not global_config_file.exists(): + data = _global_config_data() + if not data: return [] try: - with open(global_config_file, encoding="utf-8") as f: - data = yaml.safe_load(f) - configs = data.get("global_vision_llm_configs", []) or [] - for cfg in configs: - if isinstance(cfg, dict): - 
cfg.setdefault("billing_tier", "free") - return configs + configs = copy.deepcopy(data.get("global_vision_llm_configs", []) or []) + for cfg in configs: + if isinstance(cfg, dict): + cfg.setdefault("billing_tier", "free") + return configs except Exception as e: print(f"Warning: Failed to load global vision LLM configs: {e}") return [] @@ -214,16 +235,13 @@ def load_vision_llm_router_settings(): "cooldown_time": 60, } - global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml" - - if not global_config_file.exists(): + data = _global_config_data() + if not data: return default_settings try: - with open(global_config_file, encoding="utf-8") as f: - data = yaml.safe_load(f) - settings = data.get("vision_llm_router_settings", {}) - return {**default_settings, **settings} + settings = data.get("vision_llm_router_settings", {}) + return {**default_settings, **settings} except Exception as e: print(f"Warning: Failed to load vision LLM router settings: {e}") return default_settings @@ -243,16 +261,13 @@ def load_image_gen_router_settings(): "cooldown_time": 60, } - global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml" - - if not global_config_file.exists(): + data = _global_config_data() + if not data: return default_settings try: - with open(global_config_file, encoding="utf-8") as f: - data = yaml.safe_load(f) - settings = data.get("image_generation_router_settings", {}) - return {**default_settings, **settings} + settings = data.get("image_generation_router_settings", {}) + return {**default_settings, **settings} except Exception as e: print(f"Warning: Failed to load image generation router settings: {e}") return default_settings @@ -268,49 +283,48 @@ def load_openrouter_integration_settings() -> dict | None: Returns: dict with settings if present and enabled, None otherwise """ - global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml" - - if not global_config_file.exists(): + data = _global_config_data() + if 
not data: return None try: - with open(global_config_file, encoding="utf-8") as f: - data = yaml.safe_load(f) - settings = data.get("openrouter_integration") - if not settings or not settings.get("enabled"): - return None + # Deep-copy so the setdefault back-compat seeding below never mutates + # the cached YAML structure. + settings = copy.deepcopy(data.get("openrouter_integration")) + if not settings or not settings.get("enabled"): + return None - if "billing_tier" in settings: - print( - "Warning: openrouter_integration.billing_tier is deprecated; " - "tier is now derived per model from OpenRouter data " - "(':free' suffix or zero pricing). Remove this key." - ) + if "billing_tier" in settings: + print( + "Warning: openrouter_integration.billing_tier is deprecated; " + "tier is now derived per model from OpenRouter data " + "(':free' suffix or zero pricing). Remove this key." + ) - if "anonymous_enabled" in settings: - print( - "Warning: openrouter_integration.anonymous_enabled is " - "deprecated; use anonymous_enabled_paid and/or " - "anonymous_enabled_free instead. Both new flags have been " - "seeded from the legacy value for back-compat." - ) - settings.setdefault( - "anonymous_enabled_paid", settings["anonymous_enabled"] - ) - settings.setdefault( - "anonymous_enabled_free", settings["anonymous_enabled"] - ) + if "anonymous_enabled" in settings: + print( + "Warning: openrouter_integration.anonymous_enabled is " + "deprecated; use anonymous_enabled_paid and/or " + "anonymous_enabled_free instead. Both new flags have been " + "seeded from the legacy value for back-compat." + ) + settings.setdefault( + "anonymous_enabled_paid", settings["anonymous_enabled"] + ) + settings.setdefault( + "anonymous_enabled_free", settings["anonymous_enabled"] + ) - # Image generation + vision LLM emission are opt-in (issue L). 
- # OpenRouter's catalogue contains hundreds of image / vision - # capable models; auto-injecting all of them into every - # deployment would explode the model selector and surprise - # operators upgrading from prior versions. Default to False so - # admins must explicitly turn them on. - settings.setdefault("image_generation_enabled", False) - settings.setdefault("vision_enabled", False) + # Image generation + vision LLM emission are opt-in (issue L). + # OpenRouter's catalogue contains hundreds of image / vision + # capable models; auto-injecting all of them into every + # deployment would explode the model selector and surprise + # operators upgrading from prior versions. Default to False so + # admins must explicitly turn them on. + settings.setdefault("image_generation_enabled", False) + settings.setdefault("vision_enabled", False) - return settings + return settings except Exception as e: print(f"Warning: Failed to load OpenRouter integration settings: {e}") return None @@ -415,7 +429,9 @@ def initialize_llm_router(): static YAML configs and dynamic OpenRouter models. """ all_configs = config.GLOBAL_LLM_CONFIGS - router_settings = load_router_settings() + # Reuse the router settings already parsed at Config construction instead + # of re-reading the YAML here. + router_settings = config.ROUTER_SETTINGS if not all_configs: print("Info: No global LLM configs found, Auto mode will not be available") @@ -439,7 +455,10 @@ def initialize_image_gen_router(): This should be called during application startup. """ image_gen_configs = load_global_image_gen_configs() - router_settings = load_image_gen_router_settings() + # Reuse the router settings already parsed at Config construction. The + # *configs* list is intentionally re-read from YAML (it must exclude the + # OpenRouter-injected dynamic models held in config.GLOBAL_IMAGE_GEN_CONFIGS). 
+ router_settings = config.IMAGE_GEN_ROUTER_SETTINGS if not image_gen_configs: print( @@ -462,7 +481,10 @@ def initialize_image_gen_router(): def initialize_vision_llm_router(): vision_configs = load_global_vision_llm_configs() - router_settings = load_vision_llm_router_settings() + # Reuse the router settings already parsed at Config construction. The + # *configs* list is intentionally re-read from YAML (it must exclude the + # OpenRouter-injected dynamic models held in config.GLOBAL_VISION_LLM_CONFIGS). + router_settings = config.VISION_LLM_ROUTER_SETTINGS if not vision_configs: print( @@ -534,6 +556,39 @@ class Config: os.getenv("CONNECTOR_INDEXING_LOCK_TTL_SECONDS", str(8 * 60 * 60)) ) + # Celery beat scheduling intervals (format: "<number><unit>", e.g. "2m", "1h") + SCHEDULE_CHECKER_INTERVAL = os.getenv("SCHEDULE_CHECKER_INTERVAL", "2m") + STRIPE_RECONCILIATION_INTERVAL = os.getenv("STRIPE_RECONCILIATION_INTERVAL", "10m") + + # File storage (local filesystem by default; Azure Blob optional) + FILE_STORAGE_BACKEND = os.getenv("FILE_STORAGE_BACKEND", "local").strip().lower() + AZURE_STORAGE_CONNECTION_STRING = os.getenv("AZURE_STORAGE_CONNECTION_STRING") + AZURE_STORAGE_CONTAINER = os.getenv("AZURE_STORAGE_CONTAINER") + FILE_STORAGE_LOCAL_PATH = os.getenv( + "FILE_STORAGE_LOCAL_PATH", str(BASE_DIR / ".local_object_store") + ) + + # Daytona sandbox (code execution / filesystem sandbox) + DAYTONA_SANDBOX_ENABLED = ( + os.getenv("DAYTONA_SANDBOX_ENABLED", "FALSE").upper() == "TRUE" + ) + DAYTONA_API_KEY = os.getenv("DAYTONA_API_KEY", "") + DAYTONA_API_URL = os.getenv("DAYTONA_API_URL", "https://app.daytona.io/api") + DAYTONA_TARGET = os.getenv("DAYTONA_TARGET", "us") + DAYTONA_SNAPSHOT_ID = os.getenv("DAYTONA_SNAPSHOT_ID") or None + SANDBOX_FILES_DIR = os.getenv("SANDBOX_FILES_DIR", "sandbox_files") + + # Agent cache (in-process LRU+TTL cache for built agents) + AGENT_CACHE_MAXSIZE = int(os.getenv("SURFSENSE_AGENT_CACHE_MAXSIZE", "256")) + AGENT_CACHE_TTL_SECONDS = float( + 
os.getenv("SURFSENSE_AGENT_CACHE_TTL_SECONDS", "1800") + ) + + # Connector discovery cache TTL + CONNECTOR_DISCOVERY_TTL_SECONDS = float( + os.getenv("SURFSENSE_CONNECTOR_DISCOVERY_TTL_SECONDS", "30") + ) + # Platform web search (SearXNG) SEARXNG_DEFAULT_HOST = os.getenv("SEARXNG_DEFAULT_HOST") diff --git a/surfsense_backend/app/file_storage/settings.py b/surfsense_backend/app/file_storage/settings.py index 612575890..2e0b08268 100644 --- a/surfsense_backend/app/file_storage/settings.py +++ b/surfsense_backend/app/file_storage/settings.py @@ -1,18 +1,12 @@ -"""Environment-driven configuration for the file-storage module.""" +"""Configuration for the file-storage module, sourced from the central Config.""" from __future__ import annotations -import os from dataclasses import dataclass -from pathlib import Path LOCAL_BACKEND = "local" AZURE_BACKEND = "azure" -# surfsense_backend/ — two levels up from app/file_storage/settings.py -_BACKEND_ROOT = Path(__file__).resolve().parents[2] -_DEFAULT_LOCAL_ROOT = str(_BACKEND_ROOT / ".local_object_store") - @dataclass(frozen=True) class StorageSettings: @@ -25,13 +19,15 @@ class StorageSettings: def load_storage_settings() -> StorageSettings: - """Read storage settings from the environment. + """Resolve storage settings from the central ``Config`` singleton. Defaults to the ``local`` backend so development needs no cloud creds. 
""" + from app.config import config + return StorageSettings( - backend=os.getenv("FILE_STORAGE_BACKEND", LOCAL_BACKEND).strip().lower(), - azure_connection_string=os.getenv("AZURE_STORAGE_CONNECTION_STRING"), - azure_container=os.getenv("AZURE_STORAGE_CONTAINER"), - local_root=os.getenv("FILE_STORAGE_LOCAL_PATH", _DEFAULT_LOCAL_ROOT), + backend=config.FILE_STORAGE_BACKEND, + azure_connection_string=config.AZURE_STORAGE_CONNECTION_STRING, + azure_container=config.AZURE_STORAGE_CONTAINER, + local_root=config.FILE_STORAGE_LOCAL_PATH, ) diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py index 45bcfd00f..2694a8e69 100644 --- a/surfsense_backend/app/services/connector_service.py +++ b/surfsense_backend/app/services/connector_service.py @@ -1,5 +1,4 @@ import asyncio -import os import time from datetime import datetime from threading import Lock @@ -12,6 +11,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from tavily import TavilyClient +from app.config import config from app.db import ( NATIVE_TO_LEGACY_DOCTYPE, Chunk, @@ -2856,9 +2856,7 @@ class ConnectorService: # bounded and the alternative (cross-replica fanout) is not worth the # coupling here. -_DISCOVERY_TTL_SECONDS: float = float( - os.getenv("SURFSENSE_CONNECTOR_DISCOVERY_TTL_SECONDS", "30") -) +_DISCOVERY_TTL_SECONDS: float = config.CONNECTOR_DISCOVERY_TTL_SECONDS # Per-search-space caches. Keyed by ``search_space_id``; value is # ``(expires_at_monotonic, payload)``. Plain dicts protected by a lock —