refactor(config): centralize configuration management across modules

- Replaced environment variable usage with a centralized configuration system in multiple modules, including `celery_app`, `agent_cache_store`, `sandbox`, `file_storage`, and `connector_service`.
- Enhanced maintainability and readability by sourcing configuration values from the `config` module instead of directly from environment variables.
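- Added the corresponding settings (Celery beat intervals, file storage, Daytona sandbox, agent cache, and connector discovery TTL) to `Config` so every module reads them from one place.
- Cached the parsed `global_llm_config.yaml` per path and reused the router settings already held on `config` in the router initializers, so the file is not re-read on every `load_*` call.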
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-06-08 13:50:16 -07:00
parent ef7a20a5d0
commit c2beaf1e5a
6 changed files with 176 additions and 123 deletions

View file

@ -67,13 +67,13 @@ from __future__ import annotations
import asyncio
import hashlib
import logging
import os
import time
from collections import OrderedDict
from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from typing import Any
from app.config import config
from app.utils.perf import get_perf_logger
logger = logging.getLogger(__name__)
@ -328,8 +328,8 @@ def _short(key: str, n: int = 16) -> str:
# Module-level singleton
# ---------------------------------------------------------------------------
_DEFAULT_MAXSIZE = int(os.getenv("SURFSENSE_AGENT_CACHE_MAXSIZE", "256"))
_DEFAULT_TTL = float(os.getenv("SURFSENSE_AGENT_CACHE_TTL_SECONDS", "1800"))
_DEFAULT_MAXSIZE = config.AGENT_CACHE_MAXSIZE
_DEFAULT_TTL = config.AGENT_CACHE_TTL_SECONDS
_cache: _AgentCache = _AgentCache(maxsize=_DEFAULT_MAXSIZE, ttl_seconds=_DEFAULT_TTL)

View file

@ -14,7 +14,6 @@ from __future__ import annotations
import asyncio
import contextlib
import logging
import os
import shutil
import threading
from pathlib import Path
@ -29,6 +28,10 @@ from daytona.common.errors import DaytonaError
from deepagents.backends.protocol import ExecuteResponse
from langchain_daytona import DaytonaSandbox
# Aliased to avoid clashing with the local ``config = DaytonaConfig(...)``
# variable used inside ``_get_client``.
from app.config import config as app_config
logger = logging.getLogger(__name__)
@ -73,7 +76,7 @@ SANDBOX_DOCUMENTS_ROOT = "/home/daytona/documents"
def is_sandbox_enabled() -> bool:
return os.environ.get("DAYTONA_SANDBOX_ENABLED", "FALSE").upper() == "TRUE"
return app_config.DAYTONA_SANDBOX_ENABLED
def _get_client() -> Daytona:
@ -81,9 +84,9 @@ def _get_client() -> Daytona:
with _client_lock:
if _daytona_client is None:
config = DaytonaConfig(
api_key=os.environ.get("DAYTONA_API_KEY", ""),
api_url=os.environ.get("DAYTONA_API_URL", "https://app.daytona.io/api"),
target=os.environ.get("DAYTONA_TARGET", "us"),
api_key=app_config.DAYTONA_API_KEY,
api_url=app_config.DAYTONA_API_URL,
target=app_config.DAYTONA_TARGET,
)
_daytona_client = Daytona(config)
return _daytona_client
@ -92,7 +95,7 @@ def _get_client() -> Daytona:
def _sandbox_create_params(
labels: dict[str, str],
) -> CreateSandboxFromSnapshotParams:
snapshot_id = os.environ.get("DAYTONA_SNAPSHOT_ID") or None
snapshot_id = app_config.DAYTONA_SNAPSHOT_ID
return CreateSandboxFromSnapshotParams(
language="python",
labels=labels,
@ -302,7 +305,7 @@ async def delete_sandbox(thread_id: int | str) -> None:
def _get_sandbox_files_dir() -> Path:
return Path(os.environ.get("SANDBOX_FILES_DIR", "sandbox_files"))
return Path(app_config.SANDBOX_FILES_DIR)
def _local_path_for(thread_id: int | str, sandbox_path: str) -> Path:

View file

@ -1,7 +1,6 @@
"""Celery application configuration and setup."""
import contextlib
import os
import time
from celery import Celery
@ -19,6 +18,8 @@ try:
except ImportError: # pragma: no cover - optional OTel dependency
trace = None # type: ignore[assignment]
from app.config import config
# Load environment variables
load_dotenv()
@ -124,16 +125,16 @@ def init_worker(**kwargs):
initialize_vision_llm_router()
# Get Celery configuration from environment
CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND", "redis://localhost:6379/0")
CELERY_TASK_DEFAULT_QUEUE = os.getenv("CELERY_TASK_DEFAULT_QUEUE", "surfsense")
# Celery configuration, sourced from the central Config singleton
CELERY_BROKER_URL = config.CELERY_BROKER_URL
CELERY_RESULT_BACKEND = config.CELERY_RESULT_BACKEND
CELERY_TASK_DEFAULT_QUEUE = config.CELERY_TASK_DEFAULT_QUEUE
# Get schedule checker interval from environment
# Schedule checker interval
# Format: "<number><unit>" where unit is 'm' (minutes) or 'h' (hours)
# Examples: "1m" (every minute), "5m" (every 5 minutes), "1h" (every hour)
SCHEDULE_CHECKER_INTERVAL = os.getenv("SCHEDULE_CHECKER_INTERVAL", "2m")
STRIPE_RECONCILIATION_INTERVAL = os.getenv("STRIPE_RECONCILIATION_INTERVAL", "10m")
SCHEDULE_CHECKER_INTERVAL = config.SCHEDULE_CHECKER_INTERVAL
STRIPE_RECONCILIATION_INTERVAL = config.STRIPE_RECONCILIATION_INTERVAL
def parse_schedule_interval(interval: str) -> dict:

View file

@ -1,5 +1,7 @@
import copy
import os
import shutil
from functools import lru_cache
from pathlib import Path
import yaml
@ -17,6 +19,37 @@ os.environ.setdefault("OR_APP_NAME", "SurfSense")
os.environ.setdefault("OR_SITE_URL", "https://surfsense.com")
@lru_cache(maxsize=8)
def _read_global_config_yaml(path_str: str) -> dict:
"""Read and parse ``global_llm_config.yaml`` once per resolved path.
Cached so the seven ``load_*`` helpers (and their re-invocations during
startup) don't re-open and re-parse the same file repeatedly. Keyed on the
resolved path string so tests that monkeypatch ``BASE_DIR`` to a unique
``tmp_path`` still get a fresh parse. Callers MUST treat the returned dict
as read-only and deep-copy any section they intend to mutate.
"""
f = Path(path_str)
if not f.exists():
return {}
try:
with open(f, encoding="utf-8") as fh:
return yaml.safe_load(fh) or {}
except Exception as e:
print(f"Warning: Failed to read global_llm_config.yaml: {e}")
return {}
def _global_config_data() -> dict:
    """Return the parsed global LLM config YAML found under ``BASE_DIR``.

    ``BASE_DIR`` is looked up on every call rather than captured at import
    time, so ``monkeypatch.setattr(config, "BASE_DIR", tmp_path)`` takes
    effect for subsequent calls.
    """
    config_dir = BASE_DIR / "app" / "config"
    return _read_global_config_yaml(str(config_dir / "global_llm_config.yaml"))
def is_ffmpeg_installed():
"""
Check if ffmpeg is installed on the current system.
@ -35,17 +68,15 @@ def load_global_llm_configs():
Returns:
list: List of global LLM config dictionaries, or empty list if file doesn't exist
"""
# Try main config file first
global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml"
if not global_config_file.exists():
data = _global_config_data()
if not data:
# No global configs available
return []
try:
with open(global_config_file, encoding="utf-8") as f:
data = yaml.safe_load(f)
configs = data.get("global_llm_configs", [])
# Deep-copy so the in-place mutations below (setdefault, scoring
# stamps) never leak into the cached YAML structure.
configs = copy.deepcopy(data.get("global_llm_configs", []))
# Lazy import keeps the `app.config` -> `app.services` edge one-way
# and matches the `provider_api_base` pattern used elsewhere.
@ -145,18 +176,14 @@ def load_router_settings():
"cooldown_time": 60,
}
# Try main config file first
global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml"
if not global_config_file.exists():
data = _global_config_data()
if not data:
return default_settings
try:
with open(global_config_file, encoding="utf-8") as f:
data = yaml.safe_load(f)
settings = data.get("router_settings", {})
# Merge with defaults
return {**default_settings, **settings}
settings = data.get("router_settings", {})
# Merge with defaults
return {**default_settings, **settings}
except Exception as e:
print(f"Warning: Failed to load router settings: {e}")
return default_settings
@ -169,38 +196,32 @@ def load_global_image_gen_configs():
Returns:
list: List of global image generation config dictionaries, or empty list
"""
global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml"
if not global_config_file.exists():
data = _global_config_data()
if not data:
return []
try:
with open(global_config_file, encoding="utf-8") as f:
data = yaml.safe_load(f)
configs = data.get("global_image_generation_configs", []) or []
for cfg in configs:
if isinstance(cfg, dict):
cfg.setdefault("billing_tier", "free")
return configs
configs = copy.deepcopy(data.get("global_image_generation_configs", []) or [])
for cfg in configs:
if isinstance(cfg, dict):
cfg.setdefault("billing_tier", "free")
return configs
except Exception as e:
print(f"Warning: Failed to load global image generation configs: {e}")
return []
def load_global_vision_llm_configs():
global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml"
if not global_config_file.exists():
data = _global_config_data()
if not data:
return []
try:
with open(global_config_file, encoding="utf-8") as f:
data = yaml.safe_load(f)
configs = data.get("global_vision_llm_configs", []) or []
for cfg in configs:
if isinstance(cfg, dict):
cfg.setdefault("billing_tier", "free")
return configs
configs = copy.deepcopy(data.get("global_vision_llm_configs", []) or [])
for cfg in configs:
if isinstance(cfg, dict):
cfg.setdefault("billing_tier", "free")
return configs
except Exception as e:
print(f"Warning: Failed to load global vision LLM configs: {e}")
return []
@ -214,16 +235,13 @@ def load_vision_llm_router_settings():
"cooldown_time": 60,
}
global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml"
if not global_config_file.exists():
data = _global_config_data()
if not data:
return default_settings
try:
with open(global_config_file, encoding="utf-8") as f:
data = yaml.safe_load(f)
settings = data.get("vision_llm_router_settings", {})
return {**default_settings, **settings}
settings = data.get("vision_llm_router_settings", {})
return {**default_settings, **settings}
except Exception as e:
print(f"Warning: Failed to load vision LLM router settings: {e}")
return default_settings
@ -243,16 +261,13 @@ def load_image_gen_router_settings():
"cooldown_time": 60,
}
global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml"
if not global_config_file.exists():
data = _global_config_data()
if not data:
return default_settings
try:
with open(global_config_file, encoding="utf-8") as f:
data = yaml.safe_load(f)
settings = data.get("image_generation_router_settings", {})
return {**default_settings, **settings}
settings = data.get("image_generation_router_settings", {})
return {**default_settings, **settings}
except Exception as e:
print(f"Warning: Failed to load image generation router settings: {e}")
return default_settings
@ -268,49 +283,48 @@ def load_openrouter_integration_settings() -> dict | None:
Returns:
dict with settings if present and enabled, None otherwise
"""
global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml"
if not global_config_file.exists():
data = _global_config_data()
if not data:
return None
try:
with open(global_config_file, encoding="utf-8") as f:
data = yaml.safe_load(f)
settings = data.get("openrouter_integration")
if not settings or not settings.get("enabled"):
return None
# Deep-copy so the setdefault back-compat seeding below never mutates
# the cached YAML structure.
settings = copy.deepcopy(data.get("openrouter_integration"))
if not settings or not settings.get("enabled"):
return None
if "billing_tier" in settings:
print(
"Warning: openrouter_integration.billing_tier is deprecated; "
"tier is now derived per model from OpenRouter data "
"(':free' suffix or zero pricing). Remove this key."
)
if "billing_tier" in settings:
print(
"Warning: openrouter_integration.billing_tier is deprecated; "
"tier is now derived per model from OpenRouter data "
"(':free' suffix or zero pricing). Remove this key."
)
if "anonymous_enabled" in settings:
print(
"Warning: openrouter_integration.anonymous_enabled is "
"deprecated; use anonymous_enabled_paid and/or "
"anonymous_enabled_free instead. Both new flags have been "
"seeded from the legacy value for back-compat."
)
settings.setdefault(
"anonymous_enabled_paid", settings["anonymous_enabled"]
)
settings.setdefault(
"anonymous_enabled_free", settings["anonymous_enabled"]
)
if "anonymous_enabled" in settings:
print(
"Warning: openrouter_integration.anonymous_enabled is "
"deprecated; use anonymous_enabled_paid and/or "
"anonymous_enabled_free instead. Both new flags have been "
"seeded from the legacy value for back-compat."
)
settings.setdefault(
"anonymous_enabled_paid", settings["anonymous_enabled"]
)
settings.setdefault(
"anonymous_enabled_free", settings["anonymous_enabled"]
)
# Image generation + vision LLM emission are opt-in (issue L).
# OpenRouter's catalogue contains hundreds of image / vision
# capable models; auto-injecting all of them into every
# deployment would explode the model selector and surprise
# operators upgrading from prior versions. Default to False so
# admins must explicitly turn them on.
settings.setdefault("image_generation_enabled", False)
settings.setdefault("vision_enabled", False)
# Image generation + vision LLM emission are opt-in (issue L).
# OpenRouter's catalogue contains hundreds of image / vision
# capable models; auto-injecting all of them into every
# deployment would explode the model selector and surprise
# operators upgrading from prior versions. Default to False so
# admins must explicitly turn them on.
settings.setdefault("image_generation_enabled", False)
settings.setdefault("vision_enabled", False)
return settings
return settings
except Exception as e:
print(f"Warning: Failed to load OpenRouter integration settings: {e}")
return None
@ -415,7 +429,9 @@ def initialize_llm_router():
static YAML configs and dynamic OpenRouter models.
"""
all_configs = config.GLOBAL_LLM_CONFIGS
router_settings = load_router_settings()
# Reuse the router settings already parsed at Config construction instead
# of re-reading the YAML here.
router_settings = config.ROUTER_SETTINGS
if not all_configs:
print("Info: No global LLM configs found, Auto mode will not be available")
@ -439,7 +455,10 @@ def initialize_image_gen_router():
This should be called during application startup.
"""
image_gen_configs = load_global_image_gen_configs()
router_settings = load_image_gen_router_settings()
# Reuse the router settings already parsed at Config construction. The
# *configs* list is intentionally re-read from YAML (it must exclude the
# OpenRouter-injected dynamic models held in config.GLOBAL_IMAGE_GEN_CONFIGS).
router_settings = config.IMAGE_GEN_ROUTER_SETTINGS
if not image_gen_configs:
print(
@ -462,7 +481,10 @@ def initialize_image_gen_router():
def initialize_vision_llm_router():
vision_configs = load_global_vision_llm_configs()
router_settings = load_vision_llm_router_settings()
# Reuse the router settings already parsed at Config construction. The
# *configs* list is intentionally re-read from YAML (it must exclude the
# OpenRouter-injected dynamic models held in config.GLOBAL_VISION_LLM_CONFIGS).
router_settings = config.VISION_LLM_ROUTER_SETTINGS
if not vision_configs:
print(
@ -534,6 +556,39 @@ class Config:
os.getenv("CONNECTOR_INDEXING_LOCK_TTL_SECONDS", str(8 * 60 * 60))
)
# Celery beat scheduling intervals (format: "<number><unit>", e.g. "2m", "1h")
# Kept as raw strings here; no parsing or validation happens at this point.
SCHEDULE_CHECKER_INTERVAL = os.getenv("SCHEDULE_CHECKER_INTERVAL", "2m")
STRIPE_RECONCILIATION_INTERVAL = os.getenv("STRIPE_RECONCILIATION_INTERVAL", "10m")
# File storage (local filesystem by default; Azure Blob optional)
# The backend name is normalized (surrounding whitespace stripped, lowercased).
FILE_STORAGE_BACKEND = os.getenv("FILE_STORAGE_BACKEND", "local").strip().lower()
# Both Azure values are None when the variable is unset (no default supplied).
AZURE_STORAGE_CONNECTION_STRING = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
AZURE_STORAGE_CONTAINER = os.getenv("AZURE_STORAGE_CONTAINER")
# Defaults to a ".local_object_store" directory directly under BASE_DIR.
FILE_STORAGE_LOCAL_PATH = os.getenv(
"FILE_STORAGE_LOCAL_PATH", str(BASE_DIR / ".local_object_store")
)
# Daytona sandbox (code execution / filesystem sandbox)
# True only when the value equals "TRUE" case-insensitively; the value is not
# stripped, so surrounding whitespace makes this False.
DAYTONA_SANDBOX_ENABLED = (
os.getenv("DAYTONA_SANDBOX_ENABLED", "FALSE").upper() == "TRUE"
)
DAYTONA_API_KEY = os.getenv("DAYTONA_API_KEY", "")
DAYTONA_API_URL = os.getenv("DAYTONA_API_URL", "https://app.daytona.io/api")
DAYTONA_TARGET = os.getenv("DAYTONA_TARGET", "us")
# An unset or empty variable both collapse to None.
DAYTONA_SNAPSHOT_ID = os.getenv("DAYTONA_SNAPSHOT_ID") or None
SANDBOX_FILES_DIR = os.getenv("SANDBOX_FILES_DIR", "sandbox_files")
# Agent cache (in-process LRU+TTL cache for built agents)
# int()/float() raise ValueError as soon as these statements execute if the
# environment value is not a valid number.
AGENT_CACHE_MAXSIZE = int(os.getenv("SURFSENSE_AGENT_CACHE_MAXSIZE", "256"))
AGENT_CACHE_TTL_SECONDS = float(
os.getenv("SURFSENSE_AGENT_CACHE_TTL_SECONDS", "1800")
)
# Connector discovery cache TTL
# Same float() parsing as above, so a malformed value raises ValueError here.
CONNECTOR_DISCOVERY_TTL_SECONDS = float(
os.getenv("SURFSENSE_CONNECTOR_DISCOVERY_TTL_SECONDS", "30")
)
# Platform web search (SearXNG)
# None when the variable is unset.
SEARXNG_DEFAULT_HOST = os.getenv("SEARXNG_DEFAULT_HOST")

View file

@ -1,18 +1,12 @@
"""Environment-driven configuration for the file-storage module."""
"""Configuration for the file-storage module, sourced from the central Config."""
from __future__ import annotations
import os
from dataclasses import dataclass
from pathlib import Path
LOCAL_BACKEND = "local"
AZURE_BACKEND = "azure"
# surfsense_backend/ — two levels up from app/file_storage/settings.py
_BACKEND_ROOT = Path(__file__).resolve().parents[2]
_DEFAULT_LOCAL_ROOT = str(_BACKEND_ROOT / ".local_object_store")
@dataclass(frozen=True)
class StorageSettings:
@ -25,13 +19,15 @@ class StorageSettings:
def load_storage_settings() -> StorageSettings:
"""Read storage settings from the environment.
"""Resolve storage settings from the central ``Config`` singleton.
Defaults to the ``local`` backend so development needs no cloud creds.
"""
from app.config import config
return StorageSettings(
backend=os.getenv("FILE_STORAGE_BACKEND", LOCAL_BACKEND).strip().lower(),
azure_connection_string=os.getenv("AZURE_STORAGE_CONNECTION_STRING"),
azure_container=os.getenv("AZURE_STORAGE_CONTAINER"),
local_root=os.getenv("FILE_STORAGE_LOCAL_PATH", _DEFAULT_LOCAL_ROOT),
backend=config.FILE_STORAGE_BACKEND,
azure_connection_string=config.AZURE_STORAGE_CONNECTION_STRING,
azure_container=config.AZURE_STORAGE_CONTAINER,
local_root=config.FILE_STORAGE_LOCAL_PATH,
)

View file

@ -1,5 +1,4 @@
import asyncio
import os
import time
from datetime import datetime
from threading import Lock
@ -12,6 +11,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from tavily import TavilyClient
from app.config import config
from app.db import (
NATIVE_TO_LEGACY_DOCTYPE,
Chunk,
@ -2856,9 +2856,7 @@ class ConnectorService:
# bounded and the alternative (cross-replica fanout) is not worth the
# coupling here.
_DISCOVERY_TTL_SECONDS: float = float(
os.getenv("SURFSENSE_CONNECTOR_DISCOVERY_TTL_SECONDS", "30")
)
_DISCOVERY_TTL_SECONDS: float = config.CONNECTOR_DISCOVERY_TTL_SECONDS
# Per-search-space caches. Keyed by ``search_space_id``; value is
# ``(expires_at_monotonic, payload)``. Plain dicts protected by a lock —