mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-28 21:49:40 +02:00
feat(index-cache): add settings, eligibility, and config flags
This commit is contained in:
parent
ad6da7c6af
commit
daccd304ee
4 changed files with 76 additions and 0 deletions
|
|
@ -328,6 +328,20 @@ ETL_CACHE_ENABLED=false
|
||||||
# ETL_CACHE_STORAGE_CONTAINER=surfsense-etl-cache
|
# ETL_CACHE_STORAGE_CONTAINER=surfsense-etl-cache
|
||||||
# ETL_CACHE_STORAGE_LOCAL_PATH=/var/lib/surfsense/etl-cache
|
# ETL_CACHE_STORAGE_LOCAL_PATH=/var/lib/surfsense/etl-cache
|
||||||
|
|
||||||
|
# Index Cache
|
||||||
|
# Reuse chunk+embedding output for identical markdown across workspaces (skips
|
||||||
|
# re-chunking and re-embedding). Blobs share the ETL_CACHE_STORAGE_* backend.
|
||||||
|
# Off by default.
|
||||||
|
INDEX_CACHE_ENABLED=false
|
||||||
|
# Bump to invalidate all cached embedding sets after a chunker change.
|
||||||
|
# INDEX_CACHE_CHUNKER_VERSION=1
|
||||||
|
# Prune entries unused for this many days.
|
||||||
|
# INDEX_CACHE_TTL_DAYS=90
|
||||||
|
# Soft cap, in MB, on the total size of cached embeddings; the coldest entries are evicted once it is exceeded.
|
||||||
|
# INDEX_CACHE_MAX_TOTAL_MB=5120
|
||||||
|
# Rows deleted per eviction pass.
|
||||||
|
# INDEX_CACHE_EVICTION_BATCH=500
|
||||||
|
|
||||||
# Daytona Sandbox (isolated code execution)
|
# Daytona Sandbox (isolated code execution)
|
||||||
# DAYTONA_SANDBOX_ENABLED=FALSE
|
# DAYTONA_SANDBOX_ENABLED=FALSE
|
||||||
# DAYTONA_API_KEY=your-daytona-api-key
|
# DAYTONA_API_KEY=your-daytona-api-key
|
||||||
|
|
|
||||||
|
|
@ -964,6 +964,17 @@ class Config:
|
||||||
ETL_CACHE_STORAGE_CONTAINER = os.getenv("ETL_CACHE_STORAGE_CONTAINER")
|
ETL_CACHE_STORAGE_CONTAINER = os.getenv("ETL_CACHE_STORAGE_CONTAINER")
|
||||||
ETL_CACHE_STORAGE_LOCAL_PATH = os.getenv("ETL_CACHE_STORAGE_LOCAL_PATH")
|
ETL_CACHE_STORAGE_LOCAL_PATH = os.getenv("ETL_CACHE_STORAGE_LOCAL_PATH")
|
||||||
|
|
||||||
|
# Index cache: reuse chunk+embedding output for identical markdown across
# workspaces. Blobs share the ETL_CACHE_STORAGE_* backend.
# The flag is on only when the value is "true" (case-insensitive, surrounding
# whitespace ignored); unset, blank, or any other value leaves it off.
INDEX_CACHE_ENABLED = (
    os.getenv("INDEX_CACHE_ENABLED", "false").strip().lower() == "true"
)
# The integer settings below strip the raw value and use ``or`` so that a
# variable which is set but blank (e.g. ``INDEX_CACHE_TTL_DAYS=``) falls back
# to its default, matching how the flag above treats blank input, instead of
# raising ValueError from int("") while the config is being loaded.
# Non-numeric values still raise ValueError.
# Bump to invalidate every cached embedding set after a chunker change.
INDEX_CACHE_CHUNKER_VERSION = int(
    os.getenv("INDEX_CACHE_CHUNKER_VERSION", "").strip() or "1"
)
# Prune entries unused for this many days.
INDEX_CACHE_TTL_DAYS = int(os.getenv("INDEX_CACHE_TTL_DAYS", "").strip() or "90")
# Soft cap, in MB, on total cached embeddings.
INDEX_CACHE_MAX_TOTAL_MB = int(
    os.getenv("INDEX_CACHE_MAX_TOTAL_MB", "").strip() or "5120"
)
# Rows deleted per eviction pass.
INDEX_CACHE_EVICTION_BATCH = int(
    os.getenv("INDEX_CACHE_EVICTION_BATCH", "").strip() or "500"
)
|
||||||
|
|
||||||
# Proxy provider selection. Maps to a ProxyProvider implementation registered
|
# Proxy provider selection. Maps to a ProxyProvider implementation registered
|
||||||
# in app/utils/proxy/registry.py. Add new vendors there and switch via this var.
|
# in app/utils/proxy/registry.py. Add new vendors there and switch via this var.
|
||||||
PROXY_PROVIDER = os.getenv("PROXY_PROVIDER", "anonymous_proxies")
|
PROXY_PROVIDER = os.getenv("PROXY_PROVIDER", "anonymous_proxies")
|
||||||
|
|
|
||||||
21
surfsense_backend/app/indexing_pipeline/cache/eligibility.py
vendored
Normal file
21
surfsense_backend/app/indexing_pipeline/cache/eligibility.py
vendored
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
"""Gating rule: may this document be served from / written to the index cache?"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
|
def is_index_cacheable(
    *,
    cache_enabled: bool,
    embedding_model: str | None,
    embedding_dim: int | None,
) -> bool:
    """Return whether a document may be served from / written to the index cache.

    Caching requires the feature to be switched on and a concrete embedding
    model and dimension to be configured. Without a model there is nothing to
    key against, and without a dimension the blob's integrity guard cannot
    run -- both bypass the cache.

    Args:
        cache_enabled: Master switch for the index cache.
        embedding_model: Name of the configured embedding model; ``None`` or
            an empty string means no model is configured.
        embedding_dim: Embedding vector size; ``None``, zero, or a negative
            value means no usable dimension is configured.

    Returns:
        ``True`` only when the cache is enabled, a model is set, and the
        dimension is a positive integer.
    """
    if not cache_enabled or not embedding_model:
        return False
    # A plain truthiness check would accept negative sizes, which can never
    # describe a real embedding vector; require a strictly positive value.
    return embedding_dim is not None and embedding_dim > 0
|
||||||
30
surfsense_backend/app/indexing_pipeline/cache/settings.py
vendored
Normal file
30
surfsense_backend/app/indexing_pipeline/cache/settings.py
vendored
Normal file
|
|
@ -0,0 +1,30 @@
|
||||||
|
"""Index-cache configuration resolved from the central ``Config``.
|
||||||
|
|
||||||
|
The blob backend is intentionally not configured here: it is shared with the ETL
|
||||||
|
parse cache (see ``ETL_CACHE_STORAGE_*``).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class IndexCacheSettings:
    """Immutable bundle of the index-cache tunables."""

    # Whether the index cache is switched on.
    enabled: bool
    # Chunker revision number.
    chunker_version: int
    # Age threshold, in days.
    ttl_days: int
    # Size cap, in bytes.
    max_total_bytes: int
    # Batch size used for eviction.
    eviction_batch: int
|
||||||
|
|
||||||
|
|
||||||
|
def load_index_cache_settings() -> IndexCacheSettings:
    """Build an ``IndexCacheSettings`` from the central ``Config`` values.

    Returns:
        A frozen settings object; the configured megabyte cap is converted
        to bytes (1 MB = 1024 * 1024 bytes).
    """
    # Imported at call time rather than at module import.
    from app.config import config

    bytes_per_mb = 1024 * 1024
    size_cap_bytes = config.INDEX_CACHE_MAX_TOTAL_MB * bytes_per_mb

    return IndexCacheSettings(
        enabled=config.INDEX_CACHE_ENABLED,
        chunker_version=config.INDEX_CACHE_CHUNKER_VERSION,
        ttl_days=config.INDEX_CACHE_TTL_DAYS,
        max_total_bytes=size_cap_bytes,
        eviction_batch=config.INDEX_CACHE_EVICTION_BATCH,
    )
|
||||||
Loading…
Add table
Add a link
Reference in a new issue