mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-27 19:25:15 +02:00
feat(auto_pin): add short-TTL healthy-status cache for preflight reuse
This commit is contained in:
parent
25ccc959cf
commit
14686cdf82
2 changed files with 110 additions and 0 deletions
|
|
@ -34,6 +34,7 @@ logger = logging.getLogger(__name__)
|
|||
AUTO_FASTEST_ID = 0
|
||||
AUTO_FASTEST_MODE = "auto_fastest"
|
||||
_RUNTIME_COOLDOWN_SECONDS = 600
|
||||
_HEALTHY_TTL_SECONDS = 45
|
||||
|
||||
# In-memory runtime cooldown map for configs that recently hard-failed at
|
||||
# provider runtime (e.g. OpenRouter 429 on a pinned free model). This keeps
|
||||
|
|
@ -41,6 +42,13 @@ _RUNTIME_COOLDOWN_SECONDS = 600
|
|||
_runtime_cooldown_until: dict[int, float] = {}
|
||||
_runtime_cooldown_lock = threading.Lock()
|
||||
|
||||
# Short-TTL "recently healthy" cache for configs that just passed a runtime
|
||||
# preflight ping. Lets back-to-back turns on the same model skip the probe
|
||||
# without eroding correctness — entries auto-expire and are wiped any time
|
||||
# the same config is cooled down or the OpenRouter (OR) catalogue is refreshed.
|
||||
_healthy_until: dict[int, float] = {}
|
||||
_healthy_lock = threading.Lock()
|
||||
|
||||
|
||||
@dataclass
|
||||
class AutoPinResolution:
|
||||
|
|
@ -89,6 +97,9 @@ def mark_runtime_cooldown(
|
|||
with _runtime_cooldown_lock:
|
||||
_runtime_cooldown_until[int(config_id)] = until
|
||||
_prune_runtime_cooldowns()
|
||||
# A cooled cfg can never be "recently healthy"; drop any stale credit so
|
||||
# the next turn that resolves to it (after cooldown) re-runs preflight.
|
||||
clear_healthy(int(config_id))
|
||||
logger.info(
|
||||
"auto_pin_runtime_cooled_down config_id=%s reason=%s cooldown_seconds=%s",
|
||||
config_id,
|
||||
|
|
@ -106,6 +117,52 @@ def clear_runtime_cooldown(config_id: int | None = None) -> None:
|
|||
_runtime_cooldown_until.pop(int(config_id), None)
|
||||
|
||||
|
||||
def _prune_healthy(now_ts: float | None = None) -> None:
    """Evict every healthy-cache entry whose deadline is at or before "now".

    Args:
        now_ts: Clock reading to compare deadlines against. When omitted,
            ``time.time()`` is sampled once.

    This helper does not take ``_healthy_lock`` itself.
    """
    cutoff = now_ts if now_ts is not None else time.time()
    # Iterate over a snapshot so the dict is never mutated mid-iteration.
    for cfg_id, deadline in list(_healthy_until.items()):
        if deadline <= cutoff:
            _healthy_until.pop(cfg_id, None)
|
||||
|
||||
|
||||
def is_recently_healthy(config_id: int) -> bool:
    """Report whether ``config_id`` still holds an unexpired healthy entry.

    Expired entries are pruned before the lookup, so plain membership in the
    cache is enough to mean "passed preflight within the TTL window".
    """
    with _healthy_lock:
        _prune_healthy()
        cache_key = int(config_id)
        still_fresh = cache_key in _healthy_until
    return still_fresh
|
||||
|
||||
|
||||
def mark_healthy(
    config_id: int,
    *,
    ttl_seconds: int = _HEALTHY_TTL_SECONDS,
) -> None:
    """Remember that ``config_id`` has just passed a preflight probe.

    While the entry is alive (``ttl_seconds`` from now) callers may skip the
    preflight ping for this config. The cache lives in this process only: it
    is a latency hint rather than a correctness primitive, so drift between
    workers is acceptable.

    Args:
        config_id: Config to credit; coerced with ``int()`` before storing.
        ttl_seconds: Lifetime of the entry in seconds. Values ``<= 0`` fall
            back to ``_HEALTHY_TTL_SECONDS``.
    """
    effective_ttl = _HEALTHY_TTL_SECONDS if ttl_seconds <= 0 else ttl_seconds
    expires_at = time.time() + int(effective_ttl)
    with _healthy_lock:
        _healthy_until[int(config_id)] = expires_at
        # Opportunistically drop anything that has already expired.
        _prune_healthy()
|
||||
|
||||
|
||||
def clear_healthy(config_id: int | None = None) -> None:
    """Forget the healthy credit for one config, or for every config.

    Invoked on runtime cooldown and on OR catalogue refresh so that a config
    which was just cooled down or replaced cannot keep stale "healthy" credit.

    Args:
        config_id: Config whose entry should be removed. ``None`` (the
            default) empties the whole cache. A missing entry is not an error.
    """
    with _healthy_lock:
        if config_id is not None:
            _healthy_until.pop(int(config_id), None)
        else:
            _healthy_until.clear()
|
||||
|
||||
|
||||
def _global_candidates() -> list[dict]:
|
||||
"""Return Auto-eligible global cfgs.
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,10 @@ from types import SimpleNamespace
|
|||
import pytest
|
||||
|
||||
from app.services.auto_model_pin_service import (
|
||||
clear_healthy,
|
||||
clear_runtime_cooldown,
|
||||
is_recently_healthy,
|
||||
mark_healthy,
|
||||
mark_runtime_cooldown,
|
||||
resolve_or_get_pinned_llm_config_id,
|
||||
)
|
||||
|
|
@ -17,8 +20,10 @@ pytestmark = pytest.mark.unit
|
|||
@pytest.fixture(autouse=True)
def _clear_runtime_cooldown_map():
    """Reset the cooldown map and the healthy cache around every test."""

    def _reset_module_state():
        clear_runtime_cooldown()
        clear_healthy()

    _reset_module_state()
    yield
    _reset_module_state()
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -866,3 +871,51 @@ async def test_auto_pin_repin_excludes_previous_config_on_runtime_retry(monkeypa
|
|||
)
|
||||
assert result.resolved_llm_config_id == -2
|
||||
assert result.from_existing_pin is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Healthy-status cache (preflight TTL companion)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_mark_healthy_then_is_recently_healthy_true_within_ttl():
    """A config that was just marked healthy is reported healthy right away."""
    cfg_id = -42
    mark_healthy(cfg_id, ttl_seconds=60)
    assert is_recently_healthy(cfg_id) is True
|
||||
|
||||
|
||||
def test_healthy_expires_after_ttl(monkeypatch):
    """A healthy entry disappears once the clock moves past its TTL."""
    import app.services.auto_model_pin_service as svc

    frozen_now = svc.time.time()
    cfg_id = -7
    ttl = 10

    # Freeze the clock so marking and the first lookup share one instant.
    monkeypatch.setattr(svc.time, "time", lambda: frozen_now)
    mark_healthy(cfg_id, ttl_seconds=ttl)
    assert is_recently_healthy(cfg_id) is True

    # Jump one second beyond the deadline; the entry must have expired.
    monkeypatch.setattr(svc.time, "time", lambda: frozen_now + (ttl + 1))
    assert is_recently_healthy(cfg_id) is False
|
||||
|
||||
|
||||
def test_mark_runtime_cooldown_invalidates_healthy_cache():
    """Cooling a config down strips any healthy credit it was holding."""
    cfg_id = -9

    mark_healthy(cfg_id, ttl_seconds=60)
    assert is_recently_healthy(cfg_id) is True

    mark_runtime_cooldown(cfg_id, reason="test", cooldown_seconds=60)
    assert is_recently_healthy(cfg_id) is False
|
||||
|
||||
|
||||
def test_clear_healthy_removes_single_entry():
    """Clearing one config id leaves the other cached entries untouched."""
    dropped_id = -11
    kept_id = -12

    mark_healthy(dropped_id, ttl_seconds=60)
    mark_healthy(kept_id, ttl_seconds=60)

    clear_healthy(dropped_id)

    assert is_recently_healthy(dropped_id) is False
    assert is_recently_healthy(kept_id) is True
|
||||
|
||||
|
||||
def test_clear_healthy_no_args_drops_all_entries():
    """Calling ``clear_healthy()`` with no argument empties the whole cache."""
    first_id = -21
    second_id = -22

    mark_healthy(first_id, ttl_seconds=60)
    mark_healthy(second_id, ttl_seconds=60)

    clear_healthy()

    assert is_recently_healthy(first_id) is False
    assert is_recently_healthy(second_id) is False
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue