diff --git a/surfsense_backend/app/services/auto_model_pin_service.py b/surfsense_backend/app/services/auto_model_pin_service.py
index f6a223866..b2acd6f56 100644
--- a/surfsense_backend/app/services/auto_model_pin_service.py
+++ b/surfsense_backend/app/services/auto_model_pin_service.py
@@ -34,6 +34,7 @@ logger = logging.getLogger(__name__)
 AUTO_FASTEST_ID = 0
 AUTO_FASTEST_MODE = "auto_fastest"
 _RUNTIME_COOLDOWN_SECONDS = 600
+_HEALTHY_TTL_SECONDS = 45
 
 # In-memory runtime cooldown map for configs that recently hard-failed at
 # provider runtime (e.g. OpenRouter 429 on a pinned free model). This keeps
@@ -41,6 +42,13 @@ _RUNTIME_COOLDOWN_SECONDS = 600
 _runtime_cooldown_until: dict[int, float] = {}
 _runtime_cooldown_lock = threading.Lock()
 
+# Short-TTL "recently healthy" cache for configs that just passed a runtime
+# preflight ping. Lets back-to-back turns on the same model skip the probe
+# without eroding correctness — entries auto-expire and are wiped any time
+# the same config is cooled down or the OR catalogue is refreshed.
+_healthy_until: dict[int, float] = {}
+_healthy_lock = threading.Lock()
+
 
 @dataclass
 class AutoPinResolution:
@@ -89,6 +97,9 @@ def mark_runtime_cooldown(
     with _runtime_cooldown_lock:
         _runtime_cooldown_until[int(config_id)] = until
         _prune_runtime_cooldowns()
+    # A cooled cfg can never be "recently healthy"; drop any stale credit so
+    # the next turn that resolves to it (after cooldown) re-runs preflight.
+    clear_healthy(int(config_id))
     logger.info(
         "auto_pin_runtime_cooled_down config_id=%s reason=%s cooldown_seconds=%s",
         config_id,
@@ -106,6 +117,52 @@ def clear_runtime_cooldown(config_id: int | None = None) -> None:
         _runtime_cooldown_until.pop(int(config_id), None)
 
 
+def _prune_healthy(now_ts: float | None = None) -> None:
+    now = time.time() if now_ts is None else now_ts
+    stale = [cid for cid, until in _healthy_until.items() if until <= now]
+    for cid in stale:
+        _healthy_until.pop(cid, None)
+
+
+def is_recently_healthy(config_id: int) -> bool:
+    """Return True if ``config_id`` passed preflight within the TTL window."""
+    with _healthy_lock:
+        _prune_healthy()
+        return int(config_id) in _healthy_until
+
+
+def mark_healthy(
+    config_id: int,
+    *,
+    ttl_seconds: int = _HEALTHY_TTL_SECONDS,
+) -> None:
+    """Record that ``config_id`` just passed a preflight probe.
+
+    Subsequent calls within ``ttl_seconds`` can skip the preflight ping. The
+    healthy state is intentionally process-local — it's a latency hint, not a
+    correctness primitive — so multi-worker drift is acceptable.
+    """
+    if ttl_seconds <= 0:
+        ttl_seconds = _HEALTHY_TTL_SECONDS
+    until = time.time() + int(ttl_seconds)
+    with _healthy_lock:
+        _healthy_until[int(config_id)] = until
+        _prune_healthy()
+
+
+def clear_healthy(config_id: int | None = None) -> None:
+    """Drop one (or all) healthy-cache entries.
+
+    Called from runtime cooldown and OR catalogue refresh so a freshly cooled
+    or replaced config never carries stale "healthy" credit.
+    """
+    with _healthy_lock:
+        if config_id is None:
+            _healthy_until.clear()
+            return
+        _healthy_until.pop(int(config_id), None)
+
+
 def _global_candidates() -> list[dict]:
     """Return Auto-eligible global cfgs.
 
diff --git a/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py b/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py
index 8696a8829..d333f0b7a 100644
--- a/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py
+++ b/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py
@@ -6,7 +6,10 @@ from types import SimpleNamespace
 import pytest
 
 from app.services.auto_model_pin_service import (
+    clear_healthy,
     clear_runtime_cooldown,
+    is_recently_healthy,
+    mark_healthy,
     mark_runtime_cooldown,
     resolve_or_get_pinned_llm_config_id,
 )
@@ -17,8 +20,10 @@ pytestmark = pytest.mark.unit
 @pytest.fixture(autouse=True)
 def _clear_runtime_cooldown_map():
     clear_runtime_cooldown()
+    clear_healthy()
     yield
     clear_runtime_cooldown()
+    clear_healthy()
 
 
 @dataclass
@@ -866,3 +871,51 @@ async def test_auto_pin_repin_excludes_previous_config_on_runtime_retry(monkeypa
     )
     assert result.resolved_llm_config_id == -2
     assert result.from_existing_pin is False
+
+
+# ---------------------------------------------------------------------------
+# Healthy-status cache (preflight TTL companion)
+# ---------------------------------------------------------------------------
+
+
+def test_mark_healthy_then_is_recently_healthy_true_within_ttl():
+    mark_healthy(-42, ttl_seconds=60)
+    assert is_recently_healthy(-42) is True
+
+
+def test_healthy_expires_after_ttl(monkeypatch):
+    import app.services.auto_model_pin_service as svc
+
+    real_time = svc.time.time
+    base = real_time()
+
+    monkeypatch.setattr(svc.time, "time", lambda: base)
+    mark_healthy(-7, ttl_seconds=10)
+    assert is_recently_healthy(-7) is True
+
+    monkeypatch.setattr(svc.time, "time", lambda: base + 11)
+    assert is_recently_healthy(-7) is False
+
+
+def test_mark_runtime_cooldown_invalidates_healthy_cache():
+    mark_healthy(-9, ttl_seconds=60)
+    assert is_recently_healthy(-9) is True
+
+    mark_runtime_cooldown(-9, reason="test", cooldown_seconds=60)
+    assert is_recently_healthy(-9) is False
+
+
+def test_clear_healthy_removes_single_entry():
+    mark_healthy(-11, ttl_seconds=60)
+    mark_healthy(-12, ttl_seconds=60)
+    clear_healthy(-11)
+    assert is_recently_healthy(-11) is False
+    assert is_recently_healthy(-12) is True
+
+
+def test_clear_healthy_no_args_drops_all_entries():
+    mark_healthy(-21, ttl_seconds=60)
+    mark_healthy(-22, ttl_seconds=60)
+    clear_healthy()
+    assert is_recently_healthy(-21) is False
+    assert is_recently_healthy(-22) is False