feat(auto_pin): quality-aware tier-locked selection with health gate

This commit is contained in:
Anish Sarkar 2026-05-01 23:38:53 +05:30
parent 1eedcaa551
commit 4bef75d298
2 changed files with 387 additions and 5 deletions

View file

@ -365,3 +365,339 @@ async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch):
assert result.resolved_llm_config_id == -2
assert session.thread.pinned_llm_config_id == -2
assert session.commit_count == 1
# ---------------------------------------------------------------------------
# Quality-aware pin selection (Auto Fastest upgrade)
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_health_gated_config_is_excluded_from_selection(monkeypatch):
    """A cfg flagged ``health_gated`` must never be picked even if it has
    the highest score among eligible cfgs."""
    from app.config import config

    # Two free Tier C candidates: the gated one scores far higher, yet the
    # health gate must remove it from the pool before scoring happens.
    gated_cfg = {
        "id": -1,
        "provider": "OPENROUTER",
        "model_name": "venice/dead-model",
        "api_key": "k1",
        "billing_tier": "free",
        "auto_pin_tier": "C",
        "quality_score": 95,
        "health_gated": True,
    }
    healthy_cfg = {
        "id": -2,
        "provider": "OPENROUTER",
        "model_name": "google/gemini-flash",
        "api_key": "k1",
        "billing_tier": "free",
        "auto_pin_tier": "C",
        "quality_score": 60,
        "health_gated": False,
    }
    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", [gated_cfg, healthy_cfg])

    # Deny premium quota so selection is confined to the free tier.
    async def _deny_premium(*_args, **_kwargs):
        return _FakeQuotaResult(allowed=False)

    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _deny_premium,
    )

    session = _FakeSession(_thread())
    outcome = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id="00000000-0000-0000-0000-000000000001",
        selected_llm_config_id=0,
    )
    # The healthy, lower-scored cfg wins because the gated one is excluded.
    assert outcome.resolved_llm_config_id == -2
@pytest.mark.asyncio
async def test_tier_a_locks_first_premium_user_skips_or(monkeypatch):
    """Premium-eligible users with Tier A available should never spill to
    Tier B even if a B cfg ranks higher by ``quality_score``."""
    from app.config import config

    # Tier A scores lower than Tier B on purpose: tier lock must dominate score.
    tier_a_cfg = {
        "id": -1,
        "provider": "AZURE_OPENAI",
        "model_name": "gpt-5",
        "api_key": "k-yaml",
        "billing_tier": "premium",
        "auto_pin_tier": "A",
        "quality_score": 70,
        "health_gated": False,
    }
    tier_b_cfg = {
        "id": -2,
        "provider": "OPENROUTER",
        "model_name": "openai/gpt-5",
        "api_key": "k-or",
        "billing_tier": "premium",
        "auto_pin_tier": "B",
        "quality_score": 95,
        "health_gated": False,
    }
    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", [tier_a_cfg, tier_b_cfg])

    # Grant premium quota so Tier A is reachable for this user.
    async def _grant_premium(*_args, **_kwargs):
        return _FakeQuotaResult(allowed=True)

    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _grant_premium,
    )

    session = _FakeSession(_thread())
    outcome = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id="00000000-0000-0000-0000-000000000001",
        selected_llm_config_id=0,
    )
    # Tier A must win despite the higher-scored Tier B candidate.
    assert outcome.resolved_llm_config_id == -1
    assert outcome.resolved_tier == "premium"
@pytest.mark.asyncio
async def test_tier_a_falls_through_to_or_when_a_pool_empty_for_user(monkeypatch):
    """Free-only user with no Tier A free cfg should pick from Tier C."""
    from app.config import config

    # Premium Tier A cfg exists but is unreachable once premium quota is denied.
    premium_a_cfg = {
        "id": -1,
        "provider": "AZURE_OPENAI",
        "model_name": "gpt-5",
        "api_key": "k-yaml",
        "billing_tier": "premium",
        "auto_pin_tier": "A",
        "quality_score": 100,
        "health_gated": False,
    }
    free_c_cfg = {
        "id": -2,
        "provider": "OPENROUTER",
        "model_name": "google/gemini-flash:free",
        "api_key": "k-or",
        "billing_tier": "free",
        "auto_pin_tier": "C",
        "quality_score": 60,
        "health_gated": False,
    }
    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", [premium_a_cfg, free_c_cfg])

    # Deny premium quota: only the free Tier C pool remains eligible.
    async def _deny_premium(*_args, **_kwargs):
        return _FakeQuotaResult(allowed=False)

    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _deny_premium,
    )

    session = _FakeSession(_thread())
    outcome = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id="00000000-0000-0000-0000-000000000001",
        selected_llm_config_id=0,
    )
    # Selection falls through to the free Tier C cfg.
    assert outcome.resolved_llm_config_id == -2
@pytest.mark.asyncio
async def test_top_k_picks_only_high_score_models(monkeypatch):
    """Different thread IDs should spread across top-K, never pick the
    obvious low-quality cfg even when it sits in the candidate list."""
    from app.config import config

    # 5 high-quality Tier A cfgs plus one low-score trap in the same tier.
    premium_pool = [
        {
            "id": -i,
            "provider": "AZURE_OPENAI",
            "model_name": f"gpt-x-{i}",
            "api_key": "k",
            "billing_tier": "premium",
            "auto_pin_tier": "A",
            "quality_score": 90,
            "health_gated": False,
        }
        for i in range(1, 6)
    ]
    trap_cfg = {
        "id": -99,
        "provider": "AZURE_OPENAI",
        "model_name": "tiny-legacy",
        "api_key": "k",
        "billing_tier": "premium",
        "auto_pin_tier": "A",
        "quality_score": 10,
        "health_gated": False,
    }
    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", [*premium_pool, trap_cfg])

    # Grant premium quota so the whole Tier A pool is eligible.
    async def _grant_premium(*_args, **_kwargs):
        return _FakeQuotaResult(allowed=True)

    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _grant_premium,
    )

    eligible_ids = {cfg["id"] for cfg in premium_pool}
    picked_ids = set()
    for tid in range(1, 50):
        outcome = await resolve_or_get_pinned_llm_config_id(
            _FakeSession(_thread()),
            thread_id=tid,
            search_space_id=10,
            user_id="00000000-0000-0000-0000-000000000001",
            selected_llm_config_id=0,
        )
        picked_ids.add(outcome.resolved_llm_config_id)
        assert outcome.resolved_llm_config_id != -99, (
            "low-score trap cfg should never be picked"
        )
        assert outcome.resolved_llm_config_id in eligible_ids
    # Spread across at least a couple of top-K cfgs.
    assert len(picked_ids) > 1
@pytest.mark.asyncio
async def test_pin_reuse_survives_health_gating_for_existing_pin(monkeypatch):
    """An *already* pinned cfg that later flips to ``health_gated`` should
    still not be reused: gated cfgs are filtered out of the candidate
    pool, which forces a repair to a healthy cfg.

    This guards the no-silent-tier-switch invariant: we don't keep using
    a known-broken model just because the thread happened to be pinned
    to it before the gate fired."""
    from app.config import config

    # The thread is already pinned to -1, which has since been health-gated.
    session = _FakeSession(_thread(pinned_llm_config_id=-1))
    pinned_but_gated = {
        "id": -1,
        "provider": "OPENROUTER",
        "model_name": "venice/dead-model",
        "api_key": "k",
        "billing_tier": "premium",
        "auto_pin_tier": "B",
        "quality_score": 50,
        "health_gated": True,
    }
    healthy_replacement = {
        "id": -2,
        "provider": "AZURE_OPENAI",
        "model_name": "gpt-5",
        "api_key": "k",
        "billing_tier": "premium",
        "auto_pin_tier": "A",
        "quality_score": 90,
        "health_gated": False,
    }
    monkeypatch.setattr(
        config, "GLOBAL_LLM_CONFIGS", [pinned_but_gated, healthy_replacement]
    )

    async def _grant_premium(*_args, **_kwargs):
        return _FakeQuotaResult(allowed=True)

    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _grant_premium,
    )

    outcome = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id="00000000-0000-0000-0000-000000000001",
        selected_llm_config_id=0,
    )
    # Repair path: the gated pin is abandoned for the healthy cfg.
    assert outcome.resolved_llm_config_id == -2
    assert outcome.from_existing_pin is False
@pytest.mark.asyncio
async def test_pin_reuse_regression_existing_healthy_pin(monkeypatch):
    """Existing pin reuse must short-circuit the new tier/score logic."""
    from app.config import config

    # Thread already pinned to -1; a higher-scored sibling exists but must
    # not tempt the resolver away from the healthy existing pin.
    session = _FakeSession(_thread(pinned_llm_config_id=-1))
    existing_pin = {
        "id": -1,
        "provider": "AZURE_OPENAI",
        "model_name": "gpt-5",
        "api_key": "k",
        "billing_tier": "premium",
        "auto_pin_tier": "A",
        "quality_score": 50,  # lower than -2
        "health_gated": False,
    }
    shinier_sibling = {
        "id": -2,
        "provider": "AZURE_OPENAI",
        "model_name": "gpt-5-pro",
        "api_key": "k",
        "billing_tier": "premium",
        "auto_pin_tier": "A",
        "quality_score": 99,
        "health_gated": False,
    }
    monkeypatch.setattr(config, "GLOBAL_LLM_CONFIGS", [existing_pin, shinier_sibling])

    # Quota lookup must not run at all on the reuse fast path.
    async def _must_not_call(*_args, **_kwargs):
        raise AssertionError("premium_get_usage should not run on pin reuse")

    monkeypatch.setattr(
        "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage",
        _must_not_call,
    )

    outcome = await resolve_or_get_pinned_llm_config_id(
        session,
        thread_id=1,
        search_space_id=10,
        user_id="00000000-0000-0000-0000-000000000001",
        selected_llm_config_id=0,
    )
    # Reuse: same cfg, flagged as existing, and no DB write occurred.
    assert outcome.resolved_llm_config_id == -1
    assert outcome.from_existing_pin is True
    assert session.commit_count == 0