feat(auto_pin): add pure-function quality scoring module

2026-07-10 22:32:16 +02:00 · 2026-05-01 23:37:49 +05:30 · 2026-05-01 23:37:49 +05:30 · d9058b73f5
commit d9058b73f5
parent 421a4d7d08
2 changed files with 724 additions and 0 deletions
--- a/surfsense_backend/app/services/quality_score.py
+++ b/surfsense_backend/app/services/quality_score.py
@ -0,0 +1,382 @@
+"""Pure-function quality scoring for Auto (Fastest) model selection.
+
+This module is import-free of any service / request-path dependencies. All
+numbers are computed once during the OpenRouter refresh tick (or YAML load)
+and cached on the cfg dict, so the chat hot path only does a precomputed
+sort and a SHA256 pick.
+
+Score components (0-100 scale, higher is better):
+
+* ``static_score_or``    – derived from the bulk ``/api/v1/models`` payload
+  (provider prestige + ``created`` recency + pricing band + context window
+  + capabilities + narrow tiny/legacy slug penalty).
+* ``static_score_yaml``  – same shape for hand-curated YAML configs, plus
+  an operator-trust bonus (the operator deliberately picked this model).
+* ``aggregate_health``   – run on per-model ``/api/v1/models/{id}/endpoints``
+  responses; returns ``(gated, score_or_none)``.
+
+The blended ``quality_score`` (0.5 * static + 0.5 * health) is computed in
+:mod:`app.services.openrouter_integration_service` because that's the only
+caller that sees both halves.
+"""
+
+from __future__ import annotations
+
+# ---------------------------------------------------------------------------
+# Tunables (constants, not flags)
+# ---------------------------------------------------------------------------
+
+# NOTE: of the tunables below, only ``_HEALTH_GATE_UPTIME_PCT`` and
+# ``_OPERATOR_TRUST_BONUS`` are read by functions in this module. The rest
+# are presumably imported by the refresh / selection code -- confirm
+# against callers before changing them.
+
+# Top-K size for deterministic spread inside the locked tier.
+_QUALITY_TOP_K: int = 5
+
+# Hard health gate: any cfg whose best non-null uptime is below this %
+# is excluded from Auto-mode selection entirely.
+# Expressed on the 0-100 scale; ``aggregate_health`` compares it against
+# uptime values already normalised to percentages.
+_HEALTH_GATE_UPTIME_PCT: float = 90.0
+
+# Health/static blend weight when a cfg has fresh /endpoints data.
+_HEALTH_BLEND_WEIGHT: float = 0.5
+
+# Static bonus applied to YAML cfgs because the operator hand-picked them.
+# Added unconditionally by ``static_score_yaml`` before clamping to 0-100.
+_OPERATOR_TRUST_BONUS: int = 20
+
+# /endpoints fan-out is bounded per refresh tick.
+_HEALTH_ENRICH_TOP_N_PREMIUM: int = 50
+_HEALTH_ENRICH_TOP_N_FREE: int = 30
+_HEALTH_ENRICH_CONCURRENCY: int = 15
+_HEALTH_FETCH_TIMEOUT_SEC: float = 5.0
+
+# If at least this fraction of /endpoints fetches fail in a refresh cycle,
+# fall back to the previous cycle's last-good cache instead of writing
+# partial / stale health values.
+_HEALTH_FAIL_RATIO_FALLBACK: float = 0.25
+
+# Narrow tiny/legacy slug penalties only. We deliberately do NOT penalise
+# ``-nano`` / ``-mini`` / ``-lite`` because modern frontier models ship with
+# those naming patterns (``gpt-5-mini``, ``gemini-2.5-flash-lite`` etc.) and
+# blanket-penalising them suppresses high-quality picks.
+#
+# ``slug_penalty`` lower-cases the model id and tests each entry with a plain
+# substring check, so:
+#   * entries must be lower-case;
+#   * the size patterns carry a trailing ``-`` and therefore do not match a
+#     slug that *ends* in the size token (e.g. ``...-1b`` or ``...-1b:free``);
+#   * ``-base`` also matches longer tokens that start with it;
+#   * one hit or many, the penalty is a single flat -10.
+_TINY_LEGACY_PENALTY_PATTERNS: tuple[str, ...] = (
+    "-1b-",
+    "-1.2b-",
+    "-1.5b-",
+    "-2b-",
+    "-3b-",
+    "gemma-3n",
+    "lfm-",
+    "-base",
+    "-distill",
+    ":nitro",
+    "-preview",
+)
+
+
+# ---------------------------------------------------------------------------
+# Provider prestige tables
+# ---------------------------------------------------------------------------
+
+# OpenRouter-side provider slug (the prefix before ``/`` in the model id).
+# Tiers are coarse: frontier labs > strong open / fast-moving labs >
+# specialist labs > everything else.
+#
+# Keys must be lower-case: ``_provider_prestige_or`` lower-cases the slug
+# before the lookup. A slug missing from this table scores 15, and a model
+# id with no ``/`` at all scores 0.
+PROVIDER_PRESTIGE_OR: dict[str, int] = {
+    # Frontier labs
+    "openai": 50,
+    "anthropic": 50,
+    "google": 50,
+    "x-ai": 50,
+    # Strong open / fast-moving labs
+    "deepseek": 38,
+    "qwen": 38,
+    "meta-llama": 38,
+    "mistralai": 38,
+    "cohere": 38,
+    "nvidia": 38,
+    "alibaba": 38,
+    # Specialist / regional / strong second-tier
+    "microsoft": 28,
+    "01-ai": 28,
+    "minimax": 28,
+    "moonshot": 28,
+    "z-ai": 28,
+    "nousresearch": 28,
+    "ai21": 28,
+    "perplexity": 28,
+    # Smaller / niche providers
+    "liquid": 18,
+    "cognitivecomputations": 18,
+    "venice": 18,
+    "inflection": 18,
+}
+
+# YAML provider field (the upstream API shape the operator selected).
+#
+# Keys must be upper-case: ``static_score_yaml`` upper-cases the cfg's
+# ``provider`` value before the lookup. Providers missing from this table
+# score 15.
+PROVIDER_PRESTIGE_YAML: dict[str, int] = {
+    "AZURE_OPENAI": 50,
+    "OPENAI": 50,
+    "ANTHROPIC": 50,
+    "GOOGLE": 50,
+    "VERTEX_AI": 50,
+    "GEMINI": 50,
+    "XAI": 50,
+    "MISTRAL": 38,
+    "DEEPSEEK": 38,
+    "COHERE": 38,
+    "GROQ": 30,
+    "TOGETHER_AI": 28,
+    "FIREWORKS_AI": 28,
+    "PERPLEXITY": 28,
+    "MINIMAX": 28,
+    "BEDROCK": 28,
+    "OPENROUTER": 25,
+    "OLLAMA": 12,
+    "CUSTOM": 12,
+}
+
+
+# ---------------------------------------------------------------------------
+# Pure scoring helpers
+# ---------------------------------------------------------------------------
+
+# Calibrated against the live /api/v1/models bulk dump. Frontier models
+# released in the last ~6 months (GPT-5 family, Claude 4.x, Gemini 2.5,
+# Grok 4 at calibration time) score 16-20; models roughly 6-18 months old
+# (mid-2024 releases at calibration time) score 9-12; anything older
+# trails off, reaching 0 once a model is more than three years old.
+# Each entry is ``(max_age_in_days, score)``; the first band whose cutoff
+# is >= the model's age wins, so entries must stay sorted by ascending age.
+_RECENCY_BANDS_DAYS: tuple[tuple[int, int], ...] = (
+    (60, 20),
+    (180, 16),
+    (365, 12),
+    (540, 9),
+    (730, 6),
+    (1095, 3),
+)
+
+
+def created_recency_signal(created_ts: int | None, now_ts: int) -> int:
+    """Return 0-20 based on how recently the model was published.
+
+    Uses the OpenRouter ``created`` Unix timestamp (or any equivalent for
+    YAML cfgs). Models without a usable timestamp get 0 (we don't penalise,
+    we just don't reward).
+
+    Args:
+        created_ts: Publication time in Unix seconds. ``None``, non-positive,
+            non-numeric and non-finite values all count as "no timestamp".
+        now_ts: Reference "now" in Unix seconds; must be positive.
+
+    Returns:
+        The score of the first ``_RECENCY_BANDS_DAYS`` band that covers the
+        model's age in whole days, or 0 when the model is older than every
+        band or either timestamp is unusable. A ``created_ts`` in the future
+        is treated as age 0 and therefore earns the top band.
+    """
+    # Coerce defensively: the value comes from an external payload, and the
+    # sibling ``pricing_band`` already treats uncoercible input as neutral
+    # instead of raising. ``OverflowError`` covers ``int(float("inf"))``.
+    try:
+        created = int(created_ts)  # type: ignore[arg-type]
+        now = int(now_ts)
+    except (TypeError, ValueError, OverflowError):
+        return 0
+    if created <= 0 or now <= 0:
+        return 0
+
+    age_days = max(0, (now - created) // 86_400)
+    for cutoff, score in _RECENCY_BANDS_DAYS:
+        if age_days <= cutoff:
+            return score
+    return 0
+
+
+def pricing_band(
+    prompt: str | float | int | None,
+    completion: str | float | int | None,
+) -> int:
+    """Map a model's combined prompt + completion price to a 0-15 score.
+
+    The two prices are summed and scaled by 1,000,000 (i.e. cost per 1M
+    tokens) before banding. Pricier models are assumed to be the larger /
+    more capable ones, so a higher total earns a higher band. A free model
+    scores 0; free-vs-free ranking is left to the other signals.
+
+    ``None`` and anything ``float()`` rejects count as a price of 0, so this
+    never raises on malformed pricing.
+    """
+
+    def _as_price(raw) -> float:
+        try:
+            return 0.0 if raw is None else float(raw)
+        except (TypeError, ValueError):
+            return 0.0
+
+    cost_per_million = (_as_price(prompt) + _as_price(completion)) * 1_000_000
+
+    # (inclusive lower bound, score), highest band first.
+    tiers = ((20.0, 15), (5.0, 12), (1.0, 9), (0.3, 6), (0.05, 4))
+    for floor, points in tiers:
+        if cost_per_million >= floor:
+            return points
+
+    # Anything priced above zero but under the lowest tier still beats free.
+    return 2 if cost_per_million > 0.0 else 0
+
+
+def context_signal(ctx: int | None) -> int:
+    """Score the context window on a 0-10 scale.
+
+    Missing, zero or negative windows score 0, as does anything below
+    100,000 tokens.
+    """
+    if not ctx or ctx <= 0:
+        return 0
+
+    # (inclusive lower bound in tokens, score), largest window first.
+    ladder = (
+        (1_000_000, 10),
+        (400_000, 8),
+        (200_000, 6),
+        (128_000, 4),
+        (100_000, 2),
+    )
+    return next((points for floor, points in ladder if ctx >= floor), 0)
+
+
+def capabilities_signal(supported_parameters: list[str] | None) -> int:
+    """Score 0-5 for the capabilities our agent flows rely on.
+
+    Tool calling is worth 2, structured output (``structured_outputs`` or
+    ``response_format``) is worth 2, and reasoning (``reasoning`` or
+    ``include_reasoning``) is worth 1. Each group counts once no matter how
+    many of its aliases are present.
+    """
+    if not supported_parameters:
+        return 0
+
+    offered = frozenset(supported_parameters)
+    weighted_groups = (
+        (2, ("tools",)),
+        (2, ("structured_outputs", "response_format")),
+        (1, ("reasoning", "include_reasoning")),
+    )
+    total = sum(
+        weight
+        for weight, aliases in weighted_groups
+        if not offered.isdisjoint(aliases)
+    )
+    return min(total, 5)
+
+
+def slug_penalty(model_id: str) -> int:
+    """Return -10 when the id hits a tiny/legacy pattern, otherwise 0.
+
+    Matching is a case-insensitive substring test against every entry of
+    ``_TINY_LEGACY_PENALTY_PATTERNS``; the penalty is flat, not cumulative.
+    An empty id is never penalised.
+    """
+    if not model_id:
+        return 0
+    lowered = model_id.lower()
+    flagged = any(marker in lowered for marker in _TINY_LEGACY_PENALTY_PATTERNS)
+    return -10 if flagged else 0
+
+
+def _provider_prestige_or(model_id: str) -> int:
+    """Look up the prestige score for the provider prefix of ``model_id``.
+
+    The provider is the text before the first ``/``, lower-cased. Ids
+    without a ``/`` score 0; providers missing from
+    ``PROVIDER_PRESTIGE_OR`` get the neutral default of 15.
+    """
+    provider, slash, _model = model_id.partition("/")
+    if not slash:
+        return 0
+    return PROVIDER_PRESTIGE_OR.get(provider.lower(), 15)
+
+
+def static_score_or(or_model: dict, *, now_ts: int) -> int:
+    """Score a raw OpenRouter ``/api/v1/models`` entry on a 0-100 scale.
+
+    The score is the sum of provider prestige, ``created`` recency, pricing
+    band, context window and capability signals, plus the (non-positive)
+    slug penalty, clamped to 0-100.
+
+    Args:
+        or_model: One model entry; the keys read are ``id``, ``created``,
+            ``pricing`` (``prompt`` / ``completion``), ``context_length``
+            and ``supported_parameters``. A missing or falsy ``pricing`` is
+            treated as an empty mapping.
+        now_ts: Reference "now" in Unix seconds for the recency signal.
+    """
+    model_id = str(or_model.get("id", ""))
+    price_table = or_model.get("pricing") or {}
+
+    components = (
+        _provider_prestige_or(model_id),
+        created_recency_signal(or_model.get("created"), now_ts),
+        pricing_band(price_table.get("prompt"), price_table.get("completion")),
+        context_signal(or_model.get("context_length")),
+        capabilities_signal(or_model.get("supported_parameters")),
+        slug_penalty(model_id),
+    )
+    total = int(sum(components))
+    return max(0, min(100, total))
+
+
+def static_score_yaml(cfg: dict) -> int:
+    """Score a hand-curated YAML cfg on a 0-100 scale.
+
+    The score is provider prestige + ``_OPERATOR_TRUST_BONUS`` (the operator
+    listed this model on purpose) + pricing band + context window + slug
+    penalty, clamped to 0-100. Unlike the OpenRouter variant, no recency or
+    capability signal is included.
+
+    Pricing and context come from a lazy ``litellm.get_model_info`` lookup
+    keyed on ``litellm_params.base_model``, then ``litellm_params.model``,
+    then ``model_name``. Any failure in that lookup is swallowed on purpose:
+    values read before the failure are kept, the rest stay at zero.
+    """
+    provider_key = str(cfg.get("provider", "")).upper()
+    prestige = PROVIDER_PRESTIGE_YAML.get(provider_key, 15)
+
+    model_name = cfg.get("model_name") or ""
+    params = cfg.get("litellm_params") or {}
+    lookup_name = params.get("base_model") or params.get("model") or model_name
+
+    window = 0
+    prompt_cost: float = 0.0
+    completion_cost: float = 0.0
+    try:
+        from litellm import get_model_info  # lazy: avoid cold-import cost
+
+        info = get_model_info(lookup_name) or {}
+        window = int(info.get("max_input_tokens") or info.get("max_tokens") or 0)
+        prompt_cost = float(info.get("input_cost_per_token") or 0.0)
+        completion_cost = float(info.get("output_cost_per_token") or 0.0)
+    except Exception:
+        # Best effort: a model litellm doesn't know still gets its
+        # prestige + operator-bonus weighting.
+        pass
+
+    components = (
+        prestige,
+        _OPERATOR_TRUST_BONUS,
+        pricing_band(prompt_cost, completion_cost),
+        context_signal(window),
+        slug_penalty(str(model_name)),
+    )
+    return max(0, min(100, int(sum(components))))
+
+
+# ---------------------------------------------------------------------------
+# Health aggregation
+# ---------------------------------------------------------------------------
+
+
+def _coerce_pct(value) -> float | None:
+    """Normalise a raw uptime reading to a 0-100 percentage.
+
+    Args:
+        value: Raw uptime as found in an endpoint payload; may be a number,
+            a numeric string, ``None`` or garbage.
+
+    Returns:
+        The reading as a percentage, or ``None`` when it is missing,
+        non-numeric, negative or non-finite. Values ``<= 1.0`` are read as
+        0-1 fractions and multiplied by 100; larger values are returned
+        unchanged.
+    """
+    import math  # stdlib; kept local so the module's import block is untouched
+
+    if value is None:
+        return None
+    try:
+        f = float(value)
+    except (TypeError, ValueError):
+        return None
+    # NaN compares False against everything, so it would slip past the
+    # ``< gate`` check in the caller and clamp to a perfect score; inf would
+    # do the same. Treat both as "no reading".
+    if not math.isfinite(f) or f < 0:
+        return None
+    # OpenRouter reports uptime as a 0-1 fraction; some endpoints surface it
+    # as a 0-100 percentage. Normalise.
+    return f * 100.0 if f <= 1.0 else f
+
+
+def _best_uptime(endpoints: list[dict]) -> tuple[float | None, str | None]:
+    """Return the highest usable uptime and the window it came from.
+
+    Windows are tried in the order ``uptime_last_30m``, ``uptime_last_1d``,
+    ``uptime_last_5m``. The first window for which at least one endpoint has
+    a usable reading wins, and the maximum across endpoints for that window
+    is returned as ``(uptime_pct, window_used)``. When no window has any
+    usable reading the result is ``(None, None)``.
+    """
+    for window in ("uptime_last_30m", "uptime_last_1d", "uptime_last_5m"):
+        normalised = (_coerce_pct(ep.get(window)) for ep in endpoints)
+        readings = [pct for pct in normalised if pct is not None]
+        if readings:
+            return max(readings), window
+    return None, None
+
+
+def _endpoint_is_ok(endpoint: dict) -> bool:
+    """Return True when ``endpoint`` reports ``status == 0`` (OK)."""
+    try:
+        return int(endpoint.get("status", 1)) == 0
+    except (TypeError, ValueError):
+        # A null or non-numeric status is "unknown", which is not OK; it
+        # must not abort scoring for the whole model.
+        return False
+
+
+def aggregate_health(endpoints: list[dict]) -> tuple[bool, float | None]:
+    """Aggregate a model's per-endpoint health into ``(gated, score_or_none)``.
+
+    Hard gate (returns ``(True, None)``):
+      * ``endpoints`` empty,
+      * no endpoint reports ``status == 0`` (OK), or
+      * best non-null uptime below ``_HEALTH_GATE_UPTIME_PCT``.
+
+    On a pass, returns ``(False, score)`` where ``score`` is on a 0-100
+    scale: 50% best uptime (30m > 1d > 5m window preference), 30% status and
+    20% freshness (the most recent window with data, 5m > 30m > 1d). Because
+    the gate already requires an OK endpoint, the status term is always the
+    full 30 points for any model that gets a score.
+
+    Args:
+        endpoints: Endpoint dicts from the per-model ``/endpoints``
+            response. The keys read are ``status`` and the three
+            ``uptime_last_*`` windows.
+    """
+    if not endpoints:
+        return True, None
+
+    if not any(_endpoint_is_ok(ep) for ep in endpoints):
+        return True, None
+
+    best_uptime, _ = _best_uptime(endpoints)
+    if best_uptime is None or best_uptime < _HEALTH_GATE_UPTIME_PCT:
+        return True, None
+
+    # Freshness term: prefer 5m, fall through to 30m / 1d if 5m is missing.
+    # Defaults to the best uptime so the term is always defined.
+    freshness_term = best_uptime
+    for window in ("uptime_last_5m", "uptime_last_30m", "uptime_last_1d"):
+        readings = [_coerce_pct(ep.get(window)) for ep in endpoints]
+        readings = [pct for pct in readings if pct is not None]
+        if readings:
+            freshness_term = max(readings)
+            break
+
+    # Past the gate above at least one endpoint is OK, so this is constant.
+    status_term = 100.0
+
+    score = 0.50 * best_uptime + 0.30 * status_term + 0.20 * freshness_term
+    return False, max(0.0, min(100.0, score))
--- a/surfsense_backend/tests/unit/services/test_quality_score.py
+++ b/surfsense_backend/tests/unit/services/test_quality_score.py
@ -0,0 +1,342 @@
+"""Unit tests for the Auto (Fastest) quality scoring module."""
+
+from __future__ import annotations
+
+import time
+
+import pytest
+
+from app.services.quality_score import (
+    _HEALTH_GATE_UPTIME_PCT,
+    _OPERATOR_TRUST_BONUS,
+    aggregate_health,
+    capabilities_signal,
+    context_signal,
+    created_recency_signal,
+    pricing_band,
+    slug_penalty,
+    static_score_or,
+    static_score_yaml,
+)
+
+pytestmark = pytest.mark.unit
+
+
+# ---------------------------------------------------------------------------
+# created_recency_signal
+# ---------------------------------------------------------------------------
+
+
+def test_created_recency_signal_recent_model_scores_high():
+    """A model published 30 days before ``now`` earns the top score."""
+    reference_now = 1_750_000_000  # ~mid-2025
+    published = reference_now - 30 * 86_400
+    assert created_recency_signal(published, reference_now) == 20
+
+
+def test_created_recency_signal_old_model_scores_zero():
+    """A five-year-old model is past every recency band."""
+    reference_now = 1_750_000_000
+    published = reference_now - 5 * 365 * 86_400
+    assert created_recency_signal(published, reference_now) == 0
+
+
+def test_created_recency_signal_missing_timestamp_is_neutral():
+    """``None`` and ``0`` timestamps both score 0."""
+    reference_now = 1_750_000_000
+    for missing in (None, 0):
+        assert created_recency_signal(missing, reference_now) == 0
+
+
+def test_created_recency_signal_monotonic_decay():
+    """The score never increases as a model gets older."""
+    reference_now = 1_750_000_000
+    ages_in_days = (30, 120, 300, 500, 700, 1000, 1500)
+    scores = [
+        created_recency_signal(reference_now - age * 86_400, reference_now)
+        for age in ages_in_days
+    ]
+    assert all(newer >= older for newer, older in zip(scores, scores[1:]))
+
+
+# ---------------------------------------------------------------------------
+# pricing_band
+# ---------------------------------------------------------------------------
+
+
+def test_pricing_band_free_returns_zero():
+    """Zero or missing prices score 0 regardless of input type."""
+    for prompt, completion in (("0", "0"), (0.0, 0.0), (None, None)):
+        assert pricing_band(prompt, completion) == 0
+
+
+def test_pricing_band_handles_unparseable():
+    """Inputs ``float()`` cannot parse count as free rather than raising."""
+    assert pricing_band("not-a-number", "0") == 0
+    assert pricing_band({}, []) == 0  # type: ignore[arg-type]
+
+
+def test_pricing_band_premium_tiers_increase_with_price():
+    """Strictly pricier models land in strictly higher, non-zero bands."""
+    price_points = (
+        ("0.0000003", "0.0000005"),  # cheap
+        ("0.000003", "0.000015"),  # mid
+        ("0.00001", "0.00005"),  # flagship
+    )
+    cheap, mid, flagship = (pricing_band(p, c) for p, c in price_points)
+    assert cheap > 0
+    assert cheap < mid
+    assert mid < flagship
+
+
+# ---------------------------------------------------------------------------
+# context_signal
+# ---------------------------------------------------------------------------
+
+
+_CONTEXT_BAND_CASES = (
+    (1_500_000, 10),
+    (1_000_000, 10),
+    (500_000, 8),
+    (200_000, 6),
+    (128_000, 4),
+    (100_000, 2),
+    (50_000, 0),
+    (0, 0),
+    (None, 0),
+)
+
+
+@pytest.mark.parametrize(("ctx", "expected"), _CONTEXT_BAND_CASES)
+def test_context_signal_bands(ctx, expected):
+    """Each context window size maps to its documented band score."""
+    observed = context_signal(ctx)
+    assert observed == expected
+
+
+# ---------------------------------------------------------------------------
+# capabilities_signal
+# ---------------------------------------------------------------------------
+
+
+def test_capabilities_signal_caps_at_five():
+    """Every capability present (with aliases doubled up) scores exactly 5.
+
+    Pinning the exact value guards both the cap and the sub-scores: a bare
+    ``<= 5`` would also pass if the function wrongly returned 0.
+    """
+    everything = ["tools", "structured_outputs", "reasoning", "include_reasoning"]
+    assert capabilities_signal(everything) == 5
+
+
+def test_capabilities_signal_tools_only():
+    """Tool calling alone is worth 2 points."""
+    assert capabilities_signal(["tools"]) == 2
+
+
+def test_capabilities_signal_empty():
+    """A missing or empty parameter list scores 0."""
+    assert capabilities_signal(None) == 0
+    assert capabilities_signal([]) == 0
+
+
+# ---------------------------------------------------------------------------
+# slug_penalty
+# ---------------------------------------------------------------------------
+
+
+def test_slug_penalty_demotes_tiny_models():
+    """Tiny-model slugs receive a negative adjustment."""
+    tiny_slugs = (
+        "meta-llama/llama-3.2-1b-instruct",
+        "liquid/lfm-7b",
+        "google/gemma-3n-e4b-it",
+    )
+    for slug in tiny_slugs:
+        assert slug_penalty(slug) < 0
+
+
+def test_slug_penalty_skips_capable_mini_nano_lite_models():
+    """Critical Option C+ regression: don't penalise modern frontier
+    models named ``-nano`` / ``-mini`` / ``-lite`` (gpt-5-mini, etc.)."""
+    capable_slugs = (
+        "openai/gpt-5-mini",
+        "openai/gpt-5-nano",
+        "google/gemini-2.5-flash-lite",
+        "anthropic/claude-haiku-4.5",
+    )
+    for slug in capable_slugs:
+        assert slug_penalty(slug) == 0
+
+
+def test_slug_penalty_demotes_legacy_variants():
+    """Preview / base / distill variants receive a negative adjustment."""
+    legacy_slugs = ("openai/o1-preview", "foo/bar-base", "foo/bar-distill")
+    for slug in legacy_slugs:
+        assert slug_penalty(slug) < 0
+
+
+def test_slug_penalty_empty_input():
+    """An empty id is never penalised."""
+    assert slug_penalty("") == 0
+
+
+# ---------------------------------------------------------------------------
+# static_score_or
+# ---------------------------------------------------------------------------
+
+
+def _or_model(
+    *,
+    model_id: str,
+    created: int | None = None,
+    prompt: str = "0.000003",
+    completion: str = "0.000015",
+    context: int = 200_000,
+    params: list[str] | None = None,
+) -> dict:
+    """Build a minimal OpenRouter-style model entry for the scoring tests.
+
+    ``params`` defaults to ``["tools"]`` only when it is ``None``; an
+    explicit empty list is passed through unchanged.
+    """
+    supported = ["tools"] if params is None else params
+    pricing = {"prompt": prompt, "completion": completion}
+    return dict(
+        id=model_id,
+        created=created,
+        pricing=pricing,
+        context_length=context,
+        supported_parameters=supported,
+    )
+
+
+def test_static_score_or_frontier_premium_beats_free_tiny():
+    """A recent, paid frontier model outranks an old, free 1B model."""
+    reference_now = 1_750_000_000
+    frontier_score = static_score_or(
+        _or_model(
+            model_id="openai/gpt-5",
+            created=reference_now - (60 * 86_400),
+            prompt="0.000005",
+            completion="0.000020",
+            context=400_000,
+            params=["tools", "structured_outputs", "reasoning"],
+        ),
+        now_ts=reference_now,
+    )
+    tiny_free_score = static_score_or(
+        _or_model(
+            model_id="meta-llama/llama-3.2-1b-instruct:free",
+            created=reference_now - (5 * 365 * 86_400),
+            prompt="0",
+            completion="0",
+            context=128_000,
+            params=["tools"],
+        ),
+        now_ts=reference_now,
+    )
+    assert frontier_score > tiny_free_score
+
+
+def test_static_score_or_score_is_clamped_0_to_100():
+    """The score stays inside the 0-100 range."""
+    entry = _or_model(model_id="openai/gpt-4o")
+    result = static_score_or(entry, now_ts=int(time.time()))
+    assert 0 <= result
+    assert result <= 100
+
+
+def test_static_score_or_unknown_provider_is_neutral_not_zero():
+    """A provider missing from the prestige table still scores above 0."""
+    entry = _or_model(model_id="some-new-lab/some-model")
+    assert static_score_or(entry, now_ts=int(time.time())) > 0
+
+
+def test_static_score_or_recent_release_beats_year_old_same_provider():
+    """With everything else equal, the newer release scores higher."""
+    reference_now = 1_750_000_000
+    fresh_score = static_score_or(
+        _or_model(model_id="openai/gpt-5", created=reference_now - (60 * 86_400)),
+        now_ts=reference_now,
+    )
+    old_score = static_score_or(
+        _or_model(
+            model_id="openai/gpt-4-turbo",
+            created=reference_now - (700 * 86_400),
+        ),
+        now_ts=reference_now,
+    )
+    assert fresh_score > old_score
+
+
+# ---------------------------------------------------------------------------
+# static_score_yaml
+# ---------------------------------------------------------------------------
+
+
+def test_static_score_yaml_includes_operator_bonus():
+    """A known-provider cfg scores at least the operator trust bonus."""
+    azure_cfg = dict(
+        provider="AZURE_OPENAI",
+        model_name="gpt-5",
+        litellm_params={"base_model": "azure/gpt-5"},
+    )
+    assert static_score_yaml(azure_cfg) >= _OPERATOR_TRUST_BONUS
+
+
+def test_static_score_yaml_unknown_provider_still_carries_bonus():
+    """An unlisted provider without litellm params keeps the bonus."""
+    unknown_cfg = dict(provider="SOME_NEW_PROVIDER", model_name="weird-model")
+    assert static_score_yaml(unknown_cfg) >= _OPERATOR_TRUST_BONUS
+
+
+def test_static_score_yaml_clamped_0_to_100():
+    """The YAML score stays inside the 0-100 range."""
+    azure_cfg = dict(
+        provider="AZURE_OPENAI",
+        model_name="gpt-5",
+        litellm_params={"base_model": "azure/gpt-5"},
+    )
+    result = static_score_yaml(azure_cfg)
+    assert 0 <= result
+    assert result <= 100
+
+
+# ---------------------------------------------------------------------------
+# aggregate_health
+# ---------------------------------------------------------------------------
+
+
+def test_aggregate_health_gates_when_uptime_below_threshold():
+    """Live data showed Venice-routed cfgs at 53-68%; this guards that the
+    90% gate excludes them."""
+    first = dict(
+        status=0, uptime_last_30m=0.55, uptime_last_1d=0.60, uptime_last_5m=0.50
+    )
+    second = dict(
+        status=0, uptime_last_30m=0.65, uptime_last_1d=0.68, uptime_last_5m=0.62
+    )
+    outcome = aggregate_health([first, second])
+    assert outcome[0] is True
+    assert outcome[1] is None
+
+
+def test_aggregate_health_passes_for_healthy_provider():
+    """A single ~99% endpoint passes the gate and gets a score."""
+    endpoint = dict(
+        status=0, uptime_last_30m=0.99, uptime_last_1d=0.995, uptime_last_5m=0.99
+    )
+    is_gated, health = aggregate_health([endpoint])
+    assert is_gated is False
+    assert health is not None
+    assert health >= _HEALTH_GATE_UPTIME_PCT
+
+
+def test_aggregate_health_picks_best_endpoint_across_multiple():
+    """Multi-endpoint aggregation should reward the best non-null uptime."""
+    weak = dict(status=0, uptime_last_30m=0.55)
+    strong = dict(status=0, uptime_last_30m=0.97)  # this one passes the gate
+    is_gated, health = aggregate_health([weak, strong])
+    assert is_gated is False
+    assert health is not None
+
+
+def test_aggregate_health_empty_endpoints_gated():
+    """No endpoints at all means the cfg is gated."""
+    outcome = aggregate_health([])
+    assert outcome[0] is True
+    assert outcome[1] is None
+
+
+def test_aggregate_health_no_status_zero_gated():
+    """Even with high uptime, no OK status means the cfg is broken upstream."""
+    not_ok = [
+        dict(status=1, uptime_last_30m=0.99),
+        dict(status=2, uptime_last_30m=0.98),
+    ]
+    outcome = aggregate_health(not_ok)
+    assert outcome[0] is True
+    assert outcome[1] is None
+
+
+def test_aggregate_health_all_uptime_null_gated():
+    """An OK endpoint with no usable uptime reading is still gated."""
+    no_readings = [dict(status=0, uptime_last_30m=None, uptime_last_1d=None)]
+    outcome = aggregate_health(no_readings)
+    assert outcome[0] is True
+    assert outcome[1] is None
+
+
+def test_aggregate_health_pct_normalisation():
+    """OpenRouter returns 0-1 fractions; some endpoints surface 0-100%
+    percentages. Both should reach the same gate decision."""
+    fraction_form = [{"status": 0, "uptime_last_30m": 0.95}]
+    pct_form = [{"status": 0, "uptime_last_30m": 95.0}]
+    g1, s1 = aggregate_health(fraction_form)
+    g2, s2 = aggregate_health(pct_form)
+    # Identity checks instead of a chained ``== False``: no lint suppression
+    # needed, and a falsy non-bool return value would be caught too.
+    assert g1 is False
+    assert g2 is False
+    assert s1 is not None and s2 is not None
+    assert abs(s1 - s2) < 0.5