feat: make model pricing source configurable (models.dev + DigitalOcean) (#971)

2026-07-02 15:51:02 +02:00 · 2026-06-24 10:14:12 -07:00 · 2026-06-24 10:14:12 -07:00 · 558df0307c
commit 558df0307c
parent 5cc4c4ee77
9 changed files with 687 additions and 48 deletions
--- a/cli/planoai/obs/pricing.py
+++ b/cli/planoai/obs/pricing.py
@ -1,7 +1,8 @@
-"""DigitalOcean Gradient pricing catalog for the obs console.
+"""Model pricing catalog for the obs console.
-Ported loosely from ``crates/brightstaff/src/router/model_metrics.rs::fetch_do_pricing``.
+Mirrors ``crates/brightstaff/src/router/model_metrics.rs``. The source is
-Single-source: one fetch at startup, cached for the life of the process.
+configurable: ``digitalocean`` (DO GenAI catalog) or ``models.dev``. A single
 fetch at startup is cached for the life of the process.
 """
 from __future__ import annotations
@ -14,7 +15,18 @@ from typing import Any
 import requests
-DEFAULT_PRICING_URL = "https://api.digitalocean.com/v2/gen-ai/models/catalog"
+DO_PRICING_URL = "https://api.digitalocean.com/v2/gen-ai/models/catalog"
 MODELS_DEV_URL = "https://models.dev/api.json"
 # Backwards-compatible default (DigitalOcean) used when no provider is given.
 DEFAULT_PRICING_URL = DO_PRICING_URL
 DEFAULT_PRICING_PROVIDER = "digitalocean"
 _DEFAULT_URLS = {
    "digitalocean": DO_PRICING_URL,
    "models.dev": MODELS_DEV_URL,
 }
 FETCH_TIMEOUT_SECS = 5.0
@ -51,36 +63,52 @@ class PricingCatalog:
            return list(self._prices.keys())[:n]
    @classmethod
-    def fetch(cls, url: str = DEFAULT_PRICING_URL) -> "PricingCatalog":
+    def fetch(
-        """Fetch pricing from DO's catalog endpoint. On failure, returns an
+        cls,
        provider: str = DEFAULT_PRICING_PROVIDER,
        url: str | None = None,
    ) -> "PricingCatalog":
        """Fetch pricing from the configured catalog. On failure, returns an
        empty catalog (cost column will be blank).
-        The catalog endpoint is public — no auth required, no signup — so
+        ``provider`` selects the parser/default URL: ``digitalocean`` or
-        ``planoai obs`` gets cost data on first run out of the box.
+        ``models.dev``. Both catalog endpoints are public — no auth required —
        so ``planoai obs`` gets cost data on first run out of the box.
        """
        provider = (provider or DEFAULT_PRICING_PROVIDER).strip().lower()
        resolved_url = url or _DEFAULT_URLS.get(provider, DO_PRICING_URL)
        try:
-            resp = requests.get(url, timeout=FETCH_TIMEOUT_SECS)
+            resp = requests.get(resolved_url, timeout=FETCH_TIMEOUT_SECS)
            resp.raise_for_status()
            data = resp.json()
        except Exception as exc:  # noqa: BLE001 — best-effort; never fatal
            logger.warning(
-                "DO pricing fetch failed: %s; cost column will be blank.",
+                "%s pricing fetch failed: %s; cost column will be blank.",
                provider,
                exc,
            )
            return cls()
-        prices = _parse_do_pricing(data)
+        if provider == "models.dev":
            prices = _parse_models_dev_pricing(data)
        else:
            prices = _parse_do_pricing(data)
        if not prices:
-            # Dump the first entry's raw shape so we can see which fields DO
+            # Dump a sample of the raw shape so we can see which fields the
-            # actually returned — helps when the catalog adds new fields or
+            # catalog returned — helps when it adds new fields or the response
-            # the response doesn't match our parser.
+            # doesn't match our parser.
            import json as _json
-            sample_items = _coerce_items(data)
+            if provider == "models.dev" and isinstance(data, dict):
-            sample = sample_items[0] if sample_items else data
+                sample = next(iter(data.values()), data)
            else:
                sample_items = _coerce_items(data)
                sample = sample_items[0] if sample_items else data
            logger.warning(
-                "DO pricing response had no parseable entries; cost column "
+                "%s pricing response had no parseable entries; cost column "
                "will be blank. Sample entry: %s",
                provider,
                _json.dumps(sample, default=str)[:400],
            )
        return cls(prices)
@ -278,6 +306,75 @@ def _parse_do_pricing(data: Any) -> dict[str, ModelPrice]:
    return prices
 def _parse_models_dev_pricing(data: Any) -> dict[str, ModelPrice]:
    """Build a ModelPrice lookup from a models.dev ``api.json`` payload.
    The payload is a top-level object keyed by provider id; each provider
    holds a ``models`` object keyed by model id, and each model may carry a
    ``cost`` object::
        {
          "anthropic": {
            "models": {
              "claude-opus-4-5": {
                "cost": {"input": 5, "output": 25, "cache_read": 0.5}
              }
            }
          },
          ...
        }
    ``cost.*`` figures are USD per *million* tokens and are converted to a
    per-token rate here. Every priced model is registered under
    ``provider/model`` (the form Plano's routing names take); the lowercased
    composite, the bare model id and the lowercased bare id are added as
    fallbacks only when those keys are still free.
    Entries that are malformed, lack a usable ``input`` or ``output`` rate, or
    have both rates equal to zero are skipped. Anything that is not a dict at
    the top level yields an empty map.
    """
    if not isinstance(data, dict):
        return {}
    per_million = 1_000_000
    catalog: dict[str, ModelPrice] = {}
    for provider_id, provider_entry in data.items():
        model_map = (
            provider_entry.get("models") if isinstance(provider_entry, dict) else None
        )
        if not isinstance(model_map, dict):
            continue
        for model_key, model_entry in model_map.items():
            rates = model_entry.get("cost") if isinstance(model_entry, dict) else None
            if not isinstance(rates, dict):
                continue
            input_rate = _as_float(rates.get("input"))
            output_rate = _as_float(rates.get("output"))
            if input_rate is None or output_rate is None:
                continue
            # A 0/0 entry is dropped so the cost column shows `—`, not $0.0000.
            if input_rate == 0 and output_rate == 0:
                continue
            cache_rate = _as_float(rates.get("cache_read"))
            entry = ModelPrice(
                input_per_token_usd=input_rate / per_million,
                output_per_token_usd=output_rate / per_million,
                cached_input_per_token_usd=(
                    None if cache_rate is None else cache_rate / per_million
                ),
            )
            qualified = f"{provider_id}/{model_key}"
            bare = str(model_key)
            # The exact composite key always wins; fallbacks never overwrite.
            catalog[qualified] = entry
            for fallback_key in (qualified.lower(), bare, bare.lower()):
                catalog.setdefault(fallback_key, entry)
    return catalog
 def _as_float(value: Any) -> float | None:
    if value is None:
        return None
    try:
        return float(value)
    except (TypeError, ValueError):
        return None
 def _coerce_items(data: Any) -> list[dict]:
    if isinstance(data, list):
        return [x for x in data if isinstance(x, dict)]
--- a/cli/planoai/obs_cmd.py
+++ b/cli/planoai/obs_cmd.py
@ -2,9 +2,12 @@
 from __future__ import annotations
 import logging
 import os
 import time
 import rich_click as click
 import yaml
 from rich.console import Console
 from rich.live import Live
@ -15,8 +18,50 @@ from planoai.obs.collector import (
    LLMCallStore,
    ObsCollector,
 )
-from planoai.obs.pricing import PricingCatalog
+from planoai.obs.pricing import DEFAULT_PRICING_PROVIDER, PricingCatalog
 from planoai.obs.render import render
 from planoai.utils import find_config_file
 logger = logging.getLogger(__name__)
 def _resolve_pricing_source(
    config_file: str | None,
    provider_override: str | None,
    url_override: str | None,
 ) -> tuple[str, str | None]:
    """Pick the cost pricing source.
    Precedence: explicit CLI overrides > the first ``type: cost`` entry in
    ``model_metrics_sources`` from the Plano config > the DigitalOcean default.
    Args:
        config_file: Path passed via ``--config``; handed to
            ``find_config_file`` to locate the config.
        provider_override: Provider given on the command line, if any.
        url_override: Catalog URL given on the command line, if any.
    Returns:
        ``(provider, url)``. ``url`` is ``None`` when neither the config nor
        the command line supplied one, leaving the choice of endpoint to the
        caller.
    A config that cannot be read or parsed is not fatal: a warning is logged
    and resolution continues with the defaults and any CLI overrides.
    """
    provider = DEFAULT_PRICING_PROVIDER
    url: str | None = None
    config_path = find_config_file(file=config_file)
    if config_path and os.path.exists(config_path):
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                config = yaml.safe_load(f) or {}
            sources = config.get("model_metrics_sources") or []
            for source in sources:
                if isinstance(source, dict) and source.get("type") == "cost":
                    if source.get("provider"):
                        provider = str(source["provider"])
                    if source.get("url"):
                        url = str(source["url"])
                    break
        except Exception as exc:  # noqa: BLE001 — config is optional for obs
            logger.warning(
                "could not read pricing source from %s: %s", config_path, exc
            )
    if provider_override:
        # A URL read from the config belongs to the config's provider. If the
        # CLI switches to a different provider, reusing that URL would feed the
        # wrong response shape to the parser and yield an empty catalog, so
        # drop it and let the new provider's default apply.
        if provider_override.strip().lower() != provider.strip().lower():
            url = None
        provider = provider_override
    if url_override:
        url = url_override
    return provider, url
@click.command(name="obs", help="Live observability console for Plano LLM traffic.")
@ -48,13 +93,42 @@ from planoai.obs.render import render
    show_default=True,
    help="TUI refresh interval.",
 )
-def obs(port: int, host: str, capacity: int, refresh_ms: int) -> None:
+@click.option(
    "--config",
    "config_file",
    type=str,
    default=None,
    help="Path to the Plano config to read the pricing source from "
    "(defaults to ./config.yaml or ./plano_config.yaml).",
 )
@click.option(
    "--pricing-provider",
    type=click.Choice(["digitalocean", "models.dev"]),
    default=None,
    help="Override the cost pricing provider (otherwise read from config).",
 )
@click.option(
    "--pricing-url",
    type=str,
    default=None,
    help="Override the pricing catalog URL (otherwise read from config / provider default).",
 )
 def obs(
    port: int,
    host: str,
    capacity: int,
    refresh_ms: int,
    config_file: str | None,
    pricing_provider: str | None,
    pricing_url: str | None,
 ) -> None:
    console = Console()
    provider, url = _resolve_pricing_source(config_file, pricing_provider, pricing_url)
    console.print(
-        f"[bold {PLANO_COLOR}]planoai obs[/] — loading DO pricing catalog...",
+        f"[bold {PLANO_COLOR}]planoai obs[/] — loading {provider} pricing catalog...",
        end="",
    )
-    pricing = PricingCatalog.fetch()
+    pricing = PricingCatalog.fetch(provider=provider, url=url)
    if len(pricing):
        sample = ", ".join(pricing.sample_models(3))
        console.print(
@ -63,7 +137,7 @@ def obs(port: int, host: str, capacity: int, refresh_ms: int) -> None:
    else:
        console.print(
            " [yellow]no pricing loaded[/] — "
-            "[dim]cost column will be blank (DO catalog unreachable)[/]"
+            f"[dim]cost column will be blank ({provider} catalog unreachable)[/]"
        )
    store = LLMCallStore(capacity=capacity)
--- a/cli/test/test_obs_pricing.py
+++ b/cli/test/test_obs_pricing.py
@ -144,3 +144,68 @@ def test_parse_do_catalog_divides_large_values_as_per_million():
    prices = _parse_do_pricing(sample)
    assert prices["mystery-model"].input_per_token_usd == 5.0 / 1_000_000
    assert prices["mystery-model"].output_per_token_usd == 15.0 / 1_000_000
 _MODELS_DEV_SAMPLE = {
    "anthropic": {
        "id": "anthropic",
        "models": {
            "claude-opus-4-5": {
                "id": "claude-opus-4-5",
                "cost": {"input": 5, "output": 25, "cache_read": 0.5},
            }
        },
    },
    "groq": {
        "id": "groq",
        "models": {
            "llama-3.3-70b-versatile": {
                "id": "llama-3.3-70b-versatile",
                "cost": {"input": 0.59, "output": 0.79},
            },
            # No cost block → skipped.
            "whisper-large-v3-turbo": {"id": "whisper-large-v3-turbo"},
        },
    },
 }
 def test_parse_models_dev_composes_provider_keys_and_per_token_rates():
    from planoai.obs.pricing import _parse_models_dev_pricing
    prices = _parse_models_dev_pricing(_MODELS_DEV_SAMPLE)
    # models.dev cost values are per-million → divided by 1e6.
    opus = prices["anthropic/claude-opus-4-5"]
    assert opus.input_per_token_usd == 5 / 1_000_000
    assert opus.output_per_token_usd == 25 / 1_000_000
    assert opus.cached_input_per_token_usd == 0.5 / 1_000_000
    # Composite provider/model keys match Plano's routing names.
    assert "groq/llama-3.3-70b-versatile" in prices
    # Bare model id registered as a fallback.
    assert "llama-3.3-70b-versatile" in prices
    # Models without a cost block are skipped.
    assert "groq/whisper-large-v3-turbo" not in prices
 def test_models_dev_catalog_cost_computation():
    from planoai.obs.pricing import PricingCatalog, _parse_models_dev_pricing
    catalog = PricingCatalog(_parse_models_dev_pricing(_MODELS_DEV_SAMPLE))
    # 1000 input @ 5e-6 = 0.005; 500 output @ 25e-6 = 0.0125
    cost = catalog.cost_for_call(_call("anthropic/claude-opus-4-5", 1000, 500))
    assert cost == round(0.005 + 0.0125, 6)
 def test_models_dev_skips_zero_rate_entries():
    from planoai.obs.pricing import _parse_models_dev_pricing
    sample = {
        "free": {
            "models": {
                "promo-model": {"cost": {"input": 0, "output": 0}},
            }
        }
    }
    assert _parse_models_dev_pricing(sample) == {}
--- a/config/plano_config_schema.yaml
+++ b/config/plano_config_schema.yaml
@ -582,13 +582,17 @@ properties:
              type: string
              enum:
                - digitalocean
                - models.dev
            url:
              type: string
              description: "Optional override for the pricing catalog endpoint. Defaults per provider (digitalocean: DO GenAI catalog; models.dev: https://models.dev/api.json)."
            refresh_interval:
              type: integer
              minimum: 1
              description: "Refresh interval in seconds"
            model_aliases:
              type: object
-              description: "Map DO catalog keys (lowercase(creator)/model_id) to Plano model names used in routing_preferences. Example: 'openai/openai-gpt-oss-120b: openai/gpt-4o'"
+              description: "Map catalog keys to Plano model names used in routing_preferences. DigitalOcean keys are 'lowercase(creator)/model_id'; models.dev keys are 'creator/model_id'. Example: 'openai/openai-gpt-oss-120b: openai/gpt-4o'"
              additionalProperties:
                type: string
          required:
--- a/crates/brightstaff/src/router/model_metrics.rs
+++ b/crates/brightstaff/src/router/model_metrics.rs
@ -9,6 +9,7 @@ use tokio::sync::RwLock;
 use tracing::{debug, info, warn};
 const DO_PRICING_URL: &str = "https://api.digitalocean.com/v2/gen-ai/models/catalog";
 const MODELS_DEV_URL: &str = "https://models.dev/api.json";
 pub struct ModelMetricsService {
    cost: Arc<RwLock<HashMap<String, f64>>>,
@ -22,28 +23,35 @@ impl ModelMetricsService {
        for source in sources {
            match source {
-                MetricsSource::Cost(cfg) => match cfg.provider {
+                MetricsSource::Cost(cfg) => {
-                    CostProvider::Digitalocean => {
+                    let provider = cfg.provider.clone();
-                        let aliases = cfg.model_aliases.clone().unwrap_or_default();
+                    let url = cfg
-                        let data = fetch_do_pricing(&client, &aliases).await;
+                        .url
-                        info!(models = data.len(), "fetched digitalocean pricing");
+                        .clone()
-                        *cost_data.write().await = data;
+                        .unwrap_or_else(|| default_cost_url(&provider).to_string());
                    let aliases = cfg.model_aliases.clone().unwrap_or_default();
                    let provider_name = cost_provider_name(&provider);
-                        if let Some(interval_secs) = cfg.refresh_interval {
+                    let data = fetch_cost_pricing(&provider, &url, &client, &aliases).await;
-                            let cost_clone = Arc::clone(&cost_data);
+                    info!(models = data.len(), provider = provider_name, url = %url, "fetched cost pricing");
-                            let client_clone = client.clone();
+                    *cost_data.write().await = data;
-                            let interval = Duration::from_secs(interval_secs);
+
-                            tokio::spawn(async move {
+                    if let Some(interval_secs) = cfg.refresh_interval {
-                                loop {
+                        let cost_clone = Arc::clone(&cost_data);
-                                    tokio::time::sleep(interval).await;
+                        let client_clone = client.clone();
-                                    let data = fetch_do_pricing(&client_clone, &aliases).await;
+                        let interval = Duration::from_secs(interval_secs);
-                                    info!(models = data.len(), "refreshed digitalocean pricing");
+                        tokio::spawn(async move {
-                                    *cost_clone.write().await = data;
+                            loop {
-                                }
+                                tokio::time::sleep(interval).await;
-                            });
+                                let data =
-                        }
+                                    fetch_cost_pricing(&provider, &url, &client_clone, &aliases)
                                        .await;
                                info!(models = data.len(), provider = provider_name, url = %url, "refreshed cost pricing");
                                *cost_clone.write().await = data;
                            }
                        });
                    }
-                },
+                }
                MetricsSource::Latency(cfg) => match cfg.provider {
                    LatencyProvider::Prometheus => {
                        let data = fetch_prometheus_metrics(&cfg.url, &cfg.query, &client).await;
@ -165,11 +173,55 @@ struct DoPricing {
    output_price_per_million: Option<f64>,
 }
-async fn fetch_do_pricing(
+#[derive(serde::Deserialize)]
 struct ModelsDevProvider {
    #[serde(default)]
    models: HashMap<String, ModelsDevModel>,
 }
 /// One model entry inside a models.dev provider's `models` map. Only the
 /// optional `cost` block is captured here.
 #[derive(serde::Deserialize)]
 struct ModelsDevModel {
    cost: Option<ModelsDevCost>,
 }
 /// The `cost` block of a models.dev model entry. Only the `input` and
 /// `output` rates are captured; either may be absent.
 #[derive(serde::Deserialize)]
 struct ModelsDevCost {
    input: Option<f64>,
    output: Option<f64>,
 }
 /// Returns the built-in catalog endpoint for `provider`, used when the cost
 /// source config does not set `url`.
 fn default_cost_url(provider: &CostProvider) -> &'static str {
    match provider {
        CostProvider::Digitalocean => DO_PRICING_URL,
        CostProvider::ModelsDev => MODELS_DEV_URL,
    }
 }
 /// Returns the provider's name as written in config (`digitalocean` /
 /// `models.dev`); used as the `provider` field in log records.
 fn cost_provider_name(provider: &CostProvider) -> &'static str {
    match provider {
        CostProvider::Digitalocean => "digitalocean",
        CostProvider::ModelsDev => "models.dev",
    }
 }
 async fn fetch_cost_pricing(
    provider: &CostProvider,
    url: &str,
    client: &reqwest::Client,
    aliases: &HashMap<String, String>,
 ) -> HashMap<String, f64> {
-    match client.get(DO_PRICING_URL).send().await {
+    match provider {
        CostProvider::Digitalocean => fetch_do_pricing(url, client, aliases).await,
        CostProvider::ModelsDev => fetch_models_dev_pricing(url, client, aliases).await,
    }
 }
 async fn fetch_do_pricing(
    url: &str,
    client: &reqwest::Client,
    aliases: &HashMap<String, String>,
 ) -> HashMap<String, f64> {
    match client.get(url).send().await {
        Ok(resp) => match resp.json::<DoModelList>().await {
            Ok(list) => list
                .data
@ -184,17 +236,66 @@ async fn fetch_do_pricing(
                })
                .collect(),
            Err(err) => {
-                warn!(error = %err, url = DO_PRICING_URL, "failed to parse digitalocean pricing response");
+                warn!(error = %err, url = %url, "failed to parse digitalocean pricing response");
                HashMap::new()
            }
        },
        Err(err) => {
-            warn!(error = %err, url = DO_PRICING_URL, "failed to fetch digitalocean pricing");
+            warn!(error = %err, url = %url, "failed to fetch digitalocean pricing");
            HashMap::new()
        }
    }
 }
 /// Fetches the models.dev catalog from `url` and converts it to a cost map.
 ///
 /// models.dev publishes a top-level object keyed by provider id; each provider
 /// carries a `models` map keyed by model id, and each model's `cost` block
 /// holds per-million USD rates. The decoded payload is handed to
 /// `parse_models_dev_pricing`, which sums input + output (mirroring the DO
 /// ranking metric) and keys the result by `provider_id/model_id` so it lines
 /// up with Plano's `provider/model` routing names.
 ///
 /// Best-effort: if the request fails or the body cannot be decoded, a warning
 /// is logged and an empty map is returned.
 async fn fetch_models_dev_pricing(
    url: &str,
    client: &reqwest::Client,
    aliases: &HashMap<String, String>,
 ) -> HashMap<String, f64> {
    match client.get(url).send().await {
        Ok(resp) => match resp.json::<HashMap<String, ModelsDevProvider>>().await {
            Ok(providers) => parse_models_dev_pricing(providers, aliases),
            Err(err) => {
                warn!(error = %err, url = %url, "failed to parse models.dev pricing response");
                HashMap::new()
            }
        },
        Err(err) => {
            warn!(error = %err, url = %url, "failed to fetch models.dev pricing");
            HashMap::new()
        }
    }
 }
 fn parse_models_dev_pricing(
    providers: HashMap<String, ModelsDevProvider>,
    aliases: &HashMap<String, String>,
 ) -> HashMap<String, f64> {
    let mut out = HashMap::new();
    for (provider_id, provider) in providers {
        for (model_key, model) in provider.models {
            let Some(cost) = model.cost else { continue };
            let (Some(input), Some(output)) = (cost.input, cost.output) else {
                continue;
            };
            // First-party providers use bare model keys (`claude-opus-4-5`),
            // so compose `provider/model` to line up with Plano routing names.
            let raw_key = format!("{provider_id}/{model_key}");
            let total = input + output;
            let key = aliases.get(&raw_key).cloned().unwrap_or(raw_key);
            out.insert(key, total);
            // Also register the bare model id as a fallback lookup.
            out.entry(model_key).or_insert(total);
        }
    }
    out
 }
 #[derive(serde::Deserialize)]
 struct PrometheusResponse {
    data: PrometheusData,
@ -368,6 +469,50 @@ mod tests {
        assert_eq!(result, vec!["gpt-4o", "gpt-4o-mini"]);
    }
    #[test]
    fn test_parse_models_dev_pricing_composes_provider_keys() {
        let json = r#"{
            "anthropic": {
                "models": {
                    "claude-opus-4-5": {"cost": {"input": 5.0, "output": 25.0}}
                }
            },
            "groq": {
                "models": {
                    "llama-3.3-70b-versatile": {"cost": {"input": 0.59, "output": 0.79}},
                    "whisper-large-v3-turbo": {"cost": null}
                }
            }
        }"#;
        let providers: HashMap<String, ModelsDevProvider> = serde_json::from_str(json).unwrap();
        let aliases = HashMap::new();
        let prices = parse_models_dev_pricing(providers, &aliases);
        assert_eq!(prices.get("anthropic/claude-opus-4-5"), Some(&30.0));
        assert_eq!(prices.get("groq/llama-3.3-70b-versatile"), Some(&1.38));
        // bare fallback also registered
        assert_eq!(prices.get("claude-opus-4-5"), Some(&30.0));
        // models with no cost block are skipped
        assert!(!prices.contains_key("groq/whisper-large-v3-turbo"));
    }
    #[test]
    fn test_parse_models_dev_pricing_applies_aliases() {
        let json = r#"{
            "openai": {"models": {"gpt-oss-120b": {"cost": {"input": 1.0, "output": 2.0}}}}
        }"#;
        let providers: HashMap<String, ModelsDevProvider> = serde_json::from_str(json).unwrap();
        let mut aliases = HashMap::new();
        aliases.insert(
            "openai/gpt-oss-120b".to_string(),
            "openai/gpt-4o".to_string(),
        );
        let prices = parse_models_dev_pricing(providers, &aliases);
        assert_eq!(prices.get("openai/gpt-4o"), Some(&3.0));
        assert!(!prices.contains_key("openai/gpt-oss-120b"));
    }
    #[test]
    fn test_rank_by_ascending_metric_nan_treated_as_missing() {
        let models = vec![
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@ -177,8 +177,13 @@ pub enum MetricsSource {
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct CostMetricsConfig {
    pub provider: CostProvider,
    /// Optional override for the pricing catalog endpoint. When omitted, a
    /// sensible default is used per provider.
    pub url: Option<String>,
    pub refresh_interval: Option<u64>,
-    /// Map DO catalog keys (`lowercase(creator)/model_id`) to Plano model names.
+    /// Map catalog keys to Plano model names used in `routing_preferences`.
    /// DigitalOcean keys look like `lowercase(creator)/model_id`; models.dev
    /// keys look like `creator/model_id`.
    /// Example: `openai/openai-gpt-oss-120b: openai/gpt-4o`
    pub model_aliases: Option<HashMap<String, String>>,
 }
@ -187,6 +192,8 @@ pub struct CostMetricsConfig {
 #[serde(rename_all = "snake_case")]
 pub enum CostProvider {
    Digitalocean,
    #[serde(rename = "models.dev")]
    ModelsDev,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
@ -741,6 +748,51 @@ mod test {
        }
    }
    #[test]
    fn test_deserialize_models_dev_cost_source() {
        let yaml = r#"
 - type: cost
  provider: models.dev
  url: https://models.dev/api.json
  refresh_interval: 3600
  model_aliases:
    openai/gpt-oss-120b: openai/gpt-4o
 "#;
        let sources: Vec<super::MetricsSource> = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(sources.len(), 1);
        match &sources[0] {
            super::MetricsSource::Cost(cfg) => {
                assert!(matches!(cfg.provider, super::CostProvider::ModelsDev));
                assert_eq!(cfg.url.as_deref(), Some("https://models.dev/api.json"));
                assert_eq!(cfg.refresh_interval, Some(3600));
                assert_eq!(
                    cfg.model_aliases
                        .as_ref()
                        .and_then(|m| m.get("openai/gpt-oss-120b"))
                        .map(String::as_str),
                    Some("openai/gpt-4o")
                );
            }
            other => panic!("expected cost source, got {other:?}"),
        }
    }
    #[test]
    fn test_deserialize_digitalocean_cost_source_without_url() {
        let yaml = r#"
 - type: cost
  provider: digitalocean
 "#;
        let sources: Vec<super::MetricsSource> = serde_yaml::from_str(yaml).unwrap();
        match &sources[0] {
            super::MetricsSource::Cost(cfg) => {
                assert!(matches!(cfg.provider, super::CostProvider::Digitalocean));
                assert_eq!(cfg.url, None);
            }
            other => panic!("expected cost source, got {other:?}"),
        }
    }
    #[test]
    fn test_into_models_filters_internal_providers() {
        let providers = vec![
--- a/docs/source/guides/llm_router.rst
+++ b/docs/source/guides/llm_router.rst
@ -209,6 +209,178 @@ Clients can let the router decide or still specify aliases:
    )
 .. _cost_latency_aware_selection:
 Cost- and latency-aware selection
 ---------------------------------
 When a route lists more than one candidate model, you can let Plano reorder that
 candidate pool using **live cost or latency data** instead of relying solely on the
 order you wrote them in. This is controlled per route with ``selection_policy`` and
 backed by one or more ``model_metrics_sources``.
 This is useful when several models are equally capable for a route and you want Plano
 to always reach for the cheapest (or fastest) option first, with the others kept as
 fallbacks.
 Selection policy
 ~~~~~~~~~~~~~~~~~
 Attach an optional ``selection_policy`` to any entry in ``routing_preferences``:
 .. code-block:: yaml
    :caption: Per-route selection policy
    routing_preferences:
      - name: code review
        description: reviewing, analyzing, and suggesting improvements to existing code
        models:
          - anthropic/claude-sonnet-4-5
          - groq/llama-3.3-70b-versatile
        selection_policy:
          prefer: cheapest   # cheapest | fastest | none
 ``prefer`` accepts:
 - ``cheapest`` — order candidates by total price (input + output rate) ascending, using a ``cost`` metrics source.
 - ``fastest`` — order candidates by observed latency ascending, using a ``latency`` metrics source.
 - ``none`` (default) — keep the order you declared; no reordering.
 Models that have no data in the selected source are ranked **last**, in their original
 order, so routing always degrades gracefully rather than dropping a candidate.
 Configuring the pricing source
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 ``cheapest`` routing needs a price catalog. Plano's **default pricing provider is
 DigitalOcean** — its GenAI model catalog is public (no API key, no signup), so cost data
 is available out of the box and is what ``planoai obs`` uses if you don't configure
 anything. The pricing source is fully swappable: point Plano at `models.dev <https://models.dev/>`_,
 or at **any endpoint that exposes a supported pricing structure**.
 The ``provider`` field selects which response schema Plano expects (and therefore how it
 parses the catalog); the optional ``url`` lets you override the endpoint — for example to
 use a mirror, a cached copy, or an internal catalog service that returns the same shape.
 .. list-table::
   :header-rows: 1
   :widths: 18 34 28 20
   * - ``provider``
     - Default catalog URL
     - Key format
     - Expected structure
   * - ``digitalocean`` *(default)*
     - DigitalOcean GenAI model catalog
     - ``lowercase(creator)/model_id``
     - ``{ data: [ { model_id, pricing: { input_price_per_million, output_price_per_million } } ] }``
   * - ``models.dev``
     - ``https://models.dev/api.json``
     - ``creator/model`` (e.g. ``anthropic/claude-sonnet-4-5``)
     - ``{ <provider>: { models: { <model>: { cost: { input, output } } } } }``
 Because the source is selected per ``provider``, switching is a one-line change. To stay
 on the default DigitalOcean catalog you can omit ``model_metrics_sources`` entirely for
 ``planoai obs``, or declare it explicitly for routing:
 .. code-block:: yaml
    :caption: Default cost source (DigitalOcean)
    model_metrics_sources:
      - type: cost
        provider: digitalocean   # default; uses the public DO GenAI catalog
 To switch to models.dev — an open, community-maintained catalog covering a broad range of
 providers and models — change the ``provider`` (and optionally ``url``):
 .. code-block:: yaml
    :caption: Cost source backed by models.dev
    model_metrics_sources:
      - type: cost
        provider: models.dev               # models.dev | digitalocean
        url: https://models.dev/api.json    # optional; defaults per provider
        refresh_interval: 3600              # optional, seconds; refetch on this interval
        model_aliases:                      # optional; see below
          openai/gpt-oss-120b: openai/gpt-4o
 To use your own endpoint, pick the ``provider`` whose structure your endpoint matches and
 override ``url`` — Plano parses the response with that provider's schema:
 .. code-block:: yaml
    :caption: Custom endpoint exposing the DigitalOcean catalog structure
    model_metrics_sources:
      - type: cost
        provider: digitalocean                       # selects the DO response schema
        url: https://catalog.internal.example.com/pricing
 .. note::
   The cost metric used for ranking is the sum of the input and output per-million-token
   rates — a relative signal for ordering candidates, not a per-request bill. For actual
   per-request cost, see the observability console below.
 Matching catalog keys to your models
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The router looks up each candidate model by the exact name you use in
 ``routing_preferences`` (e.g. ``anthropic/claude-sonnet-4-5``). For models.dev, Plano
 builds each key as ``provider/model`` from the catalog's provider id and model id (and
 also registers the bare model id as a fallback). This lines up with Plano's
 ``provider/model`` naming, so most models match automatically.
 When a catalog key does not match your model name — for example a version skew, or an
 open-weight model you serve under a different provider — use ``model_aliases`` to map the
 **catalog key** to the **Plano model name** used in your routing preferences:
 .. code-block:: yaml
    model_metrics_sources:
      - type: cost
        provider: models.dev
        model_aliases:
          # catalog key            : plano model name
          openai/gpt-oss-120b: openai/gpt-4o
 Latency source
 ~~~~~~~~~~~~~~~
 ``fastest`` routing reads observed latency from a Prometheus instance. Provide the query
 that returns a per-model latency value (lower is faster), labelled by ``model_name``:
 .. code-block:: yaml
    :caption: Latency source backed by Prometheus
    model_metrics_sources:
      - type: latency
        provider: prometheus
        url: http://prometheus:9090
        query: avg by (model_name) (rate(plano_llm_latency_seconds_sum[5m]))
        refresh_interval: 60
 You can declare both a ``cost`` and a ``latency`` source at the same time; each route
 picks whichever it needs based on its ``selection_policy``.
 Cost in the observability console
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 ``planoai obs`` displays a per-request USD cost column derived from the same pricing
 catalog. By default it reads the ``cost`` source from your config (the first
 ``type: cost`` entry under ``model_metrics_sources``); you can also override it on the
 command line:
 .. code-block:: bash
    # Use the cost source from ./config.yaml (default)
    planoai obs
    # Or override the provider / endpoint explicitly
    planoai obs --pricing-provider models.dev
    planoai obs --pricing-url https://models.dev/api.json
 If no source is configured and no override is given, ``planoai obs`` falls back to the
 DigitalOcean catalog so the cost column still populates out of the box.
 Plano-Orchestrator
 -------------------
 Plano-Orchestrator is a **preference-based routing model** specifically designed to address the limitations of traditional LLM routing. It delivers production-ready performance with low latency and high accuracy while solving key routing challenges.
--- a/docs/source/resources/includes/plano_config_full_reference.yaml
+++ b/docs/source/resources/includes/plano_config_full_reference.yaml
@ -86,6 +86,24 @@ routing_preferences:
    selection_policy:
      prefer: cheapest
 # model_metrics_sources: external catalogs the router reads to reorder candidate
 # models for selection_policy.prefer. A `cost` source ranks `prefer: cheapest`;
 # a `latency` source ranks `prefer: fastest`. Both are optional.
 model_metrics_sources:
  # Cost catalog. provider: models.dev | digitalocean (default url per provider).
  - type: cost
    provider: models.dev
    url: https://models.dev/api.json   # optional; omit to use the provider default
    refresh_interval: 3600             # optional, seconds
    model_aliases:                     # optional: catalog key -> Plano model name
      openai/gpt-oss-120b: openai/gpt-4o
  # Latency catalog (Prometheus). Used for selection_policy.prefer: fastest.
  - type: latency
    provider: prometheus
    url: http://prometheus:9090
    query: avg by (model_name) (rate(plano_llm_latency_seconds_sum[5m]))
    refresh_interval: 60
 # HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
 listeners:
  # Agent listener for routing requests to multiple agents
--- a/docs/source/resources/includes/plano_config_full_reference_rendered.yaml
+++ b/docs/source/resources/includes/plano_config_full_reference_rendered.yaml
@ -115,6 +115,18 @@ model_aliases:
    target: gpt-4o-mini
  smart-llm:
    target: gpt-4o
 model_metrics_sources:
 - model_aliases:
    openai/gpt-oss-120b: openai/gpt-4o
  provider: models.dev
  refresh_interval: 3600
  type: cost
  url: https://models.dev/api.json
 - provider: prometheus
  query: avg by (model_name) (rate(plano_llm_latency_seconds_sum[5m]))
  refresh_interval: 60
  type: latency
  url: http://prometheus:9090
 model_providers:
 - access_key: $OPENAI_API_KEY
  default: true