feat: make model pricing source configurable (models.dev + DigitalOcean) (#971)

2026-06-26 15:39:40 +02:00 · 2026-06-24 10:14:12 -07:00 · 2026-06-24 10:14:12 -07:00 · 558df0307c
commit 558df0307c
parent 5cc4c4ee77
9 changed files with 687 additions and 48 deletions
--- a/cli/planoai/obs/pricing.py
+++ b/cli/planoai/obs/pricing.py
@ -1,7 +1,8 @@
-"""DigitalOcean Gradient pricing catalog for the obs console.
+"""Model pricing catalog for the obs console.

-Ported loosely from ``crates/brightstaff/src/router/model_metrics.rs::fetch_do_pricing``.
-Single-source: one fetch at startup, cached for the life of the process.
+Mirrors ``crates/brightstaff/src/router/model_metrics.rs``. The source is
+configurable: ``digitalocean`` (DO GenAI catalog) or ``models.dev``. A single
+fetch at startup is cached for the life of the process.
 """

 from __future__ import annotations
@ -14,7 +15,18 @@ from typing import Any

 import requests

-DEFAULT_PRICING_URL = "https://api.digitalocean.com/v2/gen-ai/models/catalog"
+DO_PRICING_URL = "https://api.digitalocean.com/v2/gen-ai/models/catalog"
+MODELS_DEV_URL = "https://models.dev/api.json"
+
+# Backwards-compatible default (DigitalOcean) used when no provider is given.
+DEFAULT_PRICING_URL = DO_PRICING_URL
+DEFAULT_PRICING_PROVIDER = "digitalocean"
+
+_DEFAULT_URLS = {
+    "digitalocean": DO_PRICING_URL,
+    "models.dev": MODELS_DEV_URL,
+}
+
 FETCH_TIMEOUT_SECS = 5.0


@ -51,36 +63,52 @@ class PricingCatalog:
            return list(self._prices.keys())[:n]

    @classmethod
-    def fetch(cls, url: str = DEFAULT_PRICING_URL) -> "PricingCatalog":
-        """Fetch pricing from DO's catalog endpoint. On failure, returns an
+    def fetch(
+        cls,
+        provider: str = DEFAULT_PRICING_PROVIDER,
+        url: str | None = None,
+    ) -> "PricingCatalog":
+        """Fetch pricing from the configured catalog. On failure, returns an
        empty catalog (cost column will be blank).

-        The catalog endpoint is public — no auth required, no signup — so
-        ``planoai obs`` gets cost data on first run out of the box.
+        ``provider`` selects the parser/default URL: ``digitalocean`` or
+        ``models.dev``. Both catalog endpoints are public — no auth required —
+        so ``planoai obs`` gets cost data on first run out of the box.
        """
+        provider = (provider or DEFAULT_PRICING_PROVIDER).strip().lower()
+        resolved_url = url or _DEFAULT_URLS.get(provider, DO_PRICING_URL)
        try:
-            resp = requests.get(url, timeout=FETCH_TIMEOUT_SECS)
+            resp = requests.get(resolved_url, timeout=FETCH_TIMEOUT_SECS)
            resp.raise_for_status()
            data = resp.json()
        except Exception as exc:  # noqa: BLE001 — best-effort; never fatal
            logger.warning(
-                "DO pricing fetch failed: %s; cost column will be blank.",
+                "%s pricing fetch failed: %s; cost column will be blank.",
+                provider,
                exc,
            )
            return cls()

-        prices = _parse_do_pricing(data)
+        if provider == "models.dev":
+            prices = _parse_models_dev_pricing(data)
+        else:
+            prices = _parse_do_pricing(data)
+
        if not prices:
-            # Dump the first entry's raw shape so we can see which fields DO
-            # actually returned — helps when the catalog adds new fields or
-            # the response doesn't match our parser.
+            # Dump a sample of the raw shape so we can see which fields the
+            # catalog returned — helps when it adds new fields or the response
+            # doesn't match our parser.
            import json as _json

-            sample_items = _coerce_items(data)
-            sample = sample_items[0] if sample_items else data
+            if provider == "models.dev" and isinstance(data, dict):
+                sample = next(iter(data.values()), data)
+            else:
+                sample_items = _coerce_items(data)
+                sample = sample_items[0] if sample_items else data
            logger.warning(
-                "DO pricing response had no parseable entries; cost column "
+                "%s pricing response had no parseable entries; cost column "
                "will be blank. Sample entry: %s",
+                provider,
                _json.dumps(sample, default=str)[:400],
            )
        return cls(prices)
@ -278,6 +306,75 @@ def _parse_do_pricing(data: Any) -> dict[str, ModelPrice]:
    return prices


+def _parse_models_dev_pricing(data: Any) -> dict[str, ModelPrice]:
+    """Parse a models.dev ``api.json`` response into a ModelPrice map.
+
+    models.dev shape (top-level object keyed by provider id)::
+
+        {
+          "anthropic": {
+            "models": {
+              "claude-opus-4-5": {
+                "cost": {"input": 5, "output": 25, "cache_read": 0.5}
+              }
+            }
+          },
+          ...
+        }
+
+    ``cost.*`` values are USD per *million* tokens, so we divide by 1e6 to get a
+    per-token rate. First-party providers use bare model keys, so we register
+    both ``provider/model`` (matching Plano's routing names) and the bare model
+    id as a fallback.
+    """
+    prices: dict[str, ModelPrice] = {}
+    if not isinstance(data, dict):
+        return prices
+
+    for provider_id, provider in data.items():
+        if not isinstance(provider, dict):
+            continue
+        models = provider.get("models")
+        if not isinstance(models, dict):
+            continue
+        for model_key, model in models.items():
+            if not isinstance(model, dict):
+                continue
+            cost = model.get("cost")
+            if not isinstance(cost, dict):
+                continue
+            input_pm = _as_float(cost.get("input"))
+            output_pm = _as_float(cost.get("output"))
+            if input_pm is None or output_pm is None:
+                continue
+            # Skip 0-rate entries so cost falls back to `—` rather than $0.0000.
+            if input_pm == 0 and output_pm == 0:
+                continue
+            cached_pm = _as_float(cost.get("cache_read"))
+            price = ModelPrice(
+                input_per_token_usd=input_pm / 1_000_000,
+                output_per_token_usd=output_pm / 1_000_000,
+                cached_input_per_token_usd=(
+                    cached_pm / 1_000_000 if cached_pm is not None else None
+                ),
+            )
+            composite = f"{provider_id}/{model_key}"
+            prices[composite] = price
+            prices.setdefault(composite.lower(), price)
+            prices.setdefault(str(model_key), price)
+            prices.setdefault(str(model_key).lower(), price)
+    return prices
+
+
+def _as_float(value: Any) -> float | None:
+    if value is None:
+        return None
+    try:
+        return float(value)
+    except (TypeError, ValueError):
+        return None
+
+
 def _coerce_items(data: Any) -> list[dict]:
    if isinstance(data, list):
        return [x for x in data if isinstance(x, dict)]
--- a/cli/planoai/obs_cmd.py
+++ b/cli/planoai/obs_cmd.py
@ -2,9 +2,12 @@

 from __future__ import annotations

+import logging
+import os
 import time

 import rich_click as click
+import yaml
 from rich.console import Console
 from rich.live import Live

@ -15,8 +18,50 @@ from planoai.obs.collector import (
    LLMCallStore,
    ObsCollector,
 )
-from planoai.obs.pricing import PricingCatalog
+from planoai.obs.pricing import DEFAULT_PRICING_PROVIDER, PricingCatalog
 from planoai.obs.render import render
+from planoai.utils import find_config_file
+
+logger = logging.getLogger(__name__)
+
+
+def _resolve_pricing_source(
+    config_file: str | None,
+    provider_override: str | None,
+    url_override: str | None,
+) -> tuple[str, str | None]:
+    """Pick the cost pricing source.
+
+    Precedence: explicit CLI overrides > the first ``type: cost`` entry in
+    ``model_metrics_sources`` from the Plano config > the DigitalOcean default.
+    """
+    provider = DEFAULT_PRICING_PROVIDER
+    url: str | None = None
+
+    config_path = find_config_file(file=config_file)
+    if config_path and os.path.exists(config_path):
+        try:
+            with open(config_path, "r") as f:
+                config = yaml.safe_load(f) or {}
+            sources = config.get("model_metrics_sources") or []
+            for source in sources:
+                if isinstance(source, dict) and source.get("type") == "cost":
+                    if source.get("provider"):
+                        provider = str(source["provider"])
+                    if source.get("url"):
+                        url = str(source["url"])
+                    break
+        except Exception as exc:  # noqa: BLE001 — config is optional for obs
+            logger.warning(
+                "could not read pricing source from %s: %s", config_path, exc
+            )
+
+    if provider_override:
+        provider = provider_override
+    if url_override:
+        url = url_override
+
+    return provider, url


@click.command(name="obs", help="Live observability console for Plano LLM traffic.")
@ -48,13 +93,42 @@ from planoai.obs.render import render
    show_default=True,
    help="TUI refresh interval.",
 )
-def obs(port: int, host: str, capacity: int, refresh_ms: int) -> None:
+@click.option(
+    "--config",
+    "config_file",
+    type=str,
+    default=None,
+    help="Path to the Plano config to read the pricing source from "
+    "(defaults to ./config.yaml or ./plano_config.yaml).",
+)
+@click.option(
+    "--pricing-provider",
+    type=click.Choice(["digitalocean", "models.dev"]),
+    default=None,
+    help="Override the cost pricing provider (otherwise read from config).",
+)
+@click.option(
+    "--pricing-url",
+    type=str,
+    default=None,
+    help="Override the pricing catalog URL (otherwise read from config / provider default).",
+)
+def obs(
+    port: int,
+    host: str,
+    capacity: int,
+    refresh_ms: int,
+    config_file: str | None,
+    pricing_provider: str | None,
+    pricing_url: str | None,
+) -> None:
    console = Console()
+    provider, url = _resolve_pricing_source(config_file, pricing_provider, pricing_url)
    console.print(
-        f"[bold {PLANO_COLOR}]planoai obs[/] — loading DO pricing catalog...",
+        f"[bold {PLANO_COLOR}]planoai obs[/] — loading {provider} pricing catalog...",
        end="",
    )
-    pricing = PricingCatalog.fetch()
+    pricing = PricingCatalog.fetch(provider=provider, url=url)
    if len(pricing):
        sample = ", ".join(pricing.sample_models(3))
        console.print(
@ -63,7 +137,7 @@ def obs(port: int, host: str, capacity: int, refresh_ms: int) -> None:
    else:
        console.print(
            " [yellow]no pricing loaded[/] — "
-            "[dim]cost column will be blank (DO catalog unreachable)[/]"
+            f"[dim]cost column will be blank ({provider} catalog unreachable)[/]"
        )

    store = LLMCallStore(capacity=capacity)
--- a/cli/test/test_obs_pricing.py
+++ b/cli/test/test_obs_pricing.py
@ -144,3 +144,68 @@ def test_parse_do_catalog_divides_large_values_as_per_million():
    prices = _parse_do_pricing(sample)
    assert prices["mystery-model"].input_per_token_usd == 5.0 / 1_000_000
    assert prices["mystery-model"].output_per_token_usd == 15.0 / 1_000_000
+
+
+_MODELS_DEV_SAMPLE = {
+    "anthropic": {
+        "id": "anthropic",
+        "models": {
+            "claude-opus-4-5": {
+                "id": "claude-opus-4-5",
+                "cost": {"input": 5, "output": 25, "cache_read": 0.5},
+            }
+        },
+    },
+    "groq": {
+        "id": "groq",
+        "models": {
+            "llama-3.3-70b-versatile": {
+                "id": "llama-3.3-70b-versatile",
+                "cost": {"input": 0.59, "output": 0.79},
+            },
+            # No cost block → skipped.
+            "whisper-large-v3-turbo": {"id": "whisper-large-v3-turbo"},
+        },
+    },
+}
+
+
+def test_parse_models_dev_composes_provider_keys_and_per_token_rates():
+    from planoai.obs.pricing import _parse_models_dev_pricing
+
+    prices = _parse_models_dev_pricing(_MODELS_DEV_SAMPLE)
+
+    # models.dev cost values are per-million → divided by 1e6.
+    opus = prices["anthropic/claude-opus-4-5"]
+    assert opus.input_per_token_usd == 5 / 1_000_000
+    assert opus.output_per_token_usd == 25 / 1_000_000
+    assert opus.cached_input_per_token_usd == 0.5 / 1_000_000
+
+    # Composite provider/model keys match Plano's routing names.
+    assert "groq/llama-3.3-70b-versatile" in prices
+    # Bare model id registered as a fallback.
+    assert "llama-3.3-70b-versatile" in prices
+    # Models without a cost block are skipped.
+    assert "groq/whisper-large-v3-turbo" not in prices
+
+
+def test_models_dev_catalog_cost_computation():
+    from planoai.obs.pricing import PricingCatalog, _parse_models_dev_pricing
+
+    catalog = PricingCatalog(_parse_models_dev_pricing(_MODELS_DEV_SAMPLE))
+    # 1000 input @ 5e-6 = 0.005; 500 output @ 25e-6 = 0.0125
+    cost = catalog.cost_for_call(_call("anthropic/claude-opus-4-5", 1000, 500))
+    assert cost == round(0.005 + 0.0125, 6)
+
+
+def test_models_dev_skips_zero_rate_entries():
+    from planoai.obs.pricing import _parse_models_dev_pricing
+
+    sample = {
+        "free": {
+            "models": {
+                "promo-model": {"cost": {"input": 0, "output": 0}},
+            }
+        }
+    }
+    assert _parse_models_dev_pricing(sample) == {}
--- a/config/plano_config_schema.yaml
+++ b/config/plano_config_schema.yaml
@ -582,13 +582,17 @@ properties:
              type: string
              enum:
                - digitalocean
+                - models.dev
+            url:
+              type: string
+              description: "Optional override for the pricing catalog endpoint. Defaults per provider (digitalocean: DO GenAI catalog; models.dev: https://models.dev/api.json)."
            refresh_interval:
              type: integer
              minimum: 1
              description: "Refresh interval in seconds"
            model_aliases:
              type: object
-              description: "Map DO catalog keys (lowercase(creator)/model_id) to Plano model names used in routing_preferences. Example: 'openai/openai-gpt-oss-120b: openai/gpt-4o'"
+              description: "Map catalog keys to Plano model names used in routing_preferences. DigitalOcean keys are 'lowercase(creator)/model_id'; models.dev keys are 'creator/model_id'. Example: 'openai/openai-gpt-oss-120b: openai/gpt-4o'"
              additionalProperties:
                type: string
          required:
--- a/crates/brightstaff/src/router/model_metrics.rs
+++ b/crates/brightstaff/src/router/model_metrics.rs
@ -9,6 +9,7 @@ use tokio::sync::RwLock;
 use tracing::{debug, info, warn};

 const DO_PRICING_URL: &str = "https://api.digitalocean.com/v2/gen-ai/models/catalog";
+const MODELS_DEV_URL: &str = "https://models.dev/api.json";

 pub struct ModelMetricsService {
    cost: Arc<RwLock<HashMap<String, f64>>>,
@ -22,28 +23,35 @@ impl ModelMetricsService {

        for source in sources {
            match source {
-                MetricsSource::Cost(cfg) => match cfg.provider {
-                    CostProvider::Digitalocean => {
-                        let aliases = cfg.model_aliases.clone().unwrap_or_default();
-                        let data = fetch_do_pricing(&client, &aliases).await;
-                        info!(models = data.len(), "fetched digitalocean pricing");
-                        *cost_data.write().await = data;
+                MetricsSource::Cost(cfg) => {
+                    let provider = cfg.provider.clone();
+                    let url = cfg
+                        .url
+                        .clone()
+                        .unwrap_or_else(|| default_cost_url(&provider).to_string());
+                    let aliases = cfg.model_aliases.clone().unwrap_or_default();
+                    let provider_name = cost_provider_name(&provider);

-                        if let Some(interval_secs) = cfg.refresh_interval {
-                            let cost_clone = Arc::clone(&cost_data);
-                            let client_clone = client.clone();
-                            let interval = Duration::from_secs(interval_secs);
-                            tokio::spawn(async move {
-                                loop {
-                                    tokio::time::sleep(interval).await;
-                                    let data = fetch_do_pricing(&client_clone, &aliases).await;
-                                    info!(models = data.len(), "refreshed digitalocean pricing");
-                                    *cost_clone.write().await = data;
-                                }
-                            });
-                        }
+                    let data = fetch_cost_pricing(&provider, &url, &client, &aliases).await;
+                    info!(models = data.len(), provider = provider_name, url = %url, "fetched cost pricing");
+                    *cost_data.write().await = data;
+
+                    if let Some(interval_secs) = cfg.refresh_interval {
+                        let cost_clone = Arc::clone(&cost_data);
+                        let client_clone = client.clone();
+                        let interval = Duration::from_secs(interval_secs);
+                        tokio::spawn(async move {
+                            loop {
+                                tokio::time::sleep(interval).await;
+                                let data =
+                                    fetch_cost_pricing(&provider, &url, &client_clone, &aliases)
+                                        .await;
+                                info!(models = data.len(), provider = provider_name, url = %url, "refreshed cost pricing");
+                                *cost_clone.write().await = data;
+                            }
+                        });
                    }
-                },
+                }
                MetricsSource::Latency(cfg) => match cfg.provider {
                    LatencyProvider::Prometheus => {
                        let data = fetch_prometheus_metrics(&cfg.url, &cfg.query, &client).await;
@ -165,11 +173,55 @@ struct DoPricing {
    output_price_per_million: Option<f64>,
 }

-async fn fetch_do_pricing(
+/// One provider entry from the models.dev catalog (the value under each
+/// top-level provider id).
+#[derive(serde::Deserialize)]
+struct ModelsDevProvider {
+    /// Models offered by this provider, keyed by model id. Defaults to an
+    /// empty map when the field is absent from the response.
+    #[serde(default)]
+    models: HashMap<String, ModelsDevModel>,
+}
+
+/// A single model entry inside a models.dev provider's `models` map.
+#[derive(serde::Deserialize)]
+struct ModelsDevModel {
+    /// Pricing block; `None` when the model has no (or a null) `cost`, in
+    /// which case the model is skipped during parsing.
+    cost: Option<ModelsDevCost>,
+}
+
+/// The `cost` block of a models.dev model: per-million-token USD rates.
+#[derive(serde::Deserialize)]
+struct ModelsDevCost {
+    /// Input (prompt) rate; optional in the response.
+    input: Option<f64>,
+    /// Output (completion) rate; optional in the response.
+    output: Option<f64>,
+}
+
+/// Built-in catalog endpoint for `provider`, used when the cost source does
+/// not set an explicit `url`.
+fn default_cost_url(provider: &CostProvider) -> &'static str {
+    match *provider {
+        CostProvider::ModelsDev => MODELS_DEV_URL,
+        CostProvider::Digitalocean => DO_PRICING_URL,
+    }
+}
+
+/// Human-readable provider name used as the `provider` field in log events.
+fn cost_provider_name(provider: &CostProvider) -> &'static str {
+    match *provider {
+        CostProvider::ModelsDev => "models.dev",
+        CostProvider::Digitalocean => "digitalocean",
+    }
+}
+
+async fn fetch_cost_pricing(
+    provider: &CostProvider,
+    url: &str,
    client: &reqwest::Client,
    aliases: &HashMap<String, String>,
 ) -> HashMap<String, f64> {
-    match client.get(DO_PRICING_URL).send().await {
+    match provider {
+        CostProvider::Digitalocean => fetch_do_pricing(url, client, aliases).await,
+        CostProvider::ModelsDev => fetch_models_dev_pricing(url, client, aliases).await,
+    }
+}
+
+async fn fetch_do_pricing(
+    url: &str,
+    client: &reqwest::Client,
+    aliases: &HashMap<String, String>,
+) -> HashMap<String, f64> {
+    match client.get(url).send().await {
        Ok(resp) => match resp.json::<DoModelList>().await {
            Ok(list) => list
                .data
@ -184,17 +236,66 @@ async fn fetch_do_pricing(
                })
                .collect(),
            Err(err) => {
-                warn!(error = %err, url = DO_PRICING_URL, "failed to parse digitalocean pricing response");
+                warn!(error = %err, url = %url, "failed to parse digitalocean pricing response");
                HashMap::new()
            }
        },
        Err(err) => {
-            warn!(error = %err, url = DO_PRICING_URL, "failed to fetch digitalocean pricing");
+            warn!(error = %err, url = %url, "failed to fetch digitalocean pricing");
            HashMap::new()
        }
    }
 }

+/// Fetch and parse the models.dev pricing catalog from `url`.
+///
+/// models.dev publishes a top-level object keyed by provider id; each provider
+/// carries a `models` map keyed by model id (first-party providers use bare
+/// ids such as `claude-opus-4-5`) whose `cost` block holds per-million USD
+/// rates. Parsing is delegated to `parse_models_dev_pricing`, which sums
+/// input + output (mirroring the DO ranking metric) and keys the result by
+/// `provider_id/model_key` so it lines up with Plano's `provider/model`
+/// routing names.
+///
+/// Best-effort: on a transport error, a non-success HTTP status, or an
+/// unparseable body, a warning is logged and an empty map is returned.
+async fn fetch_models_dev_pricing(
+    url: &str,
+    client: &reqwest::Client,
+    aliases: &HashMap<String, String>,
+) -> HashMap<String, f64> {
+    // Treat 4xx/5xx as a fetch failure instead of trying to decode the error
+    // body, which would otherwise surface as a misleading parse warning.
+    let resp = match client
+        .get(url)
+        .send()
+        .await
+        .and_then(|resp| resp.error_for_status())
+    {
+        Ok(resp) => resp,
+        Err(err) => {
+            warn!(error = %err, url = %url, "failed to fetch models.dev pricing");
+            return HashMap::new();
+        }
+    };
+
+    match resp.json::<HashMap<String, ModelsDevProvider>>().await {
+        Ok(providers) => parse_models_dev_pricing(providers, aliases),
+        Err(err) => {
+            warn!(error = %err, url = %url, "failed to parse models.dev pricing response");
+            HashMap::new()
+        }
+    }
+}
+
+/// Convert a decoded models.dev catalog into a `model name -> cost` map.
+///
+/// The cost is `input + output` (per-million USD rates). Each priced model is
+/// stored under `provider_id/model_key`, or under the alias target when
+/// `aliases` contains that composite key, and additionally under the bare
+/// model id when that key is still free. Models without a `cost` block, or
+/// missing either rate, are skipped.
+///
+/// Providers and models are visited in sorted key order. `HashMap` iteration
+/// order is arbitrary, so without sorting, the provider that supplies the
+/// bare-id fallback (and the winner of any alias/key collision) could change
+/// from one fetch or refresh to the next, making rankings unstable.
+fn parse_models_dev_pricing(
+    providers: HashMap<String, ModelsDevProvider>,
+    aliases: &HashMap<String, String>,
+) -> HashMap<String, f64> {
+    let mut sorted_providers: Vec<(String, ModelsDevProvider)> = providers.into_iter().collect();
+    sorted_providers.sort_by(|a, b| a.0.cmp(&b.0));
+
+    let mut out = HashMap::new();
+    for (provider_id, provider) in sorted_providers {
+        let mut sorted_models: Vec<(String, ModelsDevModel)> = provider.models.into_iter().collect();
+        sorted_models.sort_by(|a, b| a.0.cmp(&b.0));
+
+        for (model_key, model) in sorted_models {
+            let Some(cost) = model.cost else { continue };
+            let (Some(input), Some(output)) = (cost.input, cost.output) else {
+                continue;
+            };
+            // First-party providers use bare model keys (`claude-opus-4-5`),
+            // so compose `provider/model` to line up with Plano routing names.
+            let raw_key = format!("{provider_id}/{model_key}");
+            let total = input + output;
+            let key = aliases.get(&raw_key).cloned().unwrap_or(raw_key);
+            out.insert(key, total);
+            // Also register the bare model id as a fallback lookup; never
+            // overwrite a key that is already present.
+            out.entry(model_key).or_insert(total);
+        }
+    }
+    out
+}
+
 #[derive(serde::Deserialize)]
 struct PrometheusResponse {
    data: PrometheusData,
@ -368,6 +469,50 @@ mod tests {
        assert_eq!(result, vec!["gpt-4o", "gpt-4o-mini"]);
    }

+    #[test]
+    fn test_parse_models_dev_pricing_composes_provider_keys() {
+        let json = r#"{
+            "anthropic": {
+                "models": {
+                    "claude-opus-4-5": {"cost": {"input": 5.0, "output": 25.0}}
+                }
+            },
+            "groq": {
+                "models": {
+                    "llama-3.3-70b-versatile": {"cost": {"input": 0.59, "output": 0.79}},
+                    "whisper-large-v3-turbo": {"cost": null}
+                }
+            }
+        }"#;
+        let providers: HashMap<String, ModelsDevProvider> = serde_json::from_str(json).unwrap();
+        let aliases = HashMap::new();
+        let prices = parse_models_dev_pricing(providers, &aliases);
+
+        assert_eq!(prices.get("anthropic/claude-opus-4-5"), Some(&30.0));
+        assert_eq!(prices.get("groq/llama-3.3-70b-versatile"), Some(&1.38));
+        // bare fallback also registered
+        assert_eq!(prices.get("claude-opus-4-5"), Some(&30.0));
+        // models with no cost block are skipped
+        assert!(!prices.contains_key("groq/whisper-large-v3-turbo"));
+    }
+
+    #[test]
+    fn test_parse_models_dev_pricing_applies_aliases() {
+        let json = r#"{
+            "openai": {"models": {"gpt-oss-120b": {"cost": {"input": 1.0, "output": 2.0}}}}
+        }"#;
+        let providers: HashMap<String, ModelsDevProvider> = serde_json::from_str(json).unwrap();
+        let mut aliases = HashMap::new();
+        aliases.insert(
+            "openai/gpt-oss-120b".to_string(),
+            "openai/gpt-4o".to_string(),
+        );
+        let prices = parse_models_dev_pricing(providers, &aliases);
+
+        assert_eq!(prices.get("openai/gpt-4o"), Some(&3.0));
+        assert!(!prices.contains_key("openai/gpt-oss-120b"));
+    }
+
    #[test]
    fn test_rank_by_ascending_metric_nan_treated_as_missing() {
        let models = vec![
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@ -177,8 +177,13 @@ pub enum MetricsSource {
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct CostMetricsConfig {
    pub provider: CostProvider,
+    /// Optional override for the pricing catalog endpoint. When omitted, a
+    /// sensible default is used per provider.
+    pub url: Option<String>,
    pub refresh_interval: Option<u64>,
-    /// Map DO catalog keys (`lowercase(creator)/model_id`) to Plano model names.
+    /// Map catalog keys to Plano model names used in `routing_preferences`.
+    /// DigitalOcean keys look like `lowercase(creator)/model_id`; models.dev
+    /// keys look like `creator/model_id`.
    /// Example: `openai/openai-gpt-oss-120b: openai/gpt-4o`
    pub model_aliases: Option<HashMap<String, String>>,
 }
@ -187,6 +192,8 @@ pub struct CostMetricsConfig {
 #[serde(rename_all = "snake_case")]
 pub enum CostProvider {
    Digitalocean,
+    #[serde(rename = "models.dev")]
+    ModelsDev,
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@ -741,6 +748,51 @@ mod test {
        }
    }

+    #[test]
+    fn test_deserialize_models_dev_cost_source() {
+        let yaml = r#"
+- type: cost
+  provider: models.dev
+  url: https://models.dev/api.json
+  refresh_interval: 3600
+  model_aliases:
+    openai/gpt-oss-120b: openai/gpt-4o
+"#;
+        let sources: Vec<super::MetricsSource> = serde_yaml::from_str(yaml).unwrap();
+        assert_eq!(sources.len(), 1);
+        match &sources[0] {
+            super::MetricsSource::Cost(cfg) => {
+                assert!(matches!(cfg.provider, super::CostProvider::ModelsDev));
+                assert_eq!(cfg.url.as_deref(), Some("https://models.dev/api.json"));
+                assert_eq!(cfg.refresh_interval, Some(3600));
+                assert_eq!(
+                    cfg.model_aliases
+                        .as_ref()
+                        .and_then(|m| m.get("openai/gpt-oss-120b"))
+                        .map(String::as_str),
+                    Some("openai/gpt-4o")
+                );
+            }
+            other => panic!("expected cost source, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn test_deserialize_digitalocean_cost_source_without_url() {
+        let yaml = r#"
+- type: cost
+  provider: digitalocean
+"#;
+        let sources: Vec<super::MetricsSource> = serde_yaml::from_str(yaml).unwrap();
+        match &sources[0] {
+            super::MetricsSource::Cost(cfg) => {
+                assert!(matches!(cfg.provider, super::CostProvider::Digitalocean));
+                assert_eq!(cfg.url, None);
+            }
+            other => panic!("expected cost source, got {other:?}"),
+        }
+    }
+
    #[test]
    fn test_into_models_filters_internal_providers() {
        let providers = vec![
--- a/docs/source/guides/llm_router.rst
+++ b/docs/source/guides/llm_router.rst
@ -209,6 +209,178 @@ Clients can let the router decide or still specify aliases:
    )


+.. _cost_latency_aware_selection:
+
+Cost- and latency-aware selection
+---------------------------------
+
+When a route lists more than one candidate model, you can let Plano reorder that
+candidate pool using **live cost or latency data** instead of relying solely on the
+order you wrote them in. This is controlled per route with ``selection_policy`` and
+backed by one or more ``model_metrics_sources``.
+
+This is useful when several models are equally capable for a route and you want Plano
+to always reach for the cheapest (or fastest) option first, with the others kept as
+fallbacks.
+
+Selection policy
+~~~~~~~~~~~~~~~~~
+
+Attach an optional ``selection_policy`` to any entry in ``routing_preferences``:
+
+.. code-block:: yaml
+    :caption: Per-route selection policy
+
+    routing_preferences:
+      - name: code review
+        description: reviewing, analyzing, and suggesting improvements to existing code
+        models:
+          - anthropic/claude-sonnet-4-5
+          - groq/llama-3.3-70b-versatile
+        selection_policy:
+          prefer: cheapest   # cheapest | fastest | none
+
+``prefer`` accepts:
+
+- ``cheapest`` — order candidates by total price (input + output rate) ascending, using a ``cost`` metrics source.
+- ``fastest`` — order candidates by observed latency ascending, using a ``latency`` metrics source.
+- ``none`` (default) — keep the order you declared; no reordering.
+
+Models that have no data in the selected source are ranked **last**, in their original
+order, so routing always degrades gracefully rather than dropping a candidate.
+
+Configuring the pricing source
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+``cheapest`` routing needs a price catalog. Plano's **default pricing provider is
+DigitalOcean** — its GenAI model catalog is public (no API key, no signup), so cost data
+is available out of the box and is what ``planoai obs`` uses if you don't configure
+anything. The pricing source is fully swappable: point Plano at `models.dev <https://models.dev/>`_,
+or at **any endpoint that exposes a supported pricing structure**.
+
+The ``provider`` field selects which response schema Plano expects (and therefore how it
+parses the catalog); the optional ``url`` lets you override the endpoint — for example to
+use a mirror, a cached copy, or an internal catalog service that returns the same shape.
+
+.. list-table::
+   :header-rows: 1
+   :widths: 18 34 28 20
+
+   * - ``provider``
+     - Default catalog URL
+     - Key format
+     - Expected structure
+   * - ``digitalocean`` *(default)*
+     - DigitalOcean GenAI model catalog
+     - ``lowercase(creator)/model_id``
+     - ``{ data: [ { model_id, pricing: { input_price_per_million, output_price_per_million } } ] }``
+   * - ``models.dev``
+     - ``https://models.dev/api.json``
+     - ``creator/model`` (e.g. ``anthropic/claude-sonnet-4-5``)
+     - ``{ <provider>: { models: { <model>: { cost: { input, output } } } } }``
+
+Because the source is selected per ``provider``, switching is a one-line change. To stay
+on the default DigitalOcean catalog you can omit ``model_metrics_sources`` entirely for
+``planoai obs``, or declare it explicitly for routing:
+
+.. code-block:: yaml
+    :caption: Default cost source (DigitalOcean)
+
+    model_metrics_sources:
+      - type: cost
+        provider: digitalocean   # default; uses the public DO GenAI catalog
+
+To switch to models.dev — an open, community-maintained catalog covering a broad range of
+providers and models — change the ``provider`` (and optionally ``url``):
+
+.. code-block:: yaml
+    :caption: Cost source backed by models.dev
+
+    model_metrics_sources:
+      - type: cost
+        provider: models.dev               # models.dev | digitalocean
+        url: https://models.dev/api.json    # optional; defaults per provider
+        refresh_interval: 3600              # optional, seconds; refetch on this interval
+        model_aliases:                      # optional; see below
+          openai/gpt-oss-120b: openai/gpt-4o
+
+To use your own endpoint, pick the ``provider`` whose structure your endpoint matches and
+override ``url`` — Plano parses the response with that provider's schema:
+
+.. code-block:: yaml
+    :caption: Custom endpoint exposing the DigitalOcean catalog structure
+
+    model_metrics_sources:
+      - type: cost
+        provider: digitalocean                       # selects the DO response schema
+        url: https://catalog.internal.example.com/pricing
+
+.. note::
+   The cost metric used for ranking is the sum of the input and output per-million-token
+   rates — a relative signal for ordering candidates, not a per-request bill. For actual
+   per-request cost, see the observability console below.
+
+Matching catalog keys to your models
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The router looks up each candidate model by the exact name you use in
+``routing_preferences`` (e.g. ``anthropic/claude-sonnet-4-5``). models.dev groups models
+under a provider id, and Plano composes each catalog key as ``provider/model`` (also
+registering the bare model id as a fallback). This lines up with Plano's
+``provider/model`` naming, so most models match automatically.
+
+When a catalog key does not match your model name — for example a version skew, or an
+open-weight model you serve under a different provider — use ``model_aliases`` to map the
+**catalog key** to the **Plano model name** used in your routing preferences:
+
+.. code-block:: yaml
+
+    model_metrics_sources:
+      - type: cost
+        provider: models.dev
+        model_aliases:
+          # catalog key            : plano model name
+          openai/gpt-oss-120b: openai/gpt-4o
+
+Latency source
+~~~~~~~~~~~~~~~
+
+``fastest`` routing reads observed latency from a Prometheus instance. Provide the query
+that returns a per-model latency value (lower is faster), labelled by ``model_name``:
+
+.. code-block:: yaml
+    :caption: Latency source backed by Prometheus
+
+    model_metrics_sources:
+      - type: latency
+        provider: prometheus
+        url: http://prometheus:9090
+        query: avg by (model_name) (rate(plano_llm_latency_seconds_sum[5m]))
+        refresh_interval: 60
+
+You can declare both a ``cost`` and a ``latency`` source at the same time; each route
+picks whichever it needs based on its ``selection_policy``.
+
+Cost in the observability console
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+``planoai obs`` displays a per-request USD cost column derived from the same pricing
+catalog. By default it reads the ``cost`` source from your config (the first
+``type: cost`` entry under ``model_metrics_sources``); you can also override it on the
+command line:
+
+.. code-block:: bash
+
+    # Use the cost source from ./config.yaml (default)
+    planoai obs
+
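+    # Or override the provider / endpoint explicitly. --pricing-url only changes
+    # the endpoint; the response is still parsed with the selected provider's
+    # schema, so pair it with the matching --pricing-provider.
+    planoai obs --pricing-provider models.dev
+    planoai obs --pricing-provider models.dev --pricing-url https://models.dev/api.json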
+
+If no source is configured and no override is given, ``planoai obs`` falls back to the
+DigitalOcean catalog so the cost column still populates out of the box.
+
+
 Plano-Orchestrator
 -------------------
 Plano-Orchestrator is a **preference-based routing model** specifically designed to address the limitations of traditional LLM routing. It delivers production-ready performance with low latency and high accuracy while solving key routing challenges.
--- a/docs/source/resources/includes/plano_config_full_reference.yaml
+++ b/docs/source/resources/includes/plano_config_full_reference.yaml
@ -86,6 +86,24 @@ routing_preferences:
    selection_policy:
      prefer: cheapest

+# model_metrics_sources: external catalogs the router reads to reorder candidate
+# models for selection_policy.prefer. A `cost` source ranks `prefer: cheapest`;
+# a `latency` source ranks `prefer: fastest`. Both are optional.
+model_metrics_sources:
+  # Cost catalog. provider: models.dev | digitalocean (default url per provider).
+  - type: cost
+    provider: models.dev
+    url: https://models.dev/api.json   # optional; omit to use the provider default
+    refresh_interval: 3600             # optional, seconds
+    model_aliases:                     # optional: catalog key -> Plano model name
+      openai/gpt-oss-120b: openai/gpt-4o
+  # Latency catalog (Prometheus). Used for selection_policy.prefer: fastest.
+  - type: latency
+    provider: prometheus
+    url: http://prometheus:9090
+    query: avg by (model_name) (rate(plano_llm_latency_seconds_sum[5m]))
+    refresh_interval: 60
+
 # HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
 listeners:
  # Agent listener for routing requests to multiple agents
--- a/docs/source/resources/includes/plano_config_full_reference_rendered.yaml
+++ b/docs/source/resources/includes/plano_config_full_reference_rendered.yaml
@ -115,6 +115,18 @@ model_aliases:
    target: gpt-4o-mini
  smart-llm:
    target: gpt-4o
+model_metrics_sources:
+- model_aliases:
+    openai/gpt-oss-120b: openai/gpt-4o
+  provider: models.dev
+  refresh_interval: 3600
+  type: cost
+  url: https://models.dev/api.json
+- provider: prometheus
+  query: avg by (model_name) (rate(plano_llm_latency_seconds_sum[5m]))
+  refresh_interval: 60
+  type: latency
+  url: http://prometheus:9090
 model_providers:
 - access_key: $OPENAI_API_KEY
  default: true