add DigitalOcean pricing, startup validation, and demo update

- MetricsSource::DigitalOceanPricing variant: fetch public DO Gen-AI pricing, normalize as lowercase(creator)/model_id, cost = input + output per million
- cost_metrics endpoint format updated to { "model": { "input_per_million": X, "output_per_million": Y } }
- Startup errors: prefer:cheapest requires cost source, prefer:fastest requires prometheus
- Startup warning: models with no pricing/latency data ranked last
- One-per-type enforcement: digitalocean_pricing; error if cost_metrics + digitalocean_pricing both configured
- cost_snapshot() / latency_snapshot() on ModelMetricsService for startup checks
- Demo config updated to v0.4.0 top-level routing_preferences with cheapest + fastest policies
- docker-compose.yaml + prometheus.yaml + metrics_server.py for demo latency metrics
- Schema and docs updated
2026-04-25 00:36:34 +02:00 · 2026-03-27 16:54:37 -07:00 · 2026-03-27 16:54:37 -07:00 · bd7afd911e
commit bd7afd911e
parent 76b1f37052
10 changed files with 427 additions and 80 deletions
--- a/config/plano_config_schema.yaml
+++ b/config/plano_config_schema.yaml
@ -548,11 +548,24 @@ properties:
            refresh_interval:
              type: integer
              minimum: 1
+              description: "Refresh interval in seconds"
          required:
            - type
            - url
            - query
          additionalProperties: false
+        - type: object
+          properties:
+            type:
+              type: string
+              const: digitalocean_pricing
+            refresh_interval:
+              type: integer
+              minimum: 1
+              description: "Refresh interval in seconds"
+          required:
+            - type
+          additionalProperties: false

 additionalProperties: false
 required:
--- a/crates/brightstaff/src/main.rs
+++ b/crates/brightstaff/src/main.rs
@ -220,6 +220,10 @@ async fn init_app_state(
            .iter()
            .filter(|s| matches!(s, MetricsSource::PrometheusMetrics { .. }))
            .count();
+        let do_count = sources
+            .iter()
+            .filter(|s| matches!(s, MetricsSource::DigitalOceanPricing { .. }))
+            .count();
        if cost_count > 1 {
            return Err("model_metrics_sources: only one cost_metrics source is allowed".into());
        }
@ -228,12 +232,87 @@ async fn init_app_state(
                "model_metrics_sources: only one prometheus_metrics source is allowed".into(),
            );
        }
+        if do_count > 1 {
+            return Err(
+                "model_metrics_sources: only one digitalocean_pricing source is allowed".into(),
+            );
+        }
+        if cost_count > 0 && do_count > 0 {
+            return Err(
+                "model_metrics_sources: cost_metrics and digitalocean_pricing cannot both be configured — use one or the other".into(),
+            );
+        }
        let svc = ModelMetricsService::new(sources, reqwest::Client::new()).await;
        Some(Arc::new(svc))
    } else {
        None
    };

+    // Validate that selection_policy.prefer is compatible with the configured metric sources.
+    if let Some(ref prefs) = config.routing_preferences {
+        use common::configuration::{MetricsSource, SelectionPreference};
+
+        let has_cost_source = config
+            .model_metrics_sources
+            .as_deref()
+            .unwrap_or_default()
+            .iter()
+            .any(|s| {
+                matches!(
+                    s,
+                    MetricsSource::CostMetrics { .. } | MetricsSource::DigitalOceanPricing { .. }
+                )
+            });
+        let has_prometheus = config
+            .model_metrics_sources
+            .as_deref()
+            .unwrap_or_default()
+            .iter()
+            .any(|s| matches!(s, MetricsSource::PrometheusMetrics { .. }));
+
+        for pref in prefs {
+            if pref.selection_policy.prefer == SelectionPreference::Cheapest && !has_cost_source {
+                return Err(format!(
+                    "routing_preferences route '{}' uses prefer: cheapest but no cost data source is configured — \
+                     add cost_metrics or digitalocean_pricing to model_metrics_sources",
+                    pref.name
+                )
+                .into());
+            }
+            if pref.selection_policy.prefer == SelectionPreference::Fastest && !has_prometheus {
+                return Err(format!(
+                    "routing_preferences route '{}' uses prefer: fastest but no prometheus_metrics source is configured — \
+                     add prometheus_metrics to model_metrics_sources",
+                    pref.name
+                )
+                .into());
+            }
+        }
+    }
+
+    // Warn about models in routing_preferences that have no matching pricing/latency data.
+    if let (Some(ref prefs), Some(ref svc)) = (&config.routing_preferences, &metrics_service) {
+        let cost_data = svc.cost_snapshot().await;
+        let latency_data = svc.latency_snapshot().await;
+        for pref in prefs {
+            use common::configuration::SelectionPreference;
+            for model in &pref.models {
+                let missing = match pref.selection_policy.prefer {
+                    SelectionPreference::Cheapest => !cost_data.contains_key(model.as_str()),
+                    SelectionPreference::Fastest => !latency_data.contains_key(model.as_str()),
+                    _ => false,
+                };
+                if missing {
+                    warn!(
+                        model = %model,
+                        route = %pref.name,
+                        "model has no metric data — will be ranked last"
+                    );
+                }
+            }
+        }
+    }
+
    let router_service = Arc::new(RouterService::new(
        config.routing_preferences.clone(),
        metrics_service,
--- a/crates/brightstaff/src/router/model_metrics.rs
+++ b/crates/brightstaff/src/router/model_metrics.rs
@ -6,6 +6,8 @@ use common::configuration::{MetricsSource, SelectionPolicy, SelectionPreference}
 use tokio::sync::RwLock;
 use tracing::{info, warn};

+const DO_PRICING_URL: &str = "https://api.digitalocean.com/v2/gen-ai/models";
+
 pub struct ModelMetricsService {
    cost: Arc<RwLock<HashMap<String, f64>>>,
    latency: Arc<RwLock<HashMap<String, f64>>>,
@ -70,6 +72,25 @@ impl ModelMetricsService {
                        });
                    }
                }
+                MetricsSource::DigitalOceanPricing { refresh_interval } => {
+                    let data = fetch_do_pricing(&client).await;
+                    info!(models = data.len(), "fetched digitalocean pricing");
+                    *cost_data.write().await = data;
+
+                    if let Some(interval_secs) = refresh_interval {
+                        let cost_clone = Arc::clone(&cost_data);
+                        let client_clone = client.clone();
+                        let interval = Duration::from_secs(*interval_secs);
+                        tokio::spawn(async move {
+                            loop {
+                                tokio::time::sleep(interval).await;
+                                let data = fetch_do_pricing(&client_clone).await;
+                                info!(models = data.len(), "refreshed digitalocean pricing");
+                                *cost_clone.write().await = data;
+                            }
+                        });
+                    }
+                }
            }
        }

@ -95,6 +116,16 @@ impl ModelMetricsService {
            SelectionPreference::None => models.to_vec(),
        }
    }
+
+    /// Returns a snapshot of the current cost data. Used at startup to warn about unmatched models.
+    pub async fn cost_snapshot(&self) -> HashMap<String, f64> {
+        self.cost.read().await.clone()
+    }
+
+    /// Returns a snapshot of the current latency data. Used at startup to warn about unmatched models.
+    pub async fn latency_snapshot(&self) -> HashMap<String, f64> {
+        self.latency.read().await.clone()
+    }
 }

 fn rank_by_ascending_metric(models: &[String], data: &HashMap<String, f64>) -> Vec<String> {
@ -134,6 +165,12 @@ fn shuffle(models: &[String]) -> Vec<String> {
    result
 }

+#[derive(serde::Deserialize)]
+struct CostEntry {
+    input_per_million: f64,
+    output_per_million: f64,
+}
+
 async fn fetch_cost_metrics(
    url: &str,
    auth: Option<&common::configuration::MetricsAuth>,
@ -148,8 +185,11 @@ async fn fetch_cost_metrics(
        }
    }
    match req.send().await {
-        Ok(resp) => match resp.json::<HashMap<String, f64>>().await {
-            Ok(data) => data,
+        Ok(resp) => match resp.json::<HashMap<String, CostEntry>>().await {
+            Ok(data) => data
+                .into_iter()
+                .map(|(k, v)| (k, v.input_per_million + v.output_per_million))
+                .collect(),
            Err(err) => {
                warn!(error = %err, url = %url, "failed to parse cost metrics response");
                HashMap::new()
@ -162,6 +202,49 @@ async fn fetch_cost_metrics(
    }
 }

+#[derive(serde::Deserialize)]
+struct DoModelList {
+    data: Vec<DoModel>,
+}
+
+#[derive(serde::Deserialize)]
+struct DoModel {
+    model_id: String,
+    creator: String,
+    pricing: DoPricing,
+}
+
+#[derive(serde::Deserialize)]
+struct DoPricing {
+    input_price_per_million: f64,
+    output_price_per_million: f64,
+}
+
+async fn fetch_do_pricing(client: &reqwest::Client) -> HashMap<String, f64> {
+    match client.get(DO_PRICING_URL).send().await {
+        Ok(resp) => match resp.json::<DoModelList>().await {
+            Ok(list) => list
+                .data
+                .into_iter()
+                .map(|m| {
+                    let key = format!("{}/{}", m.creator.to_lowercase(), m.model_id);
+                    let cost =
+                        m.pricing.input_price_per_million + m.pricing.output_price_per_million;
+                    (key, cost)
+                })
+                .collect(),
+            Err(err) => {
+                warn!(error = %err, url = DO_PRICING_URL, "failed to parse digitalocean pricing response");
+                HashMap::new()
+            }
+        },
+        Err(err) => {
+            warn!(error = %err, url = DO_PRICING_URL, "failed to fetch digitalocean pricing");
+            HashMap::new()
+        }
+    }
+}
+
 #[derive(serde::Deserialize)]
 struct PrometheusResponse {
    data: PrometheusData,
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@ -147,6 +147,9 @@ pub enum MetricsSource {
        query: String,
        refresh_interval: Option<u64>,
    },
+    DigitalOceanPricing {
+        refresh_interval: Option<u64>,
+    },
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
--- a/demos/llm_routing/model_routing_service/README.md
+++ b/demos/llm_routing/model_routing_service/README.md
@ -13,42 +13,60 @@ Plano is an AI-native proxy and data plane for agentic apps — with built-in or

 - **One endpoint, many models** — apps call Plano using standard OpenAI/Anthropic APIs; Plano handles provider selection, keys, and failover
 - **Intelligent routing** — a lightweight 1.5B router model classifies user intent and picks the best model per request
+- **Cost & latency ranking** — each route's candidate models are ranked by live cost (DigitalOcean pricing or a custom cost endpoint) or by latency (Prometheus) before the ordered fallback list is returned
 - **Platform governance** — centralize API keys, rate limits, guardrails, and observability without touching app code
 - **Runs anywhere** — single binary; self-host the router for full data privacy

 ## How Routing Works

-The entire routing configuration is plain YAML — no code:
+Routing is configured in top-level `routing_preferences` (requires `version: v0.4.0`):

 ```yaml
-model_providers:
-  - model: openai/gpt-4o-mini
-    default: true                    # fallback for unmatched requests
+version: v0.4.0

-  - model: openai/gpt-4o
-    routing_preferences:
-      - name: complex_reasoning
-        description: complex reasoning tasks, multi-step analysis
+routing_preferences:
+  - name: complex_reasoning
+    description: complex reasoning tasks, multi-step analysis, or detailed explanations
+    models:
+      - openai/gpt-4o
+      - openai/gpt-4o-mini
+    selection_policy:
+      prefer: cheapest        # rank by live cost data

-  - model: anthropic/claude-sonnet-4-20250514
-    routing_preferences:
-      - name: code_generation
-        description: generating new code, writing functions
+  - name: code_generation
+    description: generating new code, writing functions, or creating boilerplate
+    models:
+      - anthropic/claude-sonnet-4-20250514
+      - openai/gpt-4o
+    selection_policy:
+      prefer: fastest         # rank by Prometheus p95 latency
 ```

-When a request arrives, Plano sends the conversation and routing preferences to Arch-Router, which classifies the intent and returns the matching route:
+### `selection_policy.prefer` values
+
+| Value | Behavior |
+|---|---|
+| `cheapest` | Sort models by ascending cost. Requires `cost_metrics` or `digitalocean_pricing` in `model_metrics_sources`. |
+| `fastest` | Sort models by ascending P95 latency. Requires `prometheus_metrics` in `model_metrics_sources`. |
+| `random` | Shuffle the model list on each request. |
+| `none` | Return models in definition order — no reordering. |
+
+When a request arrives, Plano:
+
+1. Sends the conversation + route descriptions to Arch-Router for intent classification
+2. Looks up the matched route and ranks its candidate models by cost or latency
+3. Returns an ordered list — client uses `models[0]`, falls back to `models[1]` on 429/5xx

 ```
 1. Request arrives          → "Write binary search in Python"
-2. Preferences serialized   → [{"name":"code_generation", ...}, {"name":"complex_reasoning", ...}]
-3. Arch-Router classifies   → {"route": "code_generation"}
-4. Route → Model lookup     → code_generation → anthropic/claude-sonnet-4-20250514
-5. Request forwarded        → Claude generates the response
+2. Arch-Router classifies   → route: "code_generation"
+3. Rank by latency          → claude-sonnet (0.85s) < gpt-4o (1.2s)
+4. Response                 → models: ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"]
 ```

-No match? Arch-Router returns `other` → Plano falls back to the default model.
+No match? Arch-Router returns a `null` route, and the client falls back to the model in the original request.

-The `/routing/v1/*` endpoints return the routing decision **without** forwarding to the LLM — useful for testing and validating routing behavior before going to production.
+The `/routing/v1/*` endpoints return the routing decision **without** forwarding to the LLM — useful for testing routing behavior before going to production.

 ## Setup

@ -59,12 +77,28 @@ export OPENAI_API_KEY=<your-key>
 export ANTHROPIC_API_KEY=<your-key>
 ```

-Start Plano:
+Start Prometheus and the mock latency metrics server:
+
 ```bash
 cd demos/llm_routing/model_routing_service
+docker compose up -d
+```
+
+Then start Plano:
+
+```bash
 planoai up config.yaml
 ```

+On startup you should see logs like:
+
+```
+fetched digitalocean pricing: N models
+fetched prometheus latency metrics: 3 models
+```
+
+If a model in `routing_preferences` has no matching pricing or latency data, Plano logs a warning at startup — the model is still included but ranked last.
+
 ## Run the demo

 ```bash
@ -95,13 +129,65 @@ curl http://localhost:12000/routing/v1/chat/completions \
 Response:
 ```json
 {
-    "model": "anthropic/claude-sonnet-4-20250514",
+    "models": ["anthropic/claude-sonnet-4-20250514", "openai/gpt-4o"],
    "route": "code_generation",
    "trace_id": "c16d1096c1af4a17abb48fb182918a88"
 }
 ```

-The response tells you which model would handle this request and which route was matched, without actually making the LLM call.
+The response contains the ranked model list — your client should try `models[0]` first and fall back to `models[1]` on 429 or 5xx errors.
+
+## Metrics Sources
+
+### DigitalOcean Pricing (`digitalocean_pricing`)
+
+Fetches public model pricing from the DigitalOcean Gen-AI catalog (no auth required). Model IDs are normalized as `lowercase(creator)/model_id`. Cost scalar = `input_price_per_million + output_price_per_million`.
+
+```yaml
+model_metrics_sources:
+  - type: digitalocean_pricing
+    refresh_interval: 3600   # re-fetch every hour
+```
+
+### Prometheus Latency (`prometheus_metrics`)
+
+Queries a Prometheus instance for P95 latency. The PromQL expression must return an instant vector with a `model_name` label matching the model names in `routing_preferences`.
+
+```yaml
+model_metrics_sources:
+  - type: prometheus_metrics
+    url: http://localhost:9090
+    query: model_latency_p95_seconds
+    refresh_interval: 60
+```
+
+The demo's `metrics_server.py` exposes mock latency data; `docker compose up -d` starts it alongside Prometheus.
+
+### Custom Cost Endpoint (`cost_metrics`)
+
+```yaml
+model_metrics_sources:
+  - type: cost_metrics
+    url: https://my-internal-pricing-api/costs
+    auth:
+      type: bearer
+      token: $PRICING_TOKEN
+    refresh_interval: 300
+```
+
+Expected response format:
+```json
+{
+  "anthropic/claude-sonnet-4-20250514": {
+    "input_per_million": 3.0,
+    "output_per_million": 15.0
+  },
+  "openai/gpt-4o": {
+    "input_per_million": 5.0,
+    "output_per_million": 20.0
+  }
+}
+```

 ## Kubernetes Deployment (Self-hosted Arch-Router on GPU)

@ -119,7 +205,6 @@ GPU nodes commonly have a `nvidia.com/gpu:NoSchedule` taint — `vllm-deployment
 **1. Deploy Arch-Router and Plano:**

 ```bash
-
 # arch-router deployment
 kubectl apply -f vllm-deployment.yaml

@ -165,39 +250,3 @@ kubectl create configmap plano-config \
  --dry-run=client -o yaml | kubectl apply -f -
 kubectl rollout restart deployment/plano
 ```
-
-## Demo Output
-
-```
-=== Model Routing Service Demo ===
-
--- 1. Code generation query (OpenAI format) ---
-{
-    "model": "anthropic/claude-sonnet-4-20250514",
-    "route": "code_generation",
-    "trace_id": "c16d1096c1af4a17abb48fb182918a88"
-}
-
--- 2. Complex reasoning query (OpenAI format) ---
-{
-    "model": "openai/gpt-4o",
-    "route": "complex_reasoning",
-    "trace_id": "30795e228aff4d7696f082ed01b75ad4"
-}
-
--- 3. Simple query - no routing match (OpenAI format) ---
-{
-    "model": "none",
-    "route": null,
-    "trace_id": "ae0b6c3b220d499fb5298ac63f4eac0e"
-}
-
--- 4. Code generation query (Anthropic format) ---
-{
-    "model": "anthropic/claude-sonnet-4-20250514",
-    "route": "code_generation",
-    "trace_id": "26be822bbdf14a3ba19fe198e55ea4a9"
-}
-
-=== Demo Complete ===
-```
--- a/demos/llm_routing/model_routing_service/config.yaml
+++ b/demos/llm_routing/model_routing_service/config.yaml
@ -1,4 +1,4 @@
-version: v0.3.0
+version: v0.4.0

 listeners:
  - type: model
@ -6,22 +6,41 @@ listeners:
    port: 12000

 model_providers:
-
  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY
    default: true

  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
-    routing_preferences:
-      - name: complex_reasoning
-        description: complex reasoning tasks, multi-step analysis, or detailed explanations

  - model: anthropic/claude-sonnet-4-20250514
    access_key: $ANTHROPIC_API_KEY
-    routing_preferences:
-      - name: code_generation
-        description: generating new code, writing functions, or creating boilerplate
+
+routing_preferences:
+  - name: complex_reasoning
+    description: complex reasoning tasks, multi-step analysis, or detailed explanations
+    models:
+      - openai/gpt-4o
+      - openai/gpt-4o-mini
+    selection_policy:
+      prefer: cheapest
+
+  - name: code_generation
+    description: generating new code, writing functions, or creating boilerplate
+    models:
+      - anthropic/claude-sonnet-4-20250514
+      - openai/gpt-4o
+    selection_policy:
+      prefer: fastest
+
+model_metrics_sources:
+  - type: digitalocean_pricing
+    refresh_interval: 3600
+
+  - type: prometheus_metrics
+    url: http://localhost:9090
+    query: model_latency_p95_seconds
+    refresh_interval: 60

 tracing:
  random_sampling: 100
--- a/demos/llm_routing/model_routing_service/docker-compose.yaml
+++ b/demos/llm_routing/model_routing_service/docker-compose.yaml
@ -0,0 +1,17 @@
+services:
+  prometheus:
+    image: prom/prometheus:latest
+    ports:
+      - "9090:9090"
+    volumes:
+      - ./prometheus.yaml:/etc/prometheus/prometheus.yml:ro
+    depends_on:
+      - model-metrics
+
+  model-metrics:
+    image: python:3.11-slim
+    ports:
+      - "8080:8080"
+    volumes:
+      - ./metrics_server.py:/metrics_server.py:ro
+    command: python /metrics_server.py
--- a/demos/llm_routing/model_routing_service/metrics_server.py
+++ b/demos/llm_routing/model_routing_service/metrics_server.py
@ -0,0 +1,30 @@
+"""
+Minimal Prometheus metrics server for demo purposes.
+Exposes mock P95 latency data for model routing.
+"""
+from http.server import HTTPServer, BaseHTTPRequestHandler
+
+METRICS = """\
+# HELP model_latency_p95_seconds P95 request latency in seconds per model
+# TYPE model_latency_p95_seconds gauge
+model_latency_p95_seconds{model_name="anthropic/claude-sonnet-4-20250514"} 0.85
+model_latency_p95_seconds{model_name="openai/gpt-4o"} 1.20
+model_latency_p95_seconds{model_name="openai/gpt-4o-mini"} 0.40
+""".encode()
+
+
+class MetricsHandler(BaseHTTPRequestHandler):
+    def do_GET(self):
+        self.send_response(200)
+        self.send_header("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
+        self.end_headers()
+        self.wfile.write(METRICS)
+
+    def log_message(self, fmt, *args):
+        pass  # suppress access logs
+
+
+if __name__ == "__main__":
+    server = HTTPServer(("", 8080), MetricsHandler)
+    print("metrics server listening on :8080", flush=True)
+    server.serve_forever()
--- a/demos/llm_routing/model_routing_service/prometheus.yaml
+++ b/demos/llm_routing/model_routing_service/prometheus.yaml
@ -0,0 +1,8 @@
+global:
+  scrape_interval: 15s
+
+scrape_configs:
+  - job_name: model_latency
+    static_configs:
+      - targets:
+          - model-metrics:8080
--- a/docs/routing-api.md
+++ b/docs/routing-api.md
@ -135,12 +135,17 @@ routing_preferences:

 # Optional: live cost and latency data sources (max one per type)
 model_metrics_sources:
-  - type: cost_metrics
-    url: https://internal-cost-api/models
-    refresh_interval: 300  # seconds; omit for fetch-once on startup
-    auth:
-      type: bearer
-      token: $COST_API_TOKEN
+  # Option A: DigitalOcean public pricing (no auth required)
+  - type: digitalocean_pricing
+    refresh_interval: 3600
+
+  # Option B: custom cost endpoint (mutually exclusive with digitalocean_pricing)
+  # - type: cost_metrics
+  #   url: https://internal-cost-api/models
+  #   refresh_interval: 300  # seconds; omit for fetch-once on startup
+  #   auth:
+  #     type: bearer
+  #     token: $COST_API_TOKEN

  - type: prometheus_metrics
    url: https://internal-prometheus/
@ -148,20 +153,61 @@ model_metrics_sources:
    refresh_interval: 60
 ```

+### Startup validation
+
+Plano validates metric source configuration at startup and exits with a clear error if:
+
+| Condition | Error |
+|---|---|
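+| `prefer: cheapest` with no cost source | `routing_preferences route '<name>' uses prefer: cheapest but no cost data source is configured — add cost_metrics or digitalocean_pricing to model_metrics_sources` |
+| `prefer: fastest` with no `prometheus_metrics` | `routing_preferences route '<name>' uses prefer: fastest but no prometheus_metrics source is configured — add prometheus_metrics to model_metrics_sources` |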
+| Two `cost_metrics` entries | `only one cost_metrics source is allowed` |
+| Two `prometheus_metrics` entries | `only one prometheus_metrics source is allowed` |
+| Two `digitalocean_pricing` entries | `only one digitalocean_pricing source is allowed` |
+| `cost_metrics` and `digitalocean_pricing` both present | `cannot both be configured — use one or the other` |
+
+If a model listed in `routing_preferences` has no matching entry in the fetched pricing or latency data, Plano logs a `WARN` at startup — the model is still included but ranked last.
+
 ### cost_metrics endpoint

-Plano GETs `url` on startup (and on each `refresh_interval`). Expected response — a flat JSON object mapping model name to cost value:
+Plano GETs `url` on startup (and on each `refresh_interval`). Expected response — a JSON object mapping model name to an object with `input_per_million` and `output_per_million` fields:

 ```json
 {
-  "anthropic/claude-sonnet-4-20250514": 0.003,
-  "openai/gpt-4o": 0.005,
-  "openai/gpt-4o-mini": 0.00015
+  "anthropic/claude-sonnet-4-20250514": {
+    "input_per_million": 3.0,
+    "output_per_million": 15.0
+  },
+  "openai/gpt-4o": {
+    "input_per_million": 5.0,
+    "output_per_million": 20.0
+  },
+  "openai/gpt-4o-mini": {
+    "input_per_million": 0.15,
+    "output_per_million": 0.6
+  }
 }
 ```

 - `auth.type: bearer` adds `Authorization: Bearer <token>` to the request
- Cost units are arbitrary (e.g. USD per 1k tokens) — only relative order matters
+- Plano combines the two fields as `input_per_million + output_per_million` to produce a single cost scalar used for ranking
+- Only relative order matters for ranking, so the absolute values are not interpreted — just make sure every model's prices use the same unit (e.g. USD per million tokens)
+
+### digitalocean_pricing source
+
+Fetches public model pricing from the DigitalOcean Gen-AI catalog. No authentication required.
+
+```yaml
+model_metrics_sources:
+  - type: digitalocean_pricing
+    refresh_interval: 3600   # re-fetch every hour; omit to fetch once on startup
+```
+
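+Model IDs are normalized as `lowercase(creator)/model_id` — for example, `creator: "OpenAI"`, `model_id: "openai-gpt-4o"` → `"openai/openai-gpt-4o"`. The cost scalar is `input_price_per_million + output_price_per_million`. Model names listed under `routing_preferences` must match the normalized key exactly; a model with no matching key has no cost data and is ranked last.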
+
+**Constraints:**
+- `cost_metrics` and `digitalocean_pricing` cannot both be configured — use one or the other.
+- Only one `digitalocean_pricing` entry is allowed.

 ### prometheus_metrics endpoint