feat: make model pricing source configurable (models.dev + DigitalOcean) (#971)

This commit is contained in:
Musa 2026-06-24 10:14:12 -07:00 committed by GitHub
parent 5cc4c4ee77
commit 558df0307c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 687 additions and 48 deletions

View file

@ -1,7 +1,8 @@
"""DigitalOcean Gradient pricing catalog for the obs console.
"""Model pricing catalog for the obs console.
Ported loosely from ``crates/brightstaff/src/router/model_metrics.rs::fetch_do_pricing``.
Single-source: one fetch at startup, cached for the life of the process.
Mirrors ``crates/brightstaff/src/router/model_metrics.rs``. The source is
configurable: ``digitalocean`` (DO GenAI catalog) or ``models.dev``. A single
fetch at startup is cached for the life of the process.
"""
from __future__ import annotations
@ -14,7 +15,18 @@ from typing import Any
import requests
DEFAULT_PRICING_URL = "https://api.digitalocean.com/v2/gen-ai/models/catalog"
# Catalog endpoint used for the ``digitalocean`` provider (DO GenAI catalog).
DO_PRICING_URL = "https://api.digitalocean.com/v2/gen-ai/models/catalog"
# Catalog endpoint used for the ``models.dev`` provider.
MODELS_DEV_URL = "https://models.dev/api.json"
# Backwards-compatible default (DigitalOcean) used when no provider is given.
DEFAULT_PRICING_URL = DO_PRICING_URL
# Provider id assumed when the caller does not name one.
DEFAULT_PRICING_PROVIDER = "digitalocean"
# Maps each provider id to the catalog URL fetched when no explicit URL is
# supplied.
_DEFAULT_URLS = {
"digitalocean": DO_PRICING_URL,
"models.dev": MODELS_DEV_URL,
}
# Passed as ``timeout`` to ``requests.get`` for the catalog fetch.
FETCH_TIMEOUT_SECS = 5.0
@ -51,36 +63,52 @@ class PricingCatalog:
return list(self._prices.keys())[:n]
@classmethod
def fetch(
    cls,
    provider: str = DEFAULT_PRICING_PROVIDER,
    url: str | None = None,
) -> "PricingCatalog":
    """Fetch pricing from the configured catalog (best-effort, never raises
    on network or HTTP failure).

    ``provider`` selects both the response parser and the default URL:
    ``digitalocean`` or ``models.dev``. It is matched case-insensitively
    after stripping whitespace. ``url`` overrides the provider's default
    catalog URL when given.

    Both catalog endpoints are public — no auth required — so
    ``planoai obs`` gets cost data on first run out of the box.

    Returns an empty catalog (cost column will be blank) when the fetch
    fails; a warning is logged in that case, and also when the response
    contains no parseable entries.
    """
    raw_provider = (provider or DEFAULT_PRICING_PROVIDER).strip()

    # Compatibility with the previous ``fetch(url)`` signature: a lone
    # positional URL would otherwise be read as an unknown provider and the
    # caller's URL would be silently discarded.
    if url is None and raw_provider.lower().startswith(("http://", "https://")):
        url = raw_provider
        raw_provider = DEFAULT_PRICING_PROVIDER

    provider = raw_provider.lower()
    if provider not in _DEFAULT_URLS:
        # Unknown ids have always been handled with the DigitalOcean URL and
        # parser; say so explicitly and keep later log lines truthful.
        logger.warning(
            "unknown pricing provider %r (expected one of %s); "
            "falling back to digitalocean.",
            provider,
            ", ".join(sorted(_DEFAULT_URLS)),
        )
        provider = "digitalocean"

    resolved_url = url or _DEFAULT_URLS.get(provider, DO_PRICING_URL)
    try:
        resp = requests.get(resolved_url, timeout=FETCH_TIMEOUT_SECS)
        resp.raise_for_status()
        data = resp.json()
    except Exception as exc:  # noqa: BLE001 — best-effort; never fatal
        logger.warning(
            "%s pricing fetch failed: %s; cost column will be blank.",
            provider,
            exc,
        )
        return cls()

    if provider == "models.dev":
        prices = _parse_models_dev_pricing(data)
    else:
        prices = _parse_do_pricing(data)

    if not prices:
        # Dump a sample of the raw shape so we can see which fields the
        # catalog returned — helps when it adds new fields or the response
        # doesn't match our parser.
        import json as _json

        if provider == "models.dev" and isinstance(data, dict):
            # models.dev is keyed by provider id; show the first provider.
            sample = next(iter(data.values()), data)
        else:
            sample_items = _coerce_items(data)
            sample = sample_items[0] if sample_items else data
        logger.warning(
            "%s pricing response had no parseable entries; cost column "
            "will be blank. Sample entry: %s",
            provider,
            _json.dumps(sample, default=str)[:400],
        )
    return cls(prices)
@ -278,6 +306,75 @@ def _parse_do_pricing(data: Any) -> dict[str, ModelPrice]:
return prices
def _parse_models_dev_pricing(data: Any) -> dict[str, ModelPrice]:
    """Build a ModelPrice lookup from a models.dev ``api.json`` payload.

    Expected shape (top-level object keyed by provider id)::

        {
          "anthropic": {
            "models": {
              "claude-opus-4-5": {
                "cost": {"input": 5, "output": 25, "cache_read": 0.5}
              }
            }
          },
          ...
        }

    ``cost.*`` values are USD per *million* tokens and are divided by 1e6 to
    get per-token rates. Each priced model is registered under
    ``provider/model`` (matching Plano's routing names), and the lowercase
    composite, bare model id and lowercase bare id are added as fallbacks
    when those keys are not already taken.

    Anything that does not match the expected shape is skipped. Entries
    missing an input or output rate, or with both rates equal to zero, are
    skipped too. A non-dict payload yields an empty map.
    """
    catalog: dict[str, ModelPrice] = {}
    if not isinstance(data, dict):
        return catalog

    tokens_per_million = 1_000_000
    for vendor_id, vendor in data.items():
        vendor_models = vendor.get("models") if isinstance(vendor, dict) else None
        if not isinstance(vendor_models, dict):
            continue

        for model_id, entry in vendor_models.items():
            rates = entry.get("cost") if isinstance(entry, dict) else None
            if not isinstance(rates, dict):
                continue

            in_rate = _as_float(rates.get("input"))
            out_rate = _as_float(rates.get("output"))
            if in_rate is None or out_rate is None:
                continue
            # Skip 0-rate entries so cost falls back to `—` rather than $0.0000.
            if in_rate == 0 and out_rate == 0:
                continue

            cache_rate = _as_float(rates.get("cache_read"))
            if cache_rate is None:
                cached_per_token = None
            else:
                cached_per_token = cache_rate / tokens_per_million

            price = ModelPrice(
                input_per_token_usd=in_rate / tokens_per_million,
                output_per_token_usd=out_rate / tokens_per_million,
                cached_input_per_token_usd=cached_per_token,
            )

            qualified = f"{vendor_id}/{model_id}"
            bare = str(model_id)
            # The exact composite key always wins; aliases never displace an
            # entry registered earlier.
            catalog[qualified] = price
            for alias in (qualified.lower(), bare, bare.lower()):
                catalog.setdefault(alias, price)
    return catalog
def _as_float(value: Any) -> float | None:
if value is None:
return None
try:
return float(value)
except (TypeError, ValueError):
return None
def _coerce_items(data: Any) -> list[dict]:
if isinstance(data, list):
return [x for x in data if isinstance(x, dict)]

View file

@ -2,9 +2,12 @@
from __future__ import annotations
import logging
import os
import time
import rich_click as click
import yaml
from rich.console import Console
from rich.live import Live
@ -15,8 +18,50 @@ from planoai.obs.collector import (
LLMCallStore,
ObsCollector,
)
from planoai.obs.pricing import PricingCatalog
from planoai.obs.pricing import DEFAULT_PRICING_PROVIDER, PricingCatalog
from planoai.obs.render import render
from planoai.utils import find_config_file
logger = logging.getLogger(__name__)
def _resolve_pricing_source(
    config_file: str | None,
    provider_override: str | None,
    url_override: str | None,
) -> tuple[str, str | None]:
    """Pick the cost pricing source.

    Precedence: explicit CLI overrides > the first ``type: cost`` entry in
    ``model_metrics_sources`` from the Plano config > the DigitalOcean default.

    Args:
        config_file: Path handed to ``find_config_file``; may be ``None``.
        provider_override: Provider id from the CLI, or ``None``.
        url_override: Catalog URL from the CLI, or ``None``.

    Returns:
        ``(provider, url)``. ``url`` is ``None`` when neither the CLI nor the
        config supplied one. A URL taken from the config is dropped when the
        CLI selects a different provider, because that URL serves the
        config's provider and would not match the other provider's parser.

    Any error while reading or parsing the config is logged as a warning and
    the defaults/overrides are used instead.
    """
    provider = DEFAULT_PRICING_PROVIDER
    url: str | None = None

    config_path = find_config_file(file=config_file)
    if config_path and os.path.exists(config_path):
        try:
            # Explicit encoding so the result does not depend on the locale.
            with open(config_path, "r", encoding="utf-8") as f:
                config = yaml.safe_load(f) or {}
            sources = config.get("model_metrics_sources") or []
            for source in sources:
                if isinstance(source, dict) and source.get("type") == "cost":
                    if source.get("provider"):
                        provider = str(source["provider"])
                    if source.get("url"):
                        url = str(source["url"])
                    # Only the first cost source is honoured.
                    break
        except Exception as exc:  # noqa: BLE001 — config is optional for obs
            logger.warning(
                "could not read pricing source from %s: %s", config_path, exc
            )

    if provider_override:
        if provider_override.strip().lower() != provider.strip().lower():
            # The config URL belongs to the config's provider; don't pair it
            # with a different provider's parser.
            url = None
        provider = provider_override
    if url_override:
        url = url_override
    return provider, url
@click.command(name="obs", help="Live observability console for Plano LLM traffic.")
@ -48,13 +93,42 @@ from planoai.obs.render import render
show_default=True,
help="TUI refresh interval.",
)
def obs(port: int, host: str, capacity: int, refresh_ms: int) -> None:
@click.option(
"--config",
"config_file",
type=str,
default=None,
help="Path to the Plano config to read the pricing source from "
"(defaults to ./config.yaml or ./plano_config.yaml).",
)
@click.option(
"--pricing-provider",
type=click.Choice(["digitalocean", "models.dev"]),
default=None,
help="Override the cost pricing provider (otherwise read from config).",
)
@click.option(
"--pricing-url",
type=str,
default=None,
help="Override the pricing catalog URL (otherwise read from config / provider default).",
)
def obs(
port: int,
host: str,
capacity: int,
refresh_ms: int,
config_file: str | None,
pricing_provider: str | None,
pricing_url: str | None,
) -> None:
console = Console()
provider, url = _resolve_pricing_source(config_file, pricing_provider, pricing_url)
console.print(
f"[bold {PLANO_COLOR}]planoai obs[/] — loading DO pricing catalog...",
f"[bold {PLANO_COLOR}]planoai obs[/] — loading {provider} pricing catalog...",
end="",
)
pricing = PricingCatalog.fetch()
pricing = PricingCatalog.fetch(provider=provider, url=url)
if len(pricing):
sample = ", ".join(pricing.sample_models(3))
console.print(
@ -63,7 +137,7 @@ def obs(port: int, host: str, capacity: int, refresh_ms: int) -> None:
else:
console.print(
" [yellow]no pricing loaded[/] — "
"[dim]cost column will be blank (DO catalog unreachable)[/]"
f"[dim]cost column will be blank ({provider} catalog unreachable)[/]"
)
store = LLMCallStore(capacity=capacity)