mirror of
https://github.com/katanemo/plano.git
synced 2026-06-26 15:39:40 +02:00
418 lines
14 KiB
Python
418 lines
14 KiB
Python
"""Model pricing catalog for the obs console.
|
|
|
|
Mirrors ``crates/brightstaff/src/router/model_metrics.rs``. The source is
|
|
configurable: ``digitalocean`` (DO GenAI catalog) or ``models.dev``. A single
|
|
fetch at startup is cached for the life of the process.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import re
|
|
import threading
|
|
from dataclasses import dataclass
|
|
from typing import Any
|
|
|
|
import requests
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Catalog endpoints, one per supported pricing provider.
DO_PRICING_URL = "https://api.digitalocean.com/v2/gen-ai/models/catalog"
MODELS_DEV_URL = "https://models.dev/api.json"

# Backwards-compatible defaults: DigitalOcean is used when the caller does not
# name a provider.
DEFAULT_PRICING_PROVIDER = "digitalocean"
DEFAULT_PRICING_URL = DO_PRICING_URL

# Provider name -> default catalog URL for that provider.
_DEFAULT_URLS = {
    "digitalocean": DO_PRICING_URL,
    "models.dev": MODELS_DEV_URL,
}

# Timeout, in seconds, passed to the catalog HTTP request.
FETCH_TIMEOUT_SECS = 5.0
|
|
|
|
|
|
@dataclass(frozen=True)
class ModelPrice:
    """Immutable per-token USD rates for one model.

    Each rate is a dollars-per-single-token figure; multiply it by a token
    count to get a cost.
    """

    # USD charged per (non-cached) input/prompt token.
    input_per_token_usd: float
    # USD charged per output/completion token.
    output_per_token_usd: float
    # USD charged per cached input token; None when no cached rate is known.
    cached_input_per_token_usd: float | None = None
|
|
|
|
|
|
class PricingCatalog:
    """Lock-guarded, in-memory map from model id to :class:`ModelPrice`.

    DO's catalog uses ids like ``openai-gpt-5.4``, while Plano's resolved model
    names may arrive as ``do/openai-gpt-5.4`` or bare ``openai-gpt-5.4``.
    Lookups therefore try several variants of the requested name (including
    one with the leading ``provider/`` prefix removed).
    """

    def __init__(self, prices: dict[str, ModelPrice] | None = None) -> None:
        # A falsy argument (None or an empty dict) is replaced by a fresh dict.
        self._prices: dict[str, ModelPrice] = prices if prices else {}
        self._lock = threading.Lock()

    def __len__(self) -> int:
        """Return the number of registered keys (aliases included)."""
        with self._lock:
            size = len(self._prices)
        return size

    def sample_models(self, n: int = 5) -> list[str]:
        """Return the first ``n`` registered keys, in insertion order."""
        with self._lock:
            known_keys = list(self._prices)
        return known_keys[:n]

    @classmethod
    def fetch(
        cls,
        provider: str = DEFAULT_PRICING_PROVIDER,
        url: str | None = None,
    ) -> "PricingCatalog":
        """Download and parse a pricing catalog; never raises on fetch errors.

        ``provider`` selects the parser and default URL: ``models.dev`` uses
        the models.dev parser, anything else is parsed as the DigitalOcean
        catalog. ``url`` overrides the provider's default endpoint. Both
        catalog endpoints are public — no auth required — so ``planoai obs``
        gets cost data on first run out of the box.

        If the request or JSON decoding fails, a warning is logged and an
        empty catalog is returned (cost column will be blank). If the response
        parses to zero entries, a truncated sample of the raw payload is
        logged to help diagnose a shape mismatch.
        """
        provider = (provider or DEFAULT_PRICING_PROVIDER).strip().lower()
        resolved_url = url if url else _DEFAULT_URLS.get(provider, DO_PRICING_URL)

        try:
            resp = requests.get(resolved_url, timeout=FETCH_TIMEOUT_SECS)
            resp.raise_for_status()
            data = resp.json()
        except Exception as exc:  # noqa: BLE001 — best-effort; never fatal
            logger.warning(
                "%s pricing fetch failed: %s; cost column will be blank.",
                provider,
                exc,
            )
            return cls()

        is_models_dev = provider == "models.dev"
        parse = _parse_models_dev_pricing if is_models_dev else _parse_do_pricing
        prices = parse(data)
        if prices:
            return cls(prices)

        # Nothing parsed: log a sample of the raw shape so it is visible which
        # fields the catalog returned — helps when it adds new fields or the
        # response doesn't match our parser.
        import json as _json

        if is_models_dev and isinstance(data, dict):
            sample = next(iter(data.values()), data)
        else:
            raw_items = _coerce_items(data)
            sample = raw_items[0] if raw_items else data
        logger.warning(
            "%s pricing response had no parseable entries; cost column "
            "will be blank. Sample entry: %s",
            provider,
            _json.dumps(sample, default=str)[:400],
        )
        return cls(prices)

    def price_for(self, model_name: str | None) -> ModelPrice | None:
        """Return the price for ``model_name``, or None when it is unknown.

        Candidate keys are tried in the order produced by
        ``_model_key_candidates``; the first one present wins.
        """
        if not model_name:
            return None
        with self._lock:
            lookups = (
                self._prices.get(key) for key in _model_key_candidates(model_name)
            )
            return next((found for found in lookups if found is not None), None)

    def cost_for_call(self, call: Any) -> float | None:
        """Compute USD cost for an LLMCall. Returns None when pricing is unknown.

        The price is looked up by ``call.model`` first and ``call.request_model``
        second. Token counts are read from ``prompt_tokens``,
        ``completion_tokens`` and ``cached_input_tokens``; missing or falsy
        values count as zero. The result is rounded to 6 decimal places.
        """
        price = self.price_for(getattr(call, "model", None))
        if not price:
            price = self.price_for(getattr(call, "request_model", None))
        if price is None:
            return None

        def token_count(attr: str) -> int:
            return int(getattr(call, attr, 0) or 0)

        prompt = token_count("prompt_tokens")
        completion = token_count("completion_tokens")
        cached = token_count("cached_input_tokens")

        # Cached input tokens are billed at the cached rate when one is known;
        # otherwise they stay inside the prompt count at the regular rate.
        cached_rate = price.cached_input_per_token_usd
        if cached_rate is None or not cached:
            billable_prompt = prompt
            cached_cost = 0.0
        else:
            billable_prompt = max(0, prompt - cached)
            cached_cost = cached * cached_rate

        total = (
            billable_prompt * price.input_per_token_usd
            + completion * price.output_per_token_usd
            + cached_cost
        )
        return round(total, 6)
|
|
|
|
|
|
# Matches a trailing "-" followed by exactly eight digits (e.g. "-20251001").
_DATE_SUFFIX_RE = re.compile(r"-\d{8}$")

# Leading id segments treated as a vendor prefix when building aliases.
_PROVIDER_PREFIXES = (
    "anthropic",
    "openai",
    "google",
    "meta",
    "cohere",
    "mistral",
)

# Claude family words whose position relative to the version may vary.
_ANTHROPIC_FAMILIES = {"opus", "sonnet", "haiku"}
|
|
|
|
|
|
def _model_key_candidates(model_name: str) -> list[str]:
    """Return the ordered, de-duplicated lookup keys for a model name.

    Plano resolves names like ``claude-haiku-4-5-20251001``; the catalog stores
    them as ``anthropic-claude-haiku-4.5``. This side only trims whitespace,
    drops the ``provider/`` prefix and the trailing date suffix, and adds
    lowercase forms; dash/dot and family-order aliases are registered on the
    catalog side at parse time (see :func:`_expand_aliases`).

    Order: the trimmed name, the part after the first ``/`` (if any), the
    date-stripped form of each of those, then the lowercase form of every
    variant so far.
    """
    trimmed = model_name.strip()
    variants = [trimmed]

    _, slash, after_prefix = trimmed.partition("/")
    if slash:
        variants.append(after_prefix)

    # Date-stripped forms are appended only where stripping changed the name.
    undated = [_DATE_SUFFIX_RE.sub("", name) for name in variants]
    variants += [short for name, short in zip(variants, undated) if short != name]

    variants += [name.lower() for name in variants]

    # dict.fromkeys keeps the first occurrence of each key, in order.
    return list(dict.fromkeys(variants))
|
|
|
|
|
|
def _expand_aliases(model_id: str) -> set[str]:
    """Return every key under which a catalog model id should be registered.

    DO publishes Anthropic models under ids like ``anthropic-claude-opus-4.7``
    or ``anthropic-claude-4.6-sonnet`` while Plano emits ``claude-opus-4-7`` /
    ``claude-sonnet-4-6``. The returned set covers:

    * the id itself and its lowercase form;
    * the id without a known leading provider segment;
    * dot-to-dash rewrites of the above;
    * for ``claude-...`` ids, both family-first and family-last word orders,
      each with and without dots rewritten to dashes.
    """
    aliases: set[str] = set()

    def register(name: str) -> None:
        # Empty names are ignored; every alias also gets a lowercase twin.
        if name:
            aliases.update((name, name.lower()))

    register(model_id)

    # Drop a recognised provider segment ("anthropic-", "openai-", ...).
    base = model_id
    vendor, _, remainder = model_id.partition("-")
    if remainder and vendor.lower() in _PROVIDER_PREFIXES:
        register(remainder)
        base = remainder

    # Snapshot first: register() mutates the set being scanned.
    for dotted in [alias for alias in aliases if "." in alias]:
        register(dotted.replace(".", "-"))

    tokens = base.split("-")
    if len(tokens) < 3 or tokens[0].lower() != "claude":
        return aliases

    tail = tokens[1:]
    family_at = next(
        (i for i, word in enumerate(tail) if word.lower() in _ANTHROPIC_FAMILIES),
        None,
    )
    if family_at is None:
        return aliases

    family = tail[family_at]
    version_words = tail[:family_at] + tail[family_at + 1 :]
    if not version_words:
        return aliases

    version = "-".join(version_words)
    for reordered in (
        "claude-" + version + "-" + family,
        "claude-" + family + "-" + version,
    ):
        register(reordered)
        register(reordered.replace(".", "-"))

    return aliases
|
|
|
|
|
|
def _parse_do_pricing(data: Any) -> dict[str, ModelPrice]:
    """Turn a DO catalog response into an alias -> ModelPrice map.

    DO's shape (as of 2026-04):
        {
          "data": [
            {"model_id": "openai-gpt-5.4",
             "pricing": {"input_price_per_million": 5.0,
                         "output_price_per_million": 15.0}},
            ...
          ]
        }

    Older/alternate shapes are also accepted (flat top-level fields, or the
    ``id``/``model``/``name`` key). Entries without an id, without both an
    input and an output rate, or with both rates equal to zero are skipped.
    Each kept entry is registered under every alias from
    :func:`_expand_aliases`; the first entry to claim an alias keeps it.
    """
    catalog: dict[str, ModelPrice] = {}

    for entry in _coerce_items(data):
        model_id = next(
            (
                entry[field]
                for field in ("model_id", "id", "model", "name")
                if entry.get(field)
            ),
            None,
        )
        if not model_id:
            continue

        # DO nests rates under `pricing`; consult that first, then fall back
        # to top-level fields for alternate response shapes.
        nested = entry.get("pricing")
        lookup_order = [nested, entry] if isinstance(nested, dict) else [entry]

        input_rate = _extract_rate_from_sources(
            lookup_order,
            ["input_per_token", "input_token_price", "price_input"],
            ["input_price_per_million", "input_per_million", "input_per_mtok"],
        )
        output_rate = _extract_rate_from_sources(
            lookup_order,
            ["output_per_token", "output_token_price", "price_output"],
            ["output_price_per_million", "output_per_million", "output_per_mtok"],
        )
        cached_rate = _extract_rate_from_sources(
            lookup_order,
            [
                "cached_input_per_token",
                "cached_input_token_price",
                "prompt_cache_read_per_token",
            ],
            [
                "cached_input_price_per_million",
                "cached_input_per_million",
                "cached_input_per_mtok",
            ],
        )

        if input_rate is None or output_rate is None:
            continue
        # Treat 0-rate entries as "unknown" so cost falls back to `—` rather
        # than showing a misleading $0.0000. DO's catalog sometimes omits
        # rates for promo/open-weight models.
        if input_rate == 0 and output_rate == 0:
            continue

        entry_price = ModelPrice(
            input_per_token_usd=input_rate,
            output_per_token_usd=output_rate,
            cached_input_per_token_usd=cached_rate,
        )
        for alias in _expand_aliases(str(model_id)):
            catalog.setdefault(alias, entry_price)

    return catalog
|
|
|
|
|
|
def _parse_models_dev_pricing(data: Any) -> dict[str, ModelPrice]:
    """Turn a models.dev ``api.json`` response into a key -> ModelPrice map.

    models.dev shape (top-level object keyed by provider id)::

        {
          "anthropic": {
            "models": {
              "claude-opus-4-5": {
                "cost": {"input": 5, "output": 25, "cache_read": 0.5}
              }
            }
          },
          ...
        }

    ``cost.*`` values are USD per *million* tokens, so each is divided by 1e6
    to get a per-token rate. Every priced model is registered as
    ``provider/model`` (always overwriting) and, only where the key is still
    free, under the lowercase composite, the bare model key and the lowercase
    bare key. Malformed entries, entries missing an input or output cost, and
    entries whose input and output costs are both zero are skipped.
    """
    catalog: dict[str, ModelPrice] = {}
    if not isinstance(data, dict):
        return catalog

    def costed_models():
        # Yield (provider_id, model_key, cost_dict) for well-formed entries.
        for provider_id, provider in data.items():
            models = provider.get("models") if isinstance(provider, dict) else None
            if not isinstance(models, dict):
                continue
            for model_key, model in models.items():
                cost = model.get("cost") if isinstance(model, dict) else None
                if isinstance(cost, dict):
                    yield provider_id, model_key, cost

    per_million = 1_000_000
    for provider_id, model_key, cost in costed_models():
        input_pm = _as_float(cost.get("input"))
        output_pm = _as_float(cost.get("output"))
        if input_pm is None or output_pm is None:
            continue
        # Skip 0-rate entries so cost falls back to `—` rather than $0.0000.
        if input_pm == 0 and output_pm == 0:
            continue

        cached_pm = _as_float(cost.get("cache_read"))
        entry_price = ModelPrice(
            input_per_token_usd=input_pm / per_million,
            output_per_token_usd=output_pm / per_million,
            cached_input_per_token_usd=(
                None if cached_pm is None else cached_pm / per_million
            ),
        )

        composite = f"{provider_id}/{model_key}"
        bare = str(model_key)
        # The exact composite key always wins; the others are fallbacks only.
        catalog[composite] = entry_price
        for fallback in (composite.lower(), bare, bare.lower()):
            catalog.setdefault(fallback, entry_price)

    return catalog
|
|
|
|
|
|
def _as_float(value: Any) -> float | None:
    """Convert ``value`` to a finite float, or return None if that fails.

    Returns None when ``value`` is None, cannot be converted by ``float()``
    (wrong type, unparseable string, or an integer too large for a float), or
    converts to NaN / +-infinity. Non-finite values are rejected because a
    NaN or infinite rate would propagate into every computed cost.
    """
    if value is None:
        return None
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: float() on an int beyond the float range.
        return None
    # NaN is the only float unequal to itself; infinities have infinite abs().
    if number != number or abs(number) == float("inf"):
        return None
    return number
|
|
|
|
|
|
def _coerce_items(data: Any) -> list[dict]:
    """Extract the list of dict entries from a catalog payload.

    A list payload is used directly. For a dict payload, the first of the
    keys ``data``, ``models``, ``pricing``, ``items`` whose value is a list
    is used. Non-dict elements are dropped. Any other payload (or a dict
    with no list under those keys) yields an empty list.
    """
    if isinstance(data, list):
        entries = data
    elif isinstance(data, dict):
        entries = next(
            (
                data[key]
                for key in ("data", "models", "pricing", "items")
                if isinstance(data.get(key), list)
            ),
            [],
        )
    else:
        entries = []
    return [entry for entry in entries if isinstance(entry, dict)]
|
|
|
|
|
|
def _extract_rate_from_sources(
    sources: list[dict],
    per_token_keys: list[str],
    per_million_keys: list[str],
) -> float | None:
    """Return a per-token rate in USD, or None if unknown.

    ``sources`` are searched in order. Within each source the
    ``per_token_keys`` are tried first and their value is returned as-is;
    then the ``per_million_keys`` are tried. Values that are missing, None,
    unparseable, or non-finite (NaN / infinity) are skipped.

    Some DO catalog responses put per-token values under a field whose name
    says ``_per_million`` (e.g. ``input_price_per_million: 5E-8`` — that's
    $5e-8 per token, not per million). Heuristic for those fields: a value
    below 0.001 is taken to be per-token already; anything at or above 0.001
    is treated as per-million and divided by 1,000,000. The cut-off is 0.001
    rather than 1 because genuine per-million prices are routinely below $1
    (e.g. 0.15), and reading those as per-token would inflate costs by a
    factor of one million.

    NOTE(review): the module docstring says this file mirrors a Rust
    implementation — confirm that side uses the same cut-off.
    """
    # Below this, a "per-million" field is assumed to hold a per-token value:
    # $0.001/token would be $1,000 per million tokens.
    per_token_ceiling = 1e-3

    def parse(raw: Any) -> float | None:
        # Finite float for a usable value, None otherwise.
        if raw is None:
            return None
        try:
            number = float(raw)
        except (TypeError, ValueError, OverflowError):
            return None
        if number != number or abs(number) == float("inf"):
            return None
        return number

    for src in sources:
        for key in per_token_keys:
            rate = parse(src.get(key))
            if rate is not None:
                return rate
        for key in per_million_keys:
            rate = parse(src.get(key))
            if rate is not None:
                if rate >= per_token_ceiling:
                    return rate / 1_000_000
                return rate
    return None
|