mirror of
https://github.com/katanemo/plano.git
synced 2026-06-26 15:39:40 +02:00
feat: make model pricing source configurable (models.dev + DigitalOcean) (#971)
This commit is contained in:
parent
5cc4c4ee77
commit
558df0307c
9 changed files with 687 additions and 48 deletions
|
|
@ -1,7 +1,8 @@
|
|||
"""DigitalOcean Gradient pricing catalog for the obs console.
|
||||
"""Model pricing catalog for the obs console.
|
||||
|
||||
Ported loosely from ``crates/brightstaff/src/router/model_metrics.rs::fetch_do_pricing``.
|
||||
Single-source: one fetch at startup, cached for the life of the process.
|
||||
Mirrors ``crates/brightstaff/src/router/model_metrics.rs``. The source is
|
||||
configurable: ``digitalocean`` (DO GenAI catalog) or ``models.dev``. A single
|
||||
fetch at startup is cached for the life of the process.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -14,7 +15,18 @@ from typing import Any
|
|||
|
||||
import requests
|
||||
|
||||
DEFAULT_PRICING_URL = "https://api.digitalocean.com/v2/gen-ai/models/catalog"
|
||||
DO_PRICING_URL = "https://api.digitalocean.com/v2/gen-ai/models/catalog"
|
||||
MODELS_DEV_URL = "https://models.dev/api.json"
|
||||
|
||||
# Backwards-compatible default (DigitalOcean) used when no provider is given.
|
||||
DEFAULT_PRICING_URL = DO_PRICING_URL
|
||||
DEFAULT_PRICING_PROVIDER = "digitalocean"
|
||||
|
||||
_DEFAULT_URLS = {
|
||||
"digitalocean": DO_PRICING_URL,
|
||||
"models.dev": MODELS_DEV_URL,
|
||||
}
|
||||
|
||||
FETCH_TIMEOUT_SECS = 5.0
|
||||
|
||||
|
||||
|
|
@ -51,36 +63,52 @@ class PricingCatalog:
|
|||
return list(self._prices.keys())[:n]
|
||||
|
||||
@classmethod
|
||||
def fetch(cls, url: str = DEFAULT_PRICING_URL) -> "PricingCatalog":
|
||||
"""Fetch pricing from DO's catalog endpoint. On failure, returns an
|
||||
def fetch(
|
||||
cls,
|
||||
provider: str = DEFAULT_PRICING_PROVIDER,
|
||||
url: str | None = None,
|
||||
) -> "PricingCatalog":
|
||||
"""Fetch pricing from the configured catalog. On failure, returns an
|
||||
empty catalog (cost column will be blank).
|
||||
|
||||
The catalog endpoint is public — no auth required, no signup — so
|
||||
``planoai obs`` gets cost data on first run out of the box.
|
||||
``provider`` selects the parser/default URL: ``digitalocean`` or
|
||||
``models.dev``. Both catalog endpoints are public — no auth required —
|
||||
so ``planoai obs`` gets cost data on first run out of the box.
|
||||
"""
|
||||
provider = (provider or DEFAULT_PRICING_PROVIDER).strip().lower()
|
||||
resolved_url = url or _DEFAULT_URLS.get(provider, DO_PRICING_URL)
|
||||
try:
|
||||
resp = requests.get(url, timeout=FETCH_TIMEOUT_SECS)
|
||||
resp = requests.get(resolved_url, timeout=FETCH_TIMEOUT_SECS)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
except Exception as exc: # noqa: BLE001 — best-effort; never fatal
|
||||
logger.warning(
|
||||
"DO pricing fetch failed: %s; cost column will be blank.",
|
||||
"%s pricing fetch failed: %s; cost column will be blank.",
|
||||
provider,
|
||||
exc,
|
||||
)
|
||||
return cls()
|
||||
|
||||
prices = _parse_do_pricing(data)
|
||||
if provider == "models.dev":
|
||||
prices = _parse_models_dev_pricing(data)
|
||||
else:
|
||||
prices = _parse_do_pricing(data)
|
||||
|
||||
if not prices:
|
||||
# Dump the first entry's raw shape so we can see which fields DO
|
||||
# actually returned — helps when the catalog adds new fields or
|
||||
# the response doesn't match our parser.
|
||||
# Dump a sample of the raw shape so we can see which fields the
|
||||
# catalog returned — helps when it adds new fields or the response
|
||||
# doesn't match our parser.
|
||||
import json as _json
|
||||
|
||||
sample_items = _coerce_items(data)
|
||||
sample = sample_items[0] if sample_items else data
|
||||
if provider == "models.dev" and isinstance(data, dict):
|
||||
sample = next(iter(data.values()), data)
|
||||
else:
|
||||
sample_items = _coerce_items(data)
|
||||
sample = sample_items[0] if sample_items else data
|
||||
logger.warning(
|
||||
"DO pricing response had no parseable entries; cost column "
|
||||
"%s pricing response had no parseable entries; cost column "
|
||||
"will be blank. Sample entry: %s",
|
||||
provider,
|
||||
_json.dumps(sample, default=str)[:400],
|
||||
)
|
||||
return cls(prices)
|
||||
|
|
@ -278,6 +306,75 @@ def _parse_do_pricing(data: Any) -> dict[str, ModelPrice]:
|
|||
return prices
|
||||
|
||||
|
||||
def _parse_models_dev_pricing(data: Any) -> dict[str, ModelPrice]:
|
||||
"""Parse a models.dev ``api.json`` response into a ModelPrice map.
|
||||
|
||||
models.dev shape (top-level object keyed by provider id)::
|
||||
|
||||
{
|
||||
"anthropic": {
|
||||
"models": {
|
||||
"claude-opus-4-5": {
|
||||
"cost": {"input": 5, "output": 25, "cache_read": 0.5}
|
||||
}
|
||||
}
|
||||
},
|
||||
...
|
||||
}
|
||||
|
||||
``cost.*`` values are USD per *million* tokens, so we divide by 1e6 to get a
|
||||
per-token rate. First-party providers use bare model keys, so we register
|
||||
both ``provider/model`` (matching Plano's routing names) and the bare model
|
||||
id as a fallback.
|
||||
"""
|
||||
prices: dict[str, ModelPrice] = {}
|
||||
if not isinstance(data, dict):
|
||||
return prices
|
||||
|
||||
for provider_id, provider in data.items():
|
||||
if not isinstance(provider, dict):
|
||||
continue
|
||||
models = provider.get("models")
|
||||
if not isinstance(models, dict):
|
||||
continue
|
||||
for model_key, model in models.items():
|
||||
if not isinstance(model, dict):
|
||||
continue
|
||||
cost = model.get("cost")
|
||||
if not isinstance(cost, dict):
|
||||
continue
|
||||
input_pm = _as_float(cost.get("input"))
|
||||
output_pm = _as_float(cost.get("output"))
|
||||
if input_pm is None or output_pm is None:
|
||||
continue
|
||||
# Skip 0-rate entries so cost falls back to `—` rather than $0.0000.
|
||||
if input_pm == 0 and output_pm == 0:
|
||||
continue
|
||||
cached_pm = _as_float(cost.get("cache_read"))
|
||||
price = ModelPrice(
|
||||
input_per_token_usd=input_pm / 1_000_000,
|
||||
output_per_token_usd=output_pm / 1_000_000,
|
||||
cached_input_per_token_usd=(
|
||||
cached_pm / 1_000_000 if cached_pm is not None else None
|
||||
),
|
||||
)
|
||||
composite = f"{provider_id}/{model_key}"
|
||||
prices[composite] = price
|
||||
prices.setdefault(composite.lower(), price)
|
||||
prices.setdefault(str(model_key), price)
|
||||
prices.setdefault(str(model_key).lower(), price)
|
||||
return prices
|
||||
|
||||
|
||||
def _as_float(value: Any) -> float | None:
|
||||
if value is None:
|
||||
return None
|
||||
try:
|
||||
return float(value)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _coerce_items(data: Any) -> list[dict]:
|
||||
if isinstance(data, list):
|
||||
return [x for x in data if isinstance(x, dict)]
|
||||
|
|
|
|||
|
|
@ -2,9 +2,12 @@
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
|
||||
import rich_click as click
|
||||
import yaml
|
||||
from rich.console import Console
|
||||
from rich.live import Live
|
||||
|
||||
|
|
@ -15,8 +18,50 @@ from planoai.obs.collector import (
|
|||
LLMCallStore,
|
||||
ObsCollector,
|
||||
)
|
||||
from planoai.obs.pricing import PricingCatalog
|
||||
from planoai.obs.pricing import DEFAULT_PRICING_PROVIDER, PricingCatalog
|
||||
from planoai.obs.render import render
|
||||
from planoai.utils import find_config_file
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _resolve_pricing_source(
|
||||
config_file: str | None,
|
||||
provider_override: str | None,
|
||||
url_override: str | None,
|
||||
) -> tuple[str, str | None]:
|
||||
"""Pick the cost pricing source.
|
||||
|
||||
Precedence: explicit CLI overrides > the first ``type: cost`` entry in
|
||||
``model_metrics_sources`` from the Plano config > the DigitalOcean default.
|
||||
"""
|
||||
provider = DEFAULT_PRICING_PROVIDER
|
||||
url: str | None = None
|
||||
|
||||
config_path = find_config_file(file=config_file)
|
||||
if config_path and os.path.exists(config_path):
|
||||
try:
|
||||
with open(config_path, "r") as f:
|
||||
config = yaml.safe_load(f) or {}
|
||||
sources = config.get("model_metrics_sources") or []
|
||||
for source in sources:
|
||||
if isinstance(source, dict) and source.get("type") == "cost":
|
||||
if source.get("provider"):
|
||||
provider = str(source["provider"])
|
||||
if source.get("url"):
|
||||
url = str(source["url"])
|
||||
break
|
||||
except Exception as exc: # noqa: BLE001 — config is optional for obs
|
||||
logger.warning(
|
||||
"could not read pricing source from %s: %s", config_path, exc
|
||||
)
|
||||
|
||||
if provider_override:
|
||||
provider = provider_override
|
||||
if url_override:
|
||||
url = url_override
|
||||
|
||||
return provider, url
|
||||
|
||||
|
||||
@click.command(name="obs", help="Live observability console for Plano LLM traffic.")
|
||||
|
|
@ -48,13 +93,42 @@ from planoai.obs.render import render
|
|||
show_default=True,
|
||||
help="TUI refresh interval.",
|
||||
)
|
||||
def obs(port: int, host: str, capacity: int, refresh_ms: int) -> None:
|
||||
@click.option(
|
||||
"--config",
|
||||
"config_file",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Path to the Plano config to read the pricing source from "
|
||||
"(defaults to ./config.yaml or ./plano_config.yaml).",
|
||||
)
|
||||
@click.option(
|
||||
"--pricing-provider",
|
||||
type=click.Choice(["digitalocean", "models.dev"]),
|
||||
default=None,
|
||||
help="Override the cost pricing provider (otherwise read from config).",
|
||||
)
|
||||
@click.option(
|
||||
"--pricing-url",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Override the pricing catalog URL (otherwise read from config / provider default).",
|
||||
)
|
||||
def obs(
|
||||
port: int,
|
||||
host: str,
|
||||
capacity: int,
|
||||
refresh_ms: int,
|
||||
config_file: str | None,
|
||||
pricing_provider: str | None,
|
||||
pricing_url: str | None,
|
||||
) -> None:
|
||||
console = Console()
|
||||
provider, url = _resolve_pricing_source(config_file, pricing_provider, pricing_url)
|
||||
console.print(
|
||||
f"[bold {PLANO_COLOR}]planoai obs[/] — loading DO pricing catalog...",
|
||||
f"[bold {PLANO_COLOR}]planoai obs[/] — loading {provider} pricing catalog...",
|
||||
end="",
|
||||
)
|
||||
pricing = PricingCatalog.fetch()
|
||||
pricing = PricingCatalog.fetch(provider=provider, url=url)
|
||||
if len(pricing):
|
||||
sample = ", ".join(pricing.sample_models(3))
|
||||
console.print(
|
||||
|
|
@ -63,7 +137,7 @@ def obs(port: int, host: str, capacity: int, refresh_ms: int) -> None:
|
|||
else:
|
||||
console.print(
|
||||
" [yellow]no pricing loaded[/] — "
|
||||
"[dim]cost column will be blank (DO catalog unreachable)[/]"
|
||||
f"[dim]cost column will be blank ({provider} catalog unreachable)[/]"
|
||||
)
|
||||
|
||||
store = LLMCallStore(capacity=capacity)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue