# plano/cli/planoai/obs/pricing.py

"""Model pricing catalog for the obs console.

Mirrors ``crates/brightstaff/src/router/model_metrics.rs``. The source is
configurable: ``digitalocean`` (DO GenAI catalog) or ``models.dev``. A single
fetch at startup is cached for the life of the process.
"""

from __future__ import annotations

import logging
import re
import threading
from dataclasses import dataclass
from typing import Any

import requests

# Catalog endpoints, one per supported pricing provider. ``PricingCatalog.fetch``
# requests them without any credentials.
DO_PRICING_URL = "https://api.digitalocean.com/v2/gen-ai/models/catalog"
MODELS_DEV_URL = "https://models.dev/api.json"

# Backwards-compatible default (DigitalOcean) used when no provider is given.
DEFAULT_PRICING_URL = DO_PRICING_URL
DEFAULT_PRICING_PROVIDER = "digitalocean"

# Provider name -> default catalog URL. ``PricingCatalog.fetch`` lowercases the
# provider before looking it up here, so keys must be lowercase; it is only
# consulted when the caller does not pass an explicit ``url``.
_DEFAULT_URLS = {
    "digitalocean": DO_PRICING_URL,
    "models.dev": MODELS_DEV_URL,
}

# Timeout, in seconds, handed to ``requests.get`` for the catalog fetch.
FETCH_TIMEOUT_SECS = 5.0


# Module-level logger; fetch/parse problems are reported as warnings.
logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class ModelPrice:
    """Immutable per-token USD rates for one model.

    Rates are expressed in dollars per *single* token, so a cost is obtained
    by multiplying a token count by the matching rate.
    """

    # USD per input (prompt) token.
    input_per_token_usd: float
    # USD per output (completion) token.
    output_per_token_usd: float
    # USD per cached input token; ``None`` when the catalog supplied no
    # cached rate, in which case cached tokens are billed at the input rate.
    cached_input_per_token_usd: float | None = None


class PricingCatalog:
    """In-memory map from model id to :class:`ModelPrice`.

    Catalog ids look like ``openai-gpt-5.4`` whereas Plano may report the same
    model as ``do/openai-gpt-5.4`` or as the bare id, so lookups try several
    variants of the requested name (provider prefix removed, lowercased, ...).
    Every read of the underlying mapping happens while holding an internal
    lock.
    """

    def __init__(self, prices: dict[str, ModelPrice] | None = None) -> None:
        # A missing *or empty* mapping results in a fresh, empty catalog.
        self._prices: dict[str, ModelPrice] = prices if prices else {}
        self._lock = threading.Lock()

    def __len__(self) -> int:
        """Return the number of lookup keys held (aliases count separately)."""
        with self._lock:
            size = len(self._prices)
        return size

    def sample_models(self, n: int = 5) -> list[str]:
        """Return the first ``n`` catalog keys (plain list-slice semantics)."""
        with self._lock:
            keys = list(self._prices)
        return keys[:n]

    @classmethod
    def fetch(
        cls,
        provider: str = DEFAULT_PRICING_PROVIDER,
        url: str | None = None,
    ) -> "PricingCatalog":
        """Download and parse a pricing catalog; never raises on fetch errors.

        ``provider`` picks both the parser and the default URL; it is
        normalised by stripping whitespace and lowercasing. ``models.dev``
        selects the models.dev parser, anything else is parsed as a
        DigitalOcean catalog. ``url`` overrides the provider's default
        endpoint when given.

        Returns an empty catalog (and logs a warning) when the request fails
        or when the response yields no usable prices, in which case the cost
        column stays blank.
        """
        provider = (provider or DEFAULT_PRICING_PROVIDER).strip().lower()
        resolved_url = url or _DEFAULT_URLS.get(provider, DO_PRICING_URL)

        try:
            response = requests.get(resolved_url, timeout=FETCH_TIMEOUT_SECS)
            response.raise_for_status()
            payload = response.json()
        except Exception as exc:  # noqa: BLE001 — best-effort; never fatal
            logger.warning(
                "%s pricing fetch failed: %s; cost column will be blank.",
                provider,
                exc,
            )
            return cls()

        is_models_dev = provider == "models.dev"
        parse = _parse_models_dev_pricing if is_models_dev else _parse_do_pricing
        prices = parse(payload)
        if prices:
            return cls(prices)

        # Nothing parsed: log one raw entry so a schema change on the catalog
        # side is easy to diagnose from the warning alone.
        import json as _json

        if is_models_dev and isinstance(payload, dict):
            sample = next(iter(payload.values()), payload)
        else:
            entries = _coerce_items(payload)
            sample = entries[0] if entries else payload
        logger.warning(
            "%s pricing response had no parseable entries; cost column "
            "will be blank. Sample entry: %s",
            provider,
            _json.dumps(sample, default=str)[:400],
        )
        return cls(prices)

    def price_for(self, model_name: str | None) -> ModelPrice | None:
        """Return the price registered for ``model_name``, or ``None``.

        Candidates are tried in the order produced by
        :func:`_model_key_candidates`; the first one present wins.
        """
        if not model_name:
            return None
        candidates = _model_key_candidates(model_name)
        with self._lock:
            lookups = (self._prices.get(key) for key in candidates)
            return next((found for found in lookups if found is not None), None)

    def cost_for_call(self, call: Any) -> float | None:
        """Return the USD cost of an LLM call, or ``None`` if it has no price.

        Reads ``model`` (falling back to ``request_model``) to find the price
        and ``prompt_tokens`` / ``completion_tokens`` / ``cached_input_tokens``
        for the counts; missing or falsy counts are treated as zero. The
        result is rounded to six decimal places.
        """
        price = None
        for attr in ("model", "request_model"):
            price = self.price_for(getattr(call, attr, None))
            if price is not None:
                break
        if price is None:
            return None

        def tokens(attr: str) -> int:
            return int(getattr(call, attr, 0) or 0)

        prompt = tokens("prompt_tokens")
        completion = tokens("completion_tokens")
        cached = tokens("cached_input_tokens")

        # With a known cached rate, cached tokens are carved out of the prompt
        # and billed at that rate. Without one they simply remain part of the
        # prompt and are billed at the regular input rate.
        cached_rate = price.cached_input_per_token_usd
        if cached_rate is None or not cached:
            billable_prompt = prompt
            cached_cost = 0.0
        else:
            billable_prompt = max(0, prompt - cached)
            cached_cost = cached * cached_rate

        total = (
            billable_prompt * price.input_per_token_usd
            + completion * price.output_per_token_usd
            + cached_cost
        )
        return round(total, 6)


_DATE_SUFFIX_RE = re.compile(r"-\d{8}$")
_PROVIDER_PREFIXES = ("anthropic", "openai", "google", "meta", "cohere", "mistral")
_ANTHROPIC_FAMILIES = {"opus", "sonnet", "haiku"}


def _model_key_candidates(model_name: str) -> list[str]:
    """Lookup-side variants of a Plano-emitted model name.

    Plano resolves names like ``claude-haiku-4-5-20251001``; the catalog stores
    them as ``anthropic-claude-haiku-4.5``. We strip the date suffix and the
    ``provider/`` prefix here; the catalog itself registers the dash/dot and
    family-order aliases at parse time (see :func:`_expand_aliases`).
    """
    base = model_name.strip()
    out = [base]
    if "/" in base:
        out.append(base.split("/", 1)[1])
    for k in list(out):
        stripped = _DATE_SUFFIX_RE.sub("", k)
        if stripped != k:
            out.append(stripped)
    out.extend([v.lower() for v in list(out)])
    seen: set[str] = set()
    uniq = []
    for key in out:
        if key not in seen:
            seen.add(key)
            uniq.append(key)
    return uniq


def _expand_aliases(model_id: str) -> set[str]:
    """Catalog-side variants of a DO model id.

    DO publishes Anthropic models under ids like ``anthropic-claude-opus-4.7``
    or ``anthropic-claude-4.6-sonnet`` while Plano emits ``claude-opus-4-7`` /
    ``claude-sonnet-4-6``. Generate a set covering provider-prefix stripping,
    dash↔dot in version segments, and family↔version word order so a single
    catalog entry matches every name shape we'll see at lookup.
    """
    aliases: set[str] = set()

    def add(name: str) -> None:
        if not name:
            return
        aliases.add(name)
        aliases.add(name.lower())

    add(model_id)

    base = model_id
    head, _, rest = base.partition("-")
    if head.lower() in _PROVIDER_PREFIXES and rest:
        add(rest)
        base = rest

    for key in list(aliases):
        if "." in key:
            add(key.replace(".", "-"))

    parts = base.split("-")
    if len(parts) >= 3 and parts[0].lower() == "claude":
        rest_parts = parts[1:]
        for i, p in enumerate(rest_parts):
            if p.lower() in _ANTHROPIC_FAMILIES:
                others = rest_parts[:i] + rest_parts[i + 1 :]
                if not others:
                    break
                family_last = "claude-" + "-".join(others) + "-" + p
                family_first = "claude-" + p + "-" + "-".join(others)
                add(family_last)
                add(family_first)
                add(family_last.replace(".", "-"))
                add(family_first.replace(".", "-"))
                break

    return aliases


def _parse_do_pricing(data: Any) -> dict[str, ModelPrice]:
    """Build an alias -> ModelPrice map from a DO catalog response.

    Expected shape (as of 2026-04)::

        {
          "data": [
            {"model_id": "openai-gpt-5.4",
             "pricing": {"input_price_per_million": 5.0,
                         "output_price_per_million": 15.0}},
            ...
          ]
        }

    Alternate shapes are tolerated: the id may live under ``id``, ``model`` or
    ``name``, and the rate fields may sit directly on the entry instead of
    under ``pricing``. Entries lacking an id, lacking an input or output rate,
    or priced at zero for both are skipped. When two entries expand to the
    same alias, the earlier entry keeps it.
    """
    # (per-token field names, per-million field names) for each rate kind.
    input_fields = (
        ["input_per_token", "input_token_price", "price_input"],
        ["input_price_per_million", "input_per_million", "input_per_mtok"],
    )
    output_fields = (
        ["output_per_token", "output_token_price", "price_output"],
        ["output_price_per_million", "output_per_million", "output_per_mtok"],
    )
    cached_fields = (
        [
            "cached_input_per_token",
            "cached_input_token_price",
            "prompt_cache_read_per_token",
        ],
        [
            "cached_input_price_per_million",
            "cached_input_per_million",
            "cached_input_per_mtok",
        ],
    )

    catalog: dict[str, ModelPrice] = {}
    for entry in _coerce_items(data):
        model_id = next(
            (entry[k] for k in ("model_id", "id", "model", "name") if entry.get(k)),
            None,
        )
        if not model_id:
            continue

        # The nested ``pricing`` object, when present, takes precedence over
        # same-named fields on the entry itself.
        nested = entry.get("pricing")
        sources = [nested, entry] if isinstance(nested, dict) else [entry]

        input_rate = _extract_rate_from_sources(sources, *input_fields)
        output_rate = _extract_rate_from_sources(sources, *output_fields)
        cached_rate = _extract_rate_from_sources(sources, *cached_fields)

        if input_rate is None or output_rate is None:
            continue
        # A 0/0 entry means "rate not published" (promo / open-weight models);
        # leaving it out makes the cost render as `—` instead of $0.0000.
        if input_rate == 0 == output_rate:
            continue

        price = ModelPrice(input_rate, output_rate, cached_rate)
        for alias in _expand_aliases(str(model_id)):
            catalog.setdefault(alias, price)
    return catalog


def _parse_models_dev_pricing(data: Any) -> dict[str, ModelPrice]:
    """Build a key -> ModelPrice map from a models.dev ``api.json`` payload.

    The payload is an object keyed by provider id::

        {
          "anthropic": {
            "models": {
              "claude-opus-4-5": {
                "cost": {"input": 5, "output": 25, "cache_read": 0.5}
              }
            }
          },
          ...
        }

    ``cost`` values are USD per *million* tokens and are divided by 1e6 here.
    Each model is registered as ``provider/model`` (always, overwriting any
    earlier key of that exact spelling) and, only where the key is still free,
    as the lowercase composite, the bare model id and the lowercase bare id.
    Malformed entries, entries without both input and output cost, and
    entries costing zero for both are skipped.
    """
    catalog: dict[str, ModelPrice] = {}
    if not isinstance(data, dict):
        return catalog

    per_million = 1_000_000
    for provider_id, provider in data.items():
        models = provider.get("models") if isinstance(provider, dict) else None
        if not isinstance(models, dict):
            continue

        for model_key, model in models.items():
            cost = model.get("cost") if isinstance(model, dict) else None
            if not isinstance(cost, dict):
                continue

            input_pm = _as_float(cost.get("input"))
            output_pm = _as_float(cost.get("output"))
            if input_pm is None or output_pm is None:
                continue
            # Zero-priced entries are left out so cost shows `—`, not $0.0000.
            if input_pm == 0 and output_pm == 0:
                continue

            cached_pm = _as_float(cost.get("cache_read"))
            cached_rate = None if cached_pm is None else cached_pm / per_million
            price = ModelPrice(
                input_per_token_usd=input_pm / per_million,
                output_per_token_usd=output_pm / per_million,
                cached_input_per_token_usd=cached_rate,
            )

            composite = f"{provider_id}/{model_key}"
            bare = str(model_key)
            catalog[composite] = price
            for fallback in (composite.lower(), bare, bare.lower()):
                catalog.setdefault(fallback, price)
    return catalog


def _as_float(value: Any) -> float | None:
    if value is None:
        return None
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def _coerce_items(data: Any) -> list[dict]:
    if isinstance(data, list):
        return [x for x in data if isinstance(x, dict)]
    if isinstance(data, dict):
        for key in ("data", "models", "pricing", "items"):
            val = data.get(key)
            if isinstance(val, list):
                return [x for x in val if isinstance(x, dict)]
    return []


def _extract_rate_from_sources(
    sources: list[dict],
    per_token_keys: list[str],
    per_million_keys: list[str],
) -> float | None:
    """Return a per-token rate in USD, or None if unknown.

    Some DO catalog responses put per-token values under a field whose name
    says ``_per_million`` (e.g. ``input_price_per_million: 5E-8`` — that's
    $5e-8 per token, not per million). Heuristic: values < 1 are already
    per-token (real per-million rates are ~0.1 to ~100); values >= 1 are
    treated as per-million and divided by 1,000,000.
    """
    for src in sources:
        for key in per_token_keys:
            if key in src and src[key] is not None:
                try:
                    return float(src[key])
                except (TypeError, ValueError):
                    continue
        for key in per_million_keys:
            if key in src and src[key] is not None:
                try:
                    v = float(src[key])
                except (TypeError, ValueError):
                    continue
                if v >= 1:
                    return v / 1_000_000
                return v
    return None