156 lines
5.6 KiB
Python
156 lines
5.6 KiB
Python
|
|
"""TOK-09 Batch API consolidation (Plan 02-04 Task 3, D-29).
|
||
|
|
|
||
|
|
D-29 (unified daily process): when Tier 1 is enabled + credentials + budget
|
||
|
|
+ rate-limit all green (D-GUARD ladder via should_call_llm), submit a batch
|
||
|
|
to Anthropic's Batch API at 50% discount vs synchronous calls. Falls back
|
||
|
|
to Tier 0 stub results on any gate failure or SDK absence.
|
||
|
|
|
||
|
|
Plan 02-04 scope: the D-GUARD gate + budget side-effect + llm_health event
|
||
|
|
emission are load-bearing. The actual anthropic.batches.create call is
|
||
|
|
scaffolded behind a lazy import; when the SDK surface differs from what the
|
||
|
|
Python core expects (e.g. version skew), the stub returns an empty result
|
||
|
|
list and records llm_health fallback. Plan 03 / future phases own the real
|
||
|
|
wire-up once the SDK API settles.
|
||
|
|
|
||
|
|
Pricing model:
|
||
|
|
- Haiku 4.5 approx sync cost: prompt $0.25 / 1M tokens + output $1.25 / 1M
|
||
|
|
- Batch discount: 50% off sync cost.
|
||
|
|
"""
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import os
|
||
|
|
from typing import Any
|
||
|
|
|
||
|
|
from iai_mcp.events import write_event
|
||
|
|
from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
|
||
|
|
|
||
|
|
|
||
|
|
# Multiplier applied to the synchronous-tier estimate: batch requests are
# costed at half of the synchronous price.
BATCH_DISCOUNT: float = 0.5

# Phase 2 scope: nothing in this module polls for batch results in-process.
# A real batch can take up to ~24h to complete, so results are expected to
# arrive through a separate, future polling job. The tests exercise the gate
# and its side effects only; no batch results are produced here in Phase 2.
BATCH_POLL_TIMEOUT_SEC: int = 60

# Approximate synchronous Haiku pricing, in USD per one million tokens.
# NOTE(review): 0.25 / 1.25 look like the Claude 3 Haiku list prices rather
# than Haiku 4.5 -- confirm against the current Anthropic pricing page before
# relying on these for budget-cap decisions.
_HAIKU_PROMPT_USD_PER_MTOK: float = 0.25
_HAIKU_OUTPUT_USD_PER_MTOK: float = 1.25
|
||
|
|
|
||
|
|
|
||
|
|
def _sync_tier_cost(prompt_tokens: int, output_tokens: int) -> float:
    """Estimate the synchronous-tier USD cost of one Haiku call.

    Each token count is converted to millions of tokens and multiplied by the
    matching per-million rate (``_HAIKU_PROMPT_USD_PER_MTOK`` for the prompt,
    ``_HAIKU_OUTPUT_USD_PER_MTOK`` for the output); the two amounts are then
    added together.

    The rates are approximate and may drift. The estimate is meant as an
    input to budget-cap gating (D-GUARD), not for billing reconciliation.

    Args:
        prompt_tokens: Number of prompt (input) tokens.
        output_tokens: Number of output (completion) tokens.

    Returns:
        The estimated cost in USD, before any batch discount.
    """
    tokens_per_mtok = 1_000_000.0
    prompt_usd = (
        float(prompt_tokens) / tokens_per_mtok
    ) * _HAIKU_PROMPT_USD_PER_MTOK
    output_usd = (
        float(output_tokens) / tokens_per_mtok
    ) * _HAIKU_OUTPUT_USD_PER_MTOK
    return float(prompt_usd + output_usd)
|
||
|
|
|
||
|
|
|
||
|
|
def _aggregate_estimated_usd(tasks: list[dict]) -> float:
    """Estimate the discounted batch cost, in USD, for a list of tasks.

    For every task the ``"prompt_tok"`` and ``"output_tok"`` entries are read
    (a missing key counts as 0), coerced to ``int`` and priced with
    ``_sync_tier_cost``. The summed synchronous estimate is then scaled by
    ``BATCH_DISCOUNT``.

    Args:
        tasks: Task dicts; only the ``"prompt_tok"`` and ``"output_tok"``
            keys are consulted here.

    Returns:
        The discounted cost estimate in USD; ``0.0`` for an empty list.
    """
    sync_total = 0.0
    # Plain left-to-right accumulation keeps the float result reproducible.
    for task in tasks:
        prompt_tok = int(task.get("prompt_tok", 0))
        output_tok = int(task.get("output_tok", 0))
        sync_total = sync_total + _sync_tier_cost(prompt_tok, output_tok)
    return sync_total * BATCH_DISCOUNT
|
||
|
|
|
||
|
|
|
||
|
|
def submit_batch_consolidation(
    store,
    tasks: list[dict],
    budget: BudgetLedger,
    rate: RateLimitLedger,
    llm_enabled: bool = True,
) -> tuple[bool, str, list[dict]]:
    """Gate a batch of consolidation tasks for the Anthropic Batch API.

    The function runs three stages and records an ``llm_health`` event on
    ``store`` for whichever one ends the call:

    1. D-GUARD gate. ``should_call_llm`` receives the ledgers, the
       ``llm_enabled`` toggle, whether ``ANTHROPIC_API_KEY`` is set in the
       environment, and the discounted cost estimate for ``tasks``. A refusal
       is logged with severity ``"warning"`` and its reason is returned.
    2. SDK import. ``anthropic`` is imported lazily; any exception raised by
       the import is logged with severity ``"critical"``.
    3. Scaffold stub. The real ``anthropic.batches.create`` call is not wired
       up yet, so an ``"info"`` event documents the gap.

    In the current scaffold every path returns ``ok=False`` with an empty
    result list, and this function never records spend on ``budget`` itself.

    Args:
        store: Event sink handed to ``write_event`` as its first argument.
        tasks: Task dicts used for the cost estimate and the task count.
        budget: Budget ledger forwarded to ``should_call_llm``.
        rate: Rate-limit ledger forwarded to ``should_call_llm``.
        llm_enabled: Toggle forwarded to ``should_call_llm``.

    Returns:
        A ``(ok, reason, results)`` tuple. ``ok`` is ``False`` and
        ``results`` is ``[]`` on every path; the caller is expected to fall
        back to its local Tier 0 output.
    """
    api_key_present = bool(os.environ.get("ANTHROPIC_API_KEY"))
    estimated_usd = _aggregate_estimated_usd(tasks)

    # Leading fields shared by every llm_health payload emitted below.
    fallback_base = {"component": "batch_consolidation", "tier": "fallback"}

    allowed, gate_reason = should_call_llm(
        budget=budget,
        rate=rate,
        llm_enabled=llm_enabled,
        has_api_key=api_key_present,
        estimated_usd=estimated_usd,
    )
    if not allowed:
        gate_payload = {
            **fallback_base,
            "reason": gate_reason,
            "task_count": len(tasks),
            "estimated_usd": estimated_usd,
        }
        write_event(
            store,
            kind="llm_health",
            data=gate_payload,
            severity="warning",
        )
        return False, gate_reason, []

    # The gate passed: import the SDK lazily so that a missing or broken
    # installation degrades to the fallback instead of failing at import time
    # of this module. The broad except is deliberate -- any failure while
    # importing is treated as "SDK unavailable".
    try:
        import anthropic  # noqa: F401
    except Exception as exc:
        import_payload = {
            **fallback_base,
            "error": f"import anthropic: {exc}",
        }
        write_event(
            store,
            kind="llm_health",
            data=import_payload,
            severity="critical",
        )
        return False, f"SDK unavailable: {exc}", []

    # H-02 fix (Phase 2 gap closure): until a real, successful
    # anthropic.batches.create response exists, do not debit the budget and
    # do not report success. Reporting (True, "ok", []) from a stub made the
    # caller switch to tier1 and charge the ledger for zero output, which
    # corrupted D-GUARD audit honesty and cost accounting. The info-severity
    # event below makes the missing wire-up visible to audit observers.
    scaffold_note = (
        "Plan 02-06 disables the scaffold-true return; "
        "real anthropic.batches.create wire-up is Phase 3. Budget "
        "stays untouched and effective_tier stays tier0 until a "
        "real successful SDK response lands."
    )
    stub_payload = {
        **fallback_base,
        "task_count": len(tasks),
        "estimated_usd": estimated_usd,
        "note": scaffold_note,
    }
    write_event(
        store,
        kind="llm_health",
        data=stub_payload,
        severity="info",
    )
    return False, "stub: batch API not yet wired", []
|