Initial release: iai-mcp v0.1.0
Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
This commit is contained in:
commit
f6b876fbe7
332 changed files with 97258 additions and 0 deletions
155
src/iai_mcp/batch.py
Normal file
155
src/iai_mcp/batch.py
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
"""TOK-09 Batch API consolidation (Plan 02-04 Task 3, D-29).
|
||||
|
||||
D-29 (unified daily process): when Tier 1 is enabled + credentials + budget
|
||||
+ rate-limit all green (D-GUARD ladder via should_call_llm), submit a batch
|
||||
to Anthropic's Batch API at 50% discount vs synchronous calls. Falls back
|
||||
to Tier 0 stub results on any gate failure or SDK absence.
|
||||
|
||||
Plan 02-04 scope: the D-GUARD gate + budget side-effect + llm_health event
|
||||
emission are load-bearing. The actual anthropic.batches.create call is
|
||||
scaffolded behind a lazy import; when the SDK surface differs from what the
|
||||
Python core expects (e.g. version skew), the stub returns an empty result
|
||||
list and records llm_health fallback. Plan 03 / future phases own the real
|
||||
wire-up once the SDK API settles.
|
||||
|
||||
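Pricing model (approximate; used for budget gating only):
- Haiku 4.5 approx sync cost: prompt $0.25 / 1M tokens + output $1.25 / 1M tokens.
  NOTE(review): these figures appear to match Anthropic's published Claude 3
  Haiku rates rather than Claude Haiku 4.5 -- confirm against the current
  pricing page before relying on them for budget caps.
- Batch discount: 50% off sync cost.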
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
from iai_mcp.events import write_event
|
||||
from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
|
||||
|
||||
|
||||
# Batch submissions are priced at half of the synchronous-tier cost.
BATCH_DISCOUNT: float = 0.5

# Nothing in this module polls for batch results in-process: a real Batch API
# job can take up to ~24h, so results are expected to arrive through a future
# polling job. Tests assert the gate and its side effects only; the result
# list is empty in Phase 2.
# NOTE(review): this timeout is not referenced by the code visible in this
# module -- confirm whether an external poller consumes it.
BATCH_POLL_TIMEOUT_SEC: int = 60

# Approximate synchronous Haiku pricing, in USD per one million tokens.
# NOTE(review): labelled "Haiku 4.5" by the module docstring, but the values
# look like Claude 3 Haiku list prices -- verify against current pricing.
_HAIKU_PROMPT_USD_PER_MTOK: float = 0.25
_HAIKU_OUTPUT_USD_PER_MTOK: float = 1.25
|
||||
|
||||
|
||||
def _sync_tier_cost(prompt_tokens: int, output_tokens: int) -> float:
    """Estimate the synchronous-tier USD price of a single Haiku call.

    Each token count is converted to millions of tokens and multiplied by the
    matching module-level per-million-token rate; the two parts are summed.

    The rates are approximate and may drift. Per the module's design notes the
    result feeds budget-cap decisions (D-GUARD steps 3 and 4) only, never
    billing reconciliation.
    """
    tokens_per_mtok = 1_000_000.0
    prompt_usd = float(prompt_tokens) / tokens_per_mtok * _HAIKU_PROMPT_USD_PER_MTOK
    output_usd = float(output_tokens) / tokens_per_mtok * _HAIKU_OUTPUT_USD_PER_MTOK
    # Both terms are already floats, so the sum needs no further conversion.
    return prompt_usd + output_usd
|
||||
|
||||
|
||||
def _aggregate_estimated_usd(tasks: list[dict]) -> float:
    """Return the batch-discounted USD estimate for ``tasks``.

    Every task dict may carry ``prompt_tok`` and ``output_tok`` counts. The
    synchronous-tier cost of each task is summed via ``_sync_tier_cost`` and
    the total is scaled by ``BATCH_DISCOUNT``.

    A count that is missing, ``None`` or otherwise empty is treated as zero so
    that a partially filled task cannot raise here, before the caller has run
    its gate or written a fallback event. An empty ``tasks`` list yields 0.0.
    """
    total_sync = 0.0
    for task in tasks:
        # ``or 0`` covers both an absent key and an explicit None value;
        # int(None) would otherwise raise TypeError.
        prompt_tok = int(task.get("prompt_tok") or 0)
        output_tok = int(task.get("output_tok") or 0)
        total_sync += _sync_tier_cost(prompt_tok, output_tok)
    return total_sync * BATCH_DISCOUNT
|
||||
|
||||
|
||||
def _emit_batch_fallback(store, severity: str, **fields: Any) -> None:
    """Write one ``llm_health`` fallback event for batch consolidation."""
    write_event(
        store,
        kind="llm_health",
        data={
            "component": "batch_consolidation",
            "tier": "fallback",
            **fields,
        },
        severity=severity,
    )


def submit_batch_consolidation(
    store,
    tasks: list[dict],
    budget: BudgetLedger,
    rate: RateLimitLedger,
    llm_enabled: bool = True,
) -> tuple[bool, str, list[dict]]:
    """Gate a batch of consolidation tasks for the Anthropic Batch API.

    The real batch submission is not wired up yet, so every path through this
    function currently returns ``ok=False`` with an empty result list and the
    caller is expected to fall back to local Tier 0 output. Exactly one
    ``llm_health`` event is written per call, and ``budget`` is never debited
    here.

    Args:
        store: Event store handed through to ``write_event``.
        tasks: Task dicts; ``prompt_tok`` / ``output_tok`` feed the estimate.
        budget: Budget ledger passed to ``should_call_llm``.
        rate: Rate-limit ledger passed to ``should_call_llm``.
        llm_enabled: Tier 1 toggle passed to ``should_call_llm``.

    Returns:
        ``(ok, reason, results)``. Outcomes, in evaluation order:

        1. ``should_call_llm`` refuses -> ``(False, <its reason>, [])`` and a
           ``warning`` event. Per the D-GUARD design notes that gate covers
           the llm_enabled toggle, API-key presence, budget caps and
           rate-limit cooldown.
        2. ``import anthropic`` fails -> ``(False, "SDK unavailable: ...", [])``
           and a ``critical`` event.
        3. Otherwise -> ``(False, "stub: batch API not yet wired", [])`` and
           an ``info`` event documenting the unwired submission.
    """
    has_key = bool(os.environ.get("ANTHROPIC_API_KEY"))
    estimated_usd = _aggregate_estimated_usd(tasks)
    task_count = len(tasks)

    ok, reason = should_call_llm(
        budget=budget,
        rate=rate,
        llm_enabled=llm_enabled,
        has_api_key=has_key,
        estimated_usd=estimated_usd,
    )
    if not ok:
        _emit_batch_fallback(
            store,
            "warning",
            reason=reason,
            task_count=task_count,
            estimated_usd=estimated_usd,
        )
        return False, reason, []

    # Eligible path: import the SDK lazily so the module loads without it.
    # The catch is deliberately broad: any failure while importing the SDK
    # (not just ImportError) must degrade to the fallback, not crash.
    try:
        import anthropic  # noqa: F401
    except Exception as exc:
        # Carries task_count / estimated_usd like the other two fallback
        # events so audit rows from this call site stay comparable.
        _emit_batch_fallback(
            store,
            "critical",
            error=f"import anthropic: {exc}",
            task_count=task_count,
            estimated_usd=estimated_usd,
        )
        return False, f"SDK unavailable: {exc}", []

    # H-02 FIX (Phase 2 gap closure): report ok=False and leave the budget
    # untouched until a real, successful batch submission exists. Returning
    # (True, "ok", []) after budget.record_spend made the caller flip
    # effective_tier to tier1 and debit the ledger for a stub with zero output.
    # The info-severity event keeps that gap visible to audit observers.
    # NOTE(review): recent SDK releases appear to expose batch creation as
    # client.messages.batches.create -- confirm before wiring the real call.
    _emit_batch_fallback(
        store,
        "info",
        task_count=task_count,
        estimated_usd=estimated_usd,
        note=(
            "Plan 02-06 disables the scaffold-true return; "
            "real anthropic.batches.create wire-up is Phase 3. Budget "
            "stays untouched and effective_tier stays tier0 until a "
            "real successful SDK response lands."
        ),
    )
    return False, "stub: batch API not yet wired", []
|
||||
Loading…
Add table
Add a link
Reference in a new issue