"""TOK-09 Batch API consolidation (Plan 02-04 Task 3, D-29).

D-29 (unified daily process): when Tier 1 is enabled + credentials + budget
+ rate-limit all green (D-GUARD ladder via should_call_llm), submit a batch
to Anthropic's Batch API at 50% discount vs synchronous calls. Falls back
to Tier 0 stub results on any gate failure or SDK absence.

Plan 02-04 scope: the D-GUARD gate + budget side-effect + llm_health event
emission are load-bearing. The actual anthropic.batches.create call is
scaffolded behind a lazy import; when the SDK surface differs from what the
Python core expects (e.g. version skew), the stub returns an empty result
list and records llm_health fallback. Plan 03 / future phases own the real
wire-up once the SDK API settles.

Pricing model:
- Haiku 4.5 approx sync cost: prompt $0.25 / 1M tokens + output $1.25 / 1M
- Batch discount: 50% off sync cost.
"""
from __future__ import annotations

import os
from typing import Any

from iai_mcp.events import write_event
from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm


# Batch API pricing multiplier applied to the sync-tier estimate:
# 50% discount vs sync tier.
BATCH_DISCOUNT = 0.5

# Scope note: we do not poll in-process. Real-world Batch API can take
# up to ~24h, so results are expected to arrive via a future polling job.
# In the current scaffold submit_batch_consolidation never reports success:
# even when every gate passes it returns
# (False, "stub: batch API not yet wired", []). Tests assert the gate +
# side-effects; the result list is empty in Phase 2.
# NOTE(review): this timeout is not referenced by the code visible in this
# module -- presumably reserved for the future polling job; confirm.
BATCH_POLL_TIMEOUT_SEC = 60

# Haiku 4.5 approximate sync pricing (USD per 1M tokens).
_HAIKU_PROMPT_USD_PER_MTOK = 0.25
_HAIKU_OUTPUT_USD_PER_MTOK = 1.25


def _sync_tier_cost(prompt_tokens: int, output_tokens: int) -> float:
    """Estimate the synchronous-tier USD price of one Haiku 4.5 call.

    Each token count is scaled to millions of tokens and multiplied by the
    matching per-million rate (prompt vs. output); the two parts are summed.

    The rates are approximate and may drift. The result is an estimate fed
    into the budget gate (via the aggregated ``estimated_usd``); it is not
    meant for billing reconciliation.

    Args:
        prompt_tokens: Number of prompt (input) tokens.
        output_tokens: Number of output (completion) tokens.

    Returns:
        Approximate cost in USD, before any batch discount.
    """
    tokens_per_mtok = 1_000_000.0
    prompt_usd = (float(prompt_tokens) / tokens_per_mtok) * _HAIKU_PROMPT_USD_PER_MTOK
    output_usd = (float(output_tokens) / tokens_per_mtok) * _HAIKU_OUTPUT_USD_PER_MTOK
    return float(prompt_usd + output_usd)


def _aggregate_estimated_usd(tasks: list[dict]) -> float:
    """Return the batch-discounted USD estimate for a list of tasks.

    Each task dict may carry ``prompt_tok`` and ``output_tok``; a missing
    key counts as 0 tokens. Per-task sync-tier costs are added up and the
    total is multiplied by ``BATCH_DISCOUNT``.

    Args:
        tasks: Task dicts to price.

    Returns:
        Estimated USD cost of the whole batch (0.0 for an empty list).
    """
    running_usd = 0.0
    # Plain left-to-right accumulation keeps the float result independent of
    # whatever summation strategy the built-in sum() applies to floats.
    for task in tasks:
        prompt_count = int(task.get("prompt_tok", 0))
        output_count = int(task.get("output_tok", 0))
        running_usd += _sync_tier_cost(prompt_count, output_count)
    return running_usd * BATCH_DISCOUNT


def submit_batch_consolidation(
    store,
    tasks: list[dict],
    budget: BudgetLedger,
    rate: RateLimitLedger,
    llm_enabled: bool = True,
) -> tuple[bool, str, list[dict]]:
    """Gate, and (eventually) submit, a batch of consolidation tasks.

    Returns a ``(ok, reason, results)`` triple. In the current scaffold every
    path returns ``ok=False`` with an empty ``results`` list, so the caller
    falls back to its local Tier 0 output; ``reason`` says which step
    stopped the submission.

    Steps, in order:
      1-4. D-GUARD gate, delegated to ``should_call_llm`` (documented for
           D-GUARD as: llm_enabled toggle, API key present, budget daily +
           monthly caps, rate-limit cooldown). On refusal a ``warning``
           ``llm_health`` event is written and the gate's reason returned.
      5.   Lazy ``import anthropic``. Any exception raised by the import
           writes a ``critical`` ``llm_health`` event and returns
           ``"SDK unavailable: ..."``.
      6.   Real batch submission is not wired yet: an ``info``
           ``llm_health`` event documents the gap and the function returns
           ``"stub: batch API not yet wired"``.

    The budget ledger is never debited by this function.

    Args:
        store: Event store handle passed through to ``write_event``.
        tasks: Task dicts; ``prompt_tok`` / ``output_tok`` drive the cost
            estimate.
        budget: Budget ledger consulted by the gate.
        rate: Rate-limit ledger consulted by the gate.
        llm_enabled: Master toggle forwarded to the gate.
    """
    api_key_present = bool(os.environ.get("ANTHROPIC_API_KEY"))
    projected_usd = _aggregate_estimated_usd(tasks)

    allowed, verdict = should_call_llm(
        budget=budget,
        rate=rate,
        llm_enabled=llm_enabled,
        has_api_key=api_key_present,
        estimated_usd=projected_usd,
    )
    if not allowed:
        # Gate refused: record why, then hand control back to Tier 0.
        gate_payload = {
            "component": "batch_consolidation",
            "tier": "fallback",
            "reason": verdict,
            "task_count": len(tasks),
            "estimated_usd": projected_usd,
        }
        write_event(
            store,
            kind="llm_health",
            data=gate_payload,
            severity="warning",
        )
        return False, verdict, []

    # Gate passed: import the SDK lazily so the module loads without it.
    # The catch is deliberately broad -- anything raised while importing
    # (not only ImportError) is treated as "SDK unavailable".
    try:
        import anthropic  # noqa: F401
    except Exception as exc:
        sdk_payload = {
            "component": "batch_consolidation",
            "tier": "fallback",
            "error": f"import anthropic: {exc}",
        }
        write_event(
            store,
            kind="llm_health",
            data=sdk_payload,
            severity="critical",
        )
        return False, f"SDK unavailable: {exc}", []

    # H-02 FIX (Phase 2 gap closure): no budget.record_spend and no
    # (True, "ok", []) return here. Reporting success from a stub that
    # produces zero output made run_heavy_consolidation flip
    # effective_tier=tier1 and debit the BudgetLedger, corrupting D-GUARD
    # audit honesty and cost accounting.
    #
    # The real anthropic.batches.create wire-up is deferred; until it lands
    # the gap is surfaced through an info-severity llm_health event so
    # `iai-mcp audit` observers can see it explicitly.
    stub_payload = {
        "component": "batch_consolidation",
        "tier": "fallback",
        "task_count": len(tasks),
        "estimated_usd": projected_usd,
        "note": (
            "Plan 02-06 disables the scaffold-true return; "
            "real anthropic.batches.create wire-up is Phase 3. Budget "
            "stays untouched and effective_tier stays tier0 until a "
            "real successful SDK response lands."
        ),
    }
    write_event(
        store,
        kind="llm_health",
        data=stub_payload,
        severity="info",
    )
    return False, "stub: batch API not yet wired", []