mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
merge origin/main, add DigitalOcean alongside Vercel and OpenRouter
This commit is contained in:
commit
013f377ddf
138 changed files with 17041 additions and 3335 deletions
4
.github/workflows/ci.yml
vendored
4
.github/workflows/ci.yml
vendored
|
|
@ -133,13 +133,13 @@ jobs:
|
|||
load: true
|
||||
tags: |
|
||||
${{ env.PLANO_DOCKER_IMAGE }}
|
||||
${{ env.DOCKER_IMAGE }}:0.4.19
|
||||
${{ env.DOCKER_IMAGE }}:0.4.20
|
||||
${{ env.DOCKER_IMAGE }}:latest
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
- name: Save image as artifact
|
||||
run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.19 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar
|
||||
run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.20 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar
|
||||
|
||||
- name: Upload image artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ export function Hero() {
|
|||
>
|
||||
<div className="inline-flex flex-wrap items-center gap-1.5 sm:gap-2 px-3 sm:px-4 py-1 rounded-full bg-[rgba(185,191,255,0.4)] border border-[var(--secondary)] shadow backdrop-blur hover:bg-[rgba(185,191,255,0.6)] transition-colors cursor-pointer">
|
||||
<span className="text-xs sm:text-sm font-medium text-black/65">
|
||||
v0.4.19
|
||||
v0.4.20
|
||||
</span>
|
||||
<span className="text-xs sm:text-sm font-medium text-black ">
|
||||
—
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.19
|
||||
docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.20
|
||||
|
|
|
|||
|
|
@ -1,3 +1,3 @@
|
|||
"""Plano CLI - Intelligent Prompt Gateway."""
|
||||
|
||||
__version__ = "0.4.19"
|
||||
__version__ = "0.4.20"
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ SUPPORTED_PROVIDERS_WITHOUT_BASE_URL = [
|
|||
"zhipu",
|
||||
"vercel",
|
||||
"openrouter",
|
||||
"digitalocean",
|
||||
]
|
||||
|
||||
SUPPORTED_PROVIDERS = (
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ PLANO_COLOR = "#969FF4"
|
|||
|
||||
SERVICE_NAME_ARCHGW = "plano"
|
||||
PLANO_DOCKER_NAME = "plano"
|
||||
PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.19")
|
||||
PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.20")
|
||||
DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://localhost:4317"
|
||||
|
||||
# Native mode constants
|
||||
|
|
|
|||
163
cli/planoai/defaults.py
Normal file
163
cli/planoai/defaults.py
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
"""Default config synthesizer for zero-config ``planoai up``.
|
||||
|
||||
When the user runs ``planoai up`` in a directory with no ``config.yaml`` /
|
||||
``plano_config.yaml``, we synthesize a pass-through config that covers the
|
||||
common LLM providers and auto-wires OTel export to ``localhost:4317`` so
|
||||
``planoai obs`` works out of the box.
|
||||
|
||||
Auth handling:
|
||||
- If the provider's env var is set, bind ``access_key: $ENV_VAR``.
|
||||
- Otherwise set ``passthrough_auth: true`` so the client's own Authorization
|
||||
header is forwarded. No env var is required to start the proxy.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
|
||||
DEFAULT_LLM_LISTENER_PORT = 12000
# plano_config validation requires an http:// scheme on the OTLP endpoint.
DEFAULT_OTLP_ENDPOINT = "http://localhost:4317"


@dataclass(frozen=True)
class ProviderDefault:
    """Static description of one LLM provider known to the zero-config path."""

    name: str
    env_var: str
    base_url: str
    model_pattern: str
    # Only set for providers whose model-pattern prefix is NOT one of the
    # built-in SUPPORTED_PROVIDERS in cli/planoai/config_generator.py. For
    # built-ins the validator infers the interface from the model prefix and
    # rejects configs that set this field explicitly.
    provider_interface: str | None = None


# Ordering is deliberately stable so synthesized configs diff cleanly.
PROVIDER_DEFAULTS: list[ProviderDefault] = [
    ProviderDefault(
        name="openai",
        env_var="OPENAI_API_KEY",
        base_url="https://api.openai.com/v1",
        model_pattern="openai/*",
    ),
    ProviderDefault(
        name="anthropic",
        env_var="ANTHROPIC_API_KEY",
        base_url="https://api.anthropic.com/v1",
        model_pattern="anthropic/*",
    ),
    ProviderDefault(
        name="gemini",
        env_var="GEMINI_API_KEY",
        base_url="https://generativelanguage.googleapis.com/v1beta",
        model_pattern="gemini/*",
    ),
    ProviderDefault(
        name="groq",
        env_var="GROQ_API_KEY",
        base_url="https://api.groq.com/openai/v1",
        model_pattern="groq/*",
    ),
    ProviderDefault(
        name="deepseek",
        env_var="DEEPSEEK_API_KEY",
        base_url="https://api.deepseek.com/v1",
        model_pattern="deepseek/*",
    ),
    ProviderDefault(
        name="mistral",
        env_var="MISTRAL_API_KEY",
        base_url="https://api.mistral.ai/v1",
        model_pattern="mistral/*",
    ),
    # DigitalOcean Gradient is a first-class provider post-#889 — the
    # `digitalocean/` model prefix routes to the built-in Envoy cluster, so
    # no base_url is needed at runtime.
    ProviderDefault(
        name="digitalocean",
        env_var="DO_API_KEY",
        base_url="https://inference.do-ai.run/v1",
        model_pattern="digitalocean/*",
    ),
]


@dataclass
class DetectionResult:
    """Providers split by whether their API-key env var was present."""

    with_keys: list[ProviderDefault]
    passthrough: list[ProviderDefault]

    @property
    def summary(self) -> str:
        """One-line human summary, e.g. ``env-keyed: openai | pass-through: groq``."""
        segments: list[str] = []
        if self.with_keys:
            segments.append("env-keyed: " + ", ".join(p.name for p in self.with_keys))
        if self.passthrough:
            segments.append("pass-through: " + ", ".join(p.name for p in self.passthrough))
        return " | ".join(segments) if segments else "no providers"


def detect_providers(env: dict[str, str] | None = None) -> DetectionResult:
    """Partition PROVIDER_DEFAULTS by truthiness of each provider's env var.

    ``env`` defaults to a snapshot of ``os.environ``; pass a dict in tests.
    """
    lookup = dict(os.environ) if env is None else env
    keyed = [p for p in PROVIDER_DEFAULTS if lookup.get(p.env_var)]
    unkeyed = [p for p in PROVIDER_DEFAULTS if not lookup.get(p.env_var)]
    return DetectionResult(with_keys=keyed, passthrough=unkeyed)


def synthesize_default_config(
    env: dict[str, str] | None = None,
    *,
    listener_port: int = DEFAULT_LLM_LISTENER_PORT,
    otel_endpoint: str = DEFAULT_OTLP_ENDPOINT,
) -> dict:
    """Build a pass-through config dict ready for validation + envoy rendering.

    The returned dict can be dumped to YAML and handed to the existing
    ``planoai up`` pipeline unchanged. Providers with a detected API key get
    ``access_key: $ENV_VAR``; the rest get ``passthrough_auth: true`` so the
    client's own Authorization header is forwarded.
    """
    detection = detect_providers(env)

    def _row(provider: ProviderDefault, extra: dict) -> dict:
        # Base entry first; provider_interface only when explicitly required;
        # auth fields last.
        entry: dict = {
            "name": provider.name,
            "model": provider.model_pattern,
            "base_url": provider.base_url,
        }
        if provider.provider_interface is not None:
            entry["provider_interface"] = provider.provider_interface
        entry.update(extra)
        return entry

    model_providers: list[dict] = [
        _row(p, {"access_key": f"${p.env_var}"}) for p in detection.with_keys
    ] + [_row(p, {"passthrough_auth": True}) for p in detection.passthrough]

    # No explicit `default: true` entry is synthesized: the plano config
    # validator rejects wildcard models as defaults, and brightstaff already
    # registers bare model names as lookup keys during wildcard expansion
    # (crates/common/src/llm_providers.rs), so `{"model": "gpt-4o-mini"}`
    # without a prefix resolves via the openai wildcard. See #890.
    return {
        "version": "v0.4.0",
        "listeners": [
            {
                "name": "llm",
                "type": "model",
                "port": listener_port,
                "address": "0.0.0.0",
            }
        ],
        "model_providers": model_providers,
        "tracing": {
            "random_sampling": 100,
            "opentracing_grpc_endpoint": otel_endpoint,
        },
    }
|
||||
|
|
@ -6,7 +6,13 @@ import sys
|
|||
import contextlib
|
||||
import logging
|
||||
import rich_click as click
|
||||
import yaml
|
||||
from planoai import targets
|
||||
from planoai.defaults import (
|
||||
DEFAULT_LLM_LISTENER_PORT,
|
||||
detect_providers,
|
||||
synthesize_default_config,
|
||||
)
|
||||
|
||||
# Brand color - Plano purple
|
||||
PLANO_COLOR = "#969FF4"
|
||||
|
|
@ -31,6 +37,7 @@ from planoai.core import (
|
|||
)
|
||||
from planoai.init_cmd import init as init_cmd
|
||||
from planoai.trace_cmd import trace as trace_cmd, start_trace_listener_background
|
||||
from planoai.obs_cmd import obs as obs_cmd
|
||||
from planoai.consts import (
|
||||
DEFAULT_OTEL_TRACING_GRPC_ENDPOINT,
|
||||
DEFAULT_NATIVE_OTEL_TRACING_GRPC_ENDPOINT,
|
||||
|
|
@ -317,7 +324,23 @@ def build(docker):
|
|||
help="Show detailed startup logs with timestamps.",
|
||||
is_flag=True,
|
||||
)
|
||||
def up(file, path, foreground, with_tracing, tracing_port, docker, verbose):
|
||||
@click.option(
|
||||
"--listener-port",
|
||||
default=DEFAULT_LLM_LISTENER_PORT,
|
||||
type=int,
|
||||
show_default=True,
|
||||
help="Override the LLM listener port when running without a config file. Ignored when a config file is present.",
|
||||
)
|
||||
def up(
|
||||
file,
|
||||
path,
|
||||
foreground,
|
||||
with_tracing,
|
||||
tracing_port,
|
||||
docker,
|
||||
verbose,
|
||||
listener_port,
|
||||
):
|
||||
"""Starts Plano."""
|
||||
from rich.status import Status
|
||||
|
||||
|
|
@ -328,12 +351,23 @@ def up(file, path, foreground, with_tracing, tracing_port, docker, verbose):
|
|||
# Use the utility function to find config file
|
||||
plano_config_file = find_config_file(path, file)
|
||||
|
||||
# Check if the file exists
|
||||
# Zero-config fallback: when no user config is present, synthesize a
|
||||
# pass-through config that covers the common LLM providers and
|
||||
# auto-wires OTel export to ``planoai obs``. See cli/planoai/defaults.py.
|
||||
if not os.path.exists(plano_config_file):
|
||||
detection = detect_providers()
|
||||
cfg_dict = synthesize_default_config(listener_port=listener_port)
|
||||
|
||||
default_dir = os.path.expanduser("~/.plano")
|
||||
os.makedirs(default_dir, exist_ok=True)
|
||||
synthesized_path = os.path.join(default_dir, "default_config.yaml")
|
||||
with open(synthesized_path, "w") as fh:
|
||||
yaml.safe_dump(cfg_dict, fh, sort_keys=False)
|
||||
plano_config_file = synthesized_path
|
||||
console.print(
|
||||
f"[red]✗[/red] Config file not found: [dim]{plano_config_file}[/dim]"
|
||||
f"[dim]No plano config found; using defaults ({detection.summary}). "
|
||||
f"Listening on :{listener_port}, tracing -> http://localhost:4317.[/dim]"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
if not docker:
|
||||
from planoai.native_runner import native_validate_config
|
||||
|
|
@ -681,6 +715,7 @@ main.add_command(cli_agent)
|
|||
main.add_command(generate_prompt_targets)
|
||||
main.add_command(init_cmd, name="init")
|
||||
main.add_command(trace_cmd, name="trace")
|
||||
main.add_command(obs_cmd, name="obs")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
|||
6
cli/planoai/obs/__init__.py
Normal file
6
cli/planoai/obs/__init__.py
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
"""Plano observability console: in-memory live view of LLM traffic."""
|
||||
|
||||
from planoai.obs.collector import LLMCall, LLMCallStore, ObsCollector
|
||||
from planoai.obs.pricing import PricingCatalog
|
||||
|
||||
__all__ = ["LLMCall", "LLMCallStore", "ObsCollector", "PricingCatalog"]
|
||||
266
cli/planoai/obs/collector.py
Normal file
266
cli/planoai/obs/collector.py
Normal file
|
|
@ -0,0 +1,266 @@
|
|||
"""In-memory collector for LLM calls, fed by OTLP/gRPC spans from brightstaff."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
from collections import deque
|
||||
from concurrent import futures
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Iterable
|
||||
|
||||
import grpc
|
||||
from opentelemetry.proto.collector.trace.v1 import (
|
||||
trace_service_pb2,
|
||||
trace_service_pb2_grpc,
|
||||
)
|
||||
|
||||
DEFAULT_GRPC_PORT = 4317
DEFAULT_CAPACITY = 1000


@dataclass
class LLMCall:
    """One LLM call as reconstructed from a brightstaff LLM span.

    Every field whose underlying span attribute was absent stays ``None``.
    """

    request_id: str
    timestamp: datetime
    model: str
    provider: str | None = None
    request_model: str | None = None
    session_id: str | None = None
    route_name: str | None = None
    is_streaming: bool | None = None
    status_code: int | None = None
    prompt_tokens: int | None = None
    completion_tokens: int | None = None
    total_tokens: int | None = None
    cached_input_tokens: int | None = None
    cache_creation_tokens: int | None = None
    reasoning_tokens: int | None = None
    ttft_ms: float | None = None
    duration_ms: float | None = None
    routing_strategy: str | None = None
    routing_reason: str | None = None
    cost_usd: float | None = None

    @property
    def tpt_ms(self) -> float | None:
        """Mean milliseconds per generated (completion) token, or None.

        The time-to-first-token window is excluded from the generation span.
        """
        if self.duration_ms is None or not self.completion_tokens:
            return None
        window = max(0.0, self.duration_ms - (self.ttft_ms or 0.0))
        return window / self.completion_tokens if window > 0 else None

    @property
    def tokens_per_sec(self) -> float | None:
        """Generation throughput derived from ``tpt_ms``; None when unknown."""
        per_token = self.tpt_ms
        if per_token is None or per_token <= 0:
            return None
        return 1000.0 / per_token


class LLMCallStore:
    """Thread-safe fixed-capacity ring buffer of recent LLM calls."""

    def __init__(self, capacity: int = DEFAULT_CAPACITY) -> None:
        self._capacity = capacity
        self._buffer: deque[LLMCall] = deque(maxlen=capacity)
        self._guard = threading.Lock()

    @property
    def capacity(self) -> int:
        """Maximum number of calls retained before eviction."""
        return self._capacity

    def add(self, call: LLMCall) -> None:
        """Append a call; the oldest entry is evicted once full."""
        with self._guard:
            self._buffer.append(call)

    def clear(self) -> None:
        """Drop every retained call."""
        with self._guard:
            self._buffer.clear()

    def snapshot(self) -> list[LLMCall]:
        """Return a point-in-time copy of the retained calls, oldest first."""
        with self._guard:
            return list(self._buffer)

    def __len__(self) -> int:
        with self._guard:
            return len(self._buffer)
|
||||
|
||||
|
||||
# Span attribute keys used below are the canonical OTel / Plano keys emitted by
|
||||
# brightstaff — see crates/brightstaff/src/tracing/constants.rs for the source
|
||||
# of truth.
|
||||
|
||||
|
||||
def _anyvalue_to_python(value: Any) -> Any: # AnyValue from OTLP
|
||||
kind = value.WhichOneof("value")
|
||||
if kind == "string_value":
|
||||
return value.string_value
|
||||
if kind == "bool_value":
|
||||
return value.bool_value
|
||||
if kind == "int_value":
|
||||
return value.int_value
|
||||
if kind == "double_value":
|
||||
return value.double_value
|
||||
return None
|
||||
|
||||
|
||||
def _attrs_to_dict(attrs: Iterable[Any]) -> dict[str, Any]:
|
||||
out: dict[str, Any] = {}
|
||||
for kv in attrs:
|
||||
py = _anyvalue_to_python(kv.value)
|
||||
if py is not None:
|
||||
out[kv.key] = py
|
||||
return out
|
||||
|
||||
|
||||
def _maybe_int(value: Any) -> int | None:
|
||||
if value is None:
|
||||
return None
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _maybe_float(value: Any) -> float | None:
|
||||
if value is None:
|
||||
return None
|
||||
try:
|
||||
return float(value)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def span_to_llm_call(
    span: Any, service_name: str, pricing: Any | None = None
) -> LLMCall | None:
    """Convert an OTLP span into an LLMCall, or return None if it isn't one.

    A span is considered an LLM call iff it carries the ``llm.model``
    attribute. ``service_name`` backfills the provider when the span lacks
    ``llm.provider``; ``pricing``, when given, must expose ``cost_for_call``.
    """
    attrs = _attrs_to_dict(span.attributes)
    model = attrs.get("llm.model")
    if not model:
        return None

    def _text(key: str) -> str | None:
        # str() of the attribute when present, else None.
        return str(attrs[key]) if key in attrs else None

    # Prefer explicit request-id attributes; fall back to the span id.
    request_id = span.span_id.hex() if span.span_id else ""
    for key in ("request_id", "http.request_id"):
        if attrs.get(key) is not None:
            request_id = str(attrs[key])
            break

    start_ns = span.start_time_unix_nano or 0
    if start_ns:
        ts = datetime.fromtimestamp(
            start_ns / 1_000_000_000, tz=timezone.utc
        ).astimezone()
    else:
        ts = datetime.now().astimezone()

    provider = _text("llm.provider")
    if provider is None:
        provider = service_name

    call = LLMCall(
        request_id=str(request_id),
        timestamp=ts,
        model=str(model),
        provider=provider,
        request_model=_text("model.requested"),
        session_id=_text("plano.session_id"),
        route_name=_text("plano.route.name"),
        is_streaming=(
            bool(attrs["llm.is_streaming"]) if "llm.is_streaming" in attrs else None
        ),
        status_code=_maybe_int(attrs.get("http.status_code")),
        prompt_tokens=_maybe_int(attrs.get("llm.usage.prompt_tokens")),
        completion_tokens=_maybe_int(attrs.get("llm.usage.completion_tokens")),
        total_tokens=_maybe_int(attrs.get("llm.usage.total_tokens")),
        cached_input_tokens=_maybe_int(attrs.get("llm.usage.cached_input_tokens")),
        cache_creation_tokens=_maybe_int(attrs.get("llm.usage.cache_creation_tokens")),
        reasoning_tokens=_maybe_int(attrs.get("llm.usage.reasoning_tokens")),
        ttft_ms=_maybe_float(attrs.get("llm.time_to_first_token")),
        duration_ms=_maybe_float(attrs.get("llm.duration_ms")),
        routing_strategy=_text("routing.strategy"),
        routing_reason=_text("routing.selection_reason"),
    )

    if pricing is not None:
        call.cost_usd = pricing.cost_for_call(call)

    return call
|
||||
|
||||
|
||||
class _ObsServicer(trace_service_pb2_grpc.TraceServiceServicer):
    """OTLP TraceService servicer that folds LLM spans into the store."""

    def __init__(self, store: LLMCallStore, pricing: Any | None) -> None:
        self._store = store
        self._pricing = pricing

    @staticmethod
    def _service_name_of(resource_spans) -> str:
        """Pull ``service.name`` off the resource; 'unknown' when absent."""
        for attr in resource_spans.resource.attributes:
            if attr.key == "service.name":
                val = _anyvalue_to_python(attr.value)
                if val is not None:
                    return str(val)
                break
        return "unknown"

    def Export(self, request, context):  # noqa: N802 — gRPC generated name
        """Handle an Export RPC: record every span that looks like an LLM call."""
        for resource_spans in request.resource_spans:
            svc = self._service_name_of(resource_spans)
            for scope_spans in resource_spans.scope_spans:
                for span in scope_spans.spans:
                    call = span_to_llm_call(span, svc, self._pricing)
                    if call is not None:
                        self._store.add(call)
        return trace_service_pb2.ExportTraceServiceResponse()
|
||||
|
||||
|
||||
@dataclass
class ObsCollector:
    """Owns the OTLP/gRPC server and the in-memory LLMCall ring buffer."""

    store: LLMCallStore = field(default_factory=LLMCallStore)
    pricing: Any | None = None
    host: str = "0.0.0.0"
    port: int = DEFAULT_GRPC_PORT
    _server: grpc.Server | None = field(default=None, init=False, repr=False)

    def start(self) -> None:
        """Start the OTLP listener; no-op if already running.

        Raises OSError when the port cannot be bound.
        """
        if self._server is not None:
            return
        srv = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
        trace_service_pb2_grpc.add_TraceServiceServicer_to_server(
            _ObsServicer(self.store, self.pricing), srv
        )
        endpoint = f"{self.host}:{self.port}"
        # add_insecure_port returns 0 when the bind fails.
        if srv.add_insecure_port(endpoint) == 0:
            raise OSError(
                f"Failed to bind OTLP listener on {endpoint}: port already in use. "
                "Stop tracing via `planoai trace down` or pick another port with --port."
            )
        srv.start()
        self._server = srv

    def stop(self, grace: float = 2.0) -> None:
        """Stop the server if running; safe to call repeatedly."""
        srv, self._server = self._server, None
        if srv is not None:
            srv.stop(grace)
||||
321
cli/planoai/obs/pricing.py
Normal file
321
cli/planoai/obs/pricing.py
Normal file
|
|
@ -0,0 +1,321 @@
|
|||
"""DigitalOcean Gradient pricing catalog for the obs console.
|
||||
|
||||
Ported loosely from ``crates/brightstaff/src/router/model_metrics.rs::fetch_do_pricing``.
|
||||
Single-source: one fetch at startup, cached for the life of the process.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import threading
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
DEFAULT_PRICING_URL = "https://api.digitalocean.com/v2/gen-ai/models/catalog"
FETCH_TIMEOUT_SECS = 5.0


logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class ModelPrice:
    """Per-token USD rates; multiply token counts by these to get cost."""

    input_per_token_usd: float
    output_per_token_usd: float
    # None when the catalog doesn't publish a cached-input rate.
    cached_input_per_token_usd: float | None = None
|
||||
|
||||
class PricingCatalog:
|
||||
"""In-memory pricing lookup keyed by model id.
|
||||
|
||||
DO's catalog uses ids like ``openai-gpt-5.4``; Plano's resolved model names
|
||||
may arrive as ``do/openai-gpt-5.4`` or bare ``openai-gpt-5.4``. We strip the
|
||||
leading provider prefix when looking up.
|
||||
"""
|
||||
|
||||
def __init__(self, prices: dict[str, ModelPrice] | None = None) -> None:
|
||||
self._prices: dict[str, ModelPrice] = prices or {}
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def __len__(self) -> int:
|
||||
with self._lock:
|
||||
return len(self._prices)
|
||||
|
||||
def sample_models(self, n: int = 5) -> list[str]:
|
||||
with self._lock:
|
||||
return list(self._prices.keys())[:n]
|
||||
|
||||
@classmethod
|
||||
def fetch(cls, url: str = DEFAULT_PRICING_URL) -> "PricingCatalog":
|
||||
"""Fetch pricing from DO's catalog endpoint. On failure, returns an
|
||||
empty catalog (cost column will be blank).
|
||||
|
||||
The catalog endpoint is public — no auth required, no signup — so
|
||||
``planoai obs`` gets cost data on first run out of the box.
|
||||
"""
|
||||
try:
|
||||
resp = requests.get(url, timeout=FETCH_TIMEOUT_SECS)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
except Exception as exc: # noqa: BLE001 — best-effort; never fatal
|
||||
logger.warning(
|
||||
"DO pricing fetch failed: %s; cost column will be blank.",
|
||||
exc,
|
||||
)
|
||||
return cls()
|
||||
|
||||
prices = _parse_do_pricing(data)
|
||||
if not prices:
|
||||
# Dump the first entry's raw shape so we can see which fields DO
|
||||
# actually returned — helps when the catalog adds new fields or
|
||||
# the response doesn't match our parser.
|
||||
import json as _json
|
||||
|
||||
sample_items = _coerce_items(data)
|
||||
sample = sample_items[0] if sample_items else data
|
||||
logger.warning(
|
||||
"DO pricing response had no parseable entries; cost column "
|
||||
"will be blank. Sample entry: %s",
|
||||
_json.dumps(sample, default=str)[:400],
|
||||
)
|
||||
return cls(prices)
|
||||
|
||||
def price_for(self, model_name: str | None) -> ModelPrice | None:
|
||||
if not model_name:
|
||||
return None
|
||||
with self._lock:
|
||||
# Try the full name first, then stripped prefix, then lowercased variants.
|
||||
for candidate in _model_key_candidates(model_name):
|
||||
hit = self._prices.get(candidate)
|
||||
if hit is not None:
|
||||
return hit
|
||||
return None
|
||||
|
||||
def cost_for_call(self, call: Any) -> float | None:
|
||||
"""Compute USD cost for an LLMCall. Returns None when pricing is unknown."""
|
||||
price = self.price_for(getattr(call, "model", None)) or self.price_for(
|
||||
getattr(call, "request_model", None)
|
||||
)
|
||||
if price is None:
|
||||
return None
|
||||
prompt = int(getattr(call, "prompt_tokens", 0) or 0)
|
||||
completion = int(getattr(call, "completion_tokens", 0) or 0)
|
||||
cached = int(getattr(call, "cached_input_tokens", 0) or 0)
|
||||
|
||||
# Cached input tokens are priced separately at the cached rate when known;
|
||||
# otherwise they're already counted in prompt tokens at the regular rate.
|
||||
fresh_prompt = prompt
|
||||
if price.cached_input_per_token_usd is not None and cached:
|
||||
fresh_prompt = max(0, prompt - cached)
|
||||
cost_cached = cached * price.cached_input_per_token_usd
|
||||
else:
|
||||
cost_cached = 0.0
|
||||
|
||||
cost = (
|
||||
fresh_prompt * price.input_per_token_usd
|
||||
+ completion * price.output_per_token_usd
|
||||
+ cost_cached
|
||||
)
|
||||
return round(cost, 6)
|
||||
|
||||
|
||||
_DATE_SUFFIX_RE = re.compile(r"-\d{8}$")
|
||||
_PROVIDER_PREFIXES = ("anthropic", "openai", "google", "meta", "cohere", "mistral")
|
||||
_ANTHROPIC_FAMILIES = {"opus", "sonnet", "haiku"}
|
||||
|
||||
|
||||
def _model_key_candidates(model_name: str) -> list[str]:
|
||||
"""Lookup-side variants of a Plano-emitted model name.
|
||||
|
||||
Plano resolves names like ``claude-haiku-4-5-20251001``; the catalog stores
|
||||
them as ``anthropic-claude-haiku-4.5``. We strip the date suffix and the
|
||||
``provider/`` prefix here; the catalog itself registers the dash/dot and
|
||||
family-order aliases at parse time (see :func:`_expand_aliases`).
|
||||
"""
|
||||
base = model_name.strip()
|
||||
out = [base]
|
||||
if "/" in base:
|
||||
out.append(base.split("/", 1)[1])
|
||||
for k in list(out):
|
||||
stripped = _DATE_SUFFIX_RE.sub("", k)
|
||||
if stripped != k:
|
||||
out.append(stripped)
|
||||
out.extend([v.lower() for v in list(out)])
|
||||
seen: set[str] = set()
|
||||
uniq = []
|
||||
for key in out:
|
||||
if key not in seen:
|
||||
seen.add(key)
|
||||
uniq.append(key)
|
||||
return uniq
|
||||
|
||||
|
||||
def _expand_aliases(model_id: str) -> set[str]:
|
||||
"""Catalog-side variants of a DO model id.
|
||||
|
||||
DO publishes Anthropic models under ids like ``anthropic-claude-opus-4.7``
|
||||
or ``anthropic-claude-4.6-sonnet`` while Plano emits ``claude-opus-4-7`` /
|
||||
``claude-sonnet-4-6``. Generate a set covering provider-prefix stripping,
|
||||
dash↔dot in version segments, and family↔version word order so a single
|
||||
catalog entry matches every name shape we'll see at lookup.
|
||||
"""
|
||||
aliases: set[str] = set()
|
||||
|
||||
def add(name: str) -> None:
|
||||
if not name:
|
||||
return
|
||||
aliases.add(name)
|
||||
aliases.add(name.lower())
|
||||
|
||||
add(model_id)
|
||||
|
||||
base = model_id
|
||||
head, _, rest = base.partition("-")
|
||||
if head.lower() in _PROVIDER_PREFIXES and rest:
|
||||
add(rest)
|
||||
base = rest
|
||||
|
||||
for key in list(aliases):
|
||||
if "." in key:
|
||||
add(key.replace(".", "-"))
|
||||
|
||||
parts = base.split("-")
|
||||
if len(parts) >= 3 and parts[0].lower() == "claude":
|
||||
rest_parts = parts[1:]
|
||||
for i, p in enumerate(rest_parts):
|
||||
if p.lower() in _ANTHROPIC_FAMILIES:
|
||||
others = rest_parts[:i] + rest_parts[i + 1 :]
|
||||
if not others:
|
||||
break
|
||||
family_last = "claude-" + "-".join(others) + "-" + p
|
||||
family_first = "claude-" + p + "-" + "-".join(others)
|
||||
add(family_last)
|
||||
add(family_first)
|
||||
add(family_last.replace(".", "-"))
|
||||
add(family_first.replace(".", "-"))
|
||||
break
|
||||
|
||||
return aliases
|
||||
|
||||
|
||||
def _parse_do_pricing(data: Any) -> dict[str, ModelPrice]:
    """Parse a DO catalog response into a ModelPrice map keyed by model id.

    DO's shape (as of 2026-04)::

        {
            "data": [
                {"model_id": "openai-gpt-5.4",
                 "pricing": {"input_price_per_million": 5.0,
                             "output_price_per_million": 15.0}},
                ...
            ]
        }

    Older/alternate shapes are also accepted (flat top-level fields, or the
    ``id``/``model``/``name`` key).
    """
    out: dict[str, ModelPrice] = {}
    for entry in _coerce_items(data):
        model_id = (
            entry.get("model_id")
            or entry.get("id")
            or entry.get("model")
            or entry.get("name")
        )
        if not model_id:
            continue

        # DO nests rates under `pricing`; consult that first, then fall back
        # to top-level fields for alternate response shapes.
        rate_sources: list[dict] = [entry]
        nested = entry.get("pricing")
        if isinstance(nested, dict):
            rate_sources.insert(0, nested)

        input_rate = _extract_rate_from_sources(
            rate_sources,
            ["input_per_token", "input_token_price", "price_input"],
            ["input_price_per_million", "input_per_million", "input_per_mtok"],
        )
        output_rate = _extract_rate_from_sources(
            rate_sources,
            ["output_per_token", "output_token_price", "price_output"],
            ["output_price_per_million", "output_per_million", "output_per_mtok"],
        )
        cached_rate = _extract_rate_from_sources(
            rate_sources,
            [
                "cached_input_per_token",
                "cached_input_token_price",
                "prompt_cache_read_per_token",
            ],
            [
                "cached_input_price_per_million",
                "cached_input_per_million",
                "cached_input_per_mtok",
            ],
        )

        if input_rate is None or output_rate is None:
            continue
        # Treat 0-rate entries as "unknown" so cost falls back to `—` rather
        # than showing a misleading $0.0000. DO's catalog sometimes omits
        # rates for promo/open-weight models.
        if input_rate == 0 and output_rate == 0:
            continue

        price = ModelPrice(
            input_per_token_usd=input_rate,
            output_per_token_usd=output_rate,
            cached_input_per_token_usd=cached_rate,
        )
        # setdefault: the first (canonical) id to claim an alias wins.
        for alias in _expand_aliases(str(model_id)):
            out.setdefault(alias, price)
    return out
|
||||
|
||||
|
||||
def _coerce_items(data: Any) -> list[dict]:
|
||||
if isinstance(data, list):
|
||||
return [x for x in data if isinstance(x, dict)]
|
||||
if isinstance(data, dict):
|
||||
for key in ("data", "models", "pricing", "items"):
|
||||
val = data.get(key)
|
||||
if isinstance(val, list):
|
||||
return [x for x in val if isinstance(x, dict)]
|
||||
return []
|
||||
|
||||
|
||||
def _extract_rate_from_sources(
|
||||
sources: list[dict],
|
||||
per_token_keys: list[str],
|
||||
per_million_keys: list[str],
|
||||
) -> float | None:
|
||||
"""Return a per-token rate in USD, or None if unknown.
|
||||
|
||||
Some DO catalog responses put per-token values under a field whose name
|
||||
says ``_per_million`` (e.g. ``input_price_per_million: 5E-8`` — that's
|
||||
$5e-8 per token, not per million). Heuristic: values < 1 are already
|
||||
per-token (real per-million rates are ~0.1 to ~100); values >= 1 are
|
||||
treated as per-million and divided by 1,000,000.
|
||||
"""
|
||||
for src in sources:
|
||||
for key in per_token_keys:
|
||||
if key in src and src[key] is not None:
|
||||
try:
|
||||
return float(src[key])
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
for key in per_million_keys:
|
||||
if key in src and src[key] is not None:
|
||||
try:
|
||||
v = float(src[key])
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
if v >= 1:
|
||||
return v / 1_000_000
|
||||
return v
|
||||
return None
|
||||
634
cli/planoai/obs/render.py
Normal file
634
cli/planoai/obs/render.py
Normal file
|
|
@ -0,0 +1,634 @@
|
|||
"""Rich TUI renderer for the observability console."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import Counter
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from http import HTTPStatus
|
||||
|
||||
from rich.align import Align
|
||||
from rich.box import SIMPLE, SIMPLE_HEAVY
|
||||
from rich.console import Group
|
||||
from rich.panel import Panel
|
||||
from rich.table import Table
|
||||
from rich.text import Text
|
||||
|
||||
MAX_WIDTH = 160
|
||||
|
||||
from planoai.obs.collector import LLMCall
|
||||
|
||||
|
||||
@dataclass
class AggregateStats:
    """Lifetime rollup across every LLM call currently buffered."""

    count: int  # total calls in the buffer
    total_cost_usd: float
    total_input_tokens: int
    total_output_tokens: int
    distinct_sessions: int  # number of unique truthy session IDs seen
    current_session: str | None  # session ID of the newest call, if any
    # Percentiles stay None until at least one sample exists.
    p50_latency_ms: float | None = None
    p95_latency_ms: float | None = None
    p99_latency_ms: float | None = None
    p50_ttft_ms: float | None = None
    p95_ttft_ms: float | None = None
    p99_ttft_ms: float | None = None
    error_count: int = 0  # errors_4xx + errors_5xx
    errors_4xx: int = 0
    errors_5xx: int = 0
    has_cost: bool = False  # True when any call carried pricing data
|
||||
|
||||
|
||||
@dataclass
class ModelRollup:
    """Per-model aggregate row feeding the "by model" table."""

    model: str
    requests: int
    input_tokens: int
    output_tokens: int
    cache_write: int  # cache-creation tokens
    cache_read: int  # cached-input tokens
    cost_usd: float
    has_cost: bool = False  # True when any call for this model had a cost
    avg_tokens_per_sec: float | None = None  # mean of observed throughput samples
|
||||
|
||||
|
||||
def _percentile(values: list[float], pct: float) -> float | None:
|
||||
if not values:
|
||||
return None
|
||||
s = sorted(values)
|
||||
k = max(0, min(len(s) - 1, int(round((pct / 100.0) * (len(s) - 1)))))
|
||||
return s[k]
|
||||
|
||||
|
||||
def aggregates(calls: list[LLMCall]) -> AggregateStats:
    """Fold the whole call buffer into one lifetime summary."""
    cost_total = 0.0
    input_total = 0
    output_total = 0
    session_ids: set[str] = set()
    latencies: list[float] = []
    ttfts: list[float] = []
    n_4xx = 0
    n_5xx = 0
    any_cost = False
    newest_session: str | None = None
    for call in calls:
        cost_total += call.cost_usd or 0.0
        input_total += int(call.prompt_tokens or 0)
        output_total += int(call.completion_tokens or 0)
        # Truthy IDs count toward the distinct-session total; the "current"
        # session tracks the newest non-None ID (even an empty string).
        if call.session_id:
            session_ids.add(call.session_id)
        if call.session_id is not None:
            newest_session = call.session_id
        if call.duration_ms is not None:
            latencies.append(call.duration_ms)
        if call.ttft_ms is not None:
            ttfts.append(call.ttft_ms)
        status = call.status_code
        if status is not None and 400 <= status < 500:
            n_4xx += 1
        elif status is not None and status >= 500:
            n_5xx += 1
        if call.cost_usd is not None:
            any_cost = True
    return AggregateStats(
        count=len(calls),
        total_cost_usd=cost_total,
        total_input_tokens=input_total,
        total_output_tokens=output_total,
        distinct_sessions=len(session_ids),
        current_session=newest_session,
        p50_latency_ms=_percentile(latencies, 50),
        p95_latency_ms=_percentile(latencies, 95),
        p99_latency_ms=_percentile(latencies, 99),
        p50_ttft_ms=_percentile(ttfts, 50),
        p95_ttft_ms=_percentile(ttfts, 95),
        p99_ttft_ms=_percentile(ttfts, 99),
        error_count=n_4xx + n_5xx,
        errors_4xx=n_4xx,
        errors_5xx=n_5xx,
        has_cost=any_cost,
    )
|
||||
|
||||
|
||||
def model_rollups(calls: list[LLMCall]) -> list[ModelRollup]:
    """Aggregate the buffer into one rollup row per model, biggest spend first."""
    rows: dict[str, ModelRollup] = {}
    tps_by_model: dict[str, list[float]] = {}
    for call in calls:
        row = rows.get(call.model)
        if row is None:
            row = ModelRollup(
                model=call.model,
                requests=0,
                input_tokens=0,
                output_tokens=0,
                cache_write=0,
                cache_read=0,
                cost_usd=0.0,
            )
            rows[call.model] = row
        row.requests += 1
        row.input_tokens += int(call.prompt_tokens or 0)
        row.output_tokens += int(call.completion_tokens or 0)
        row.cache_write += int(call.cache_creation_tokens or 0)
        row.cache_read += int(call.cached_input_tokens or 0)
        row.cost_usd += call.cost_usd or 0.0
        if call.cost_usd is not None:
            row.has_cost = True
        throughput = call.tokens_per_sec
        if throughput is not None:
            tps_by_model.setdefault(call.model, []).append(throughput)

    result = list(rows.values())
    for row in result:
        samples = tps_by_model.get(row.model)
        if samples:
            row.avg_tokens_per_sec = sum(samples) / len(samples)
    # Most expensive first; request count breaks ties.
    result.sort(key=lambda r: (r.cost_usd, r.requests), reverse=True)
    return result
|
||||
|
||||
|
||||
@dataclass
class RouteHit:
    """One row of the route-share table."""

    route: str
    hits: int
    pct: float
    p95_latency_ms: float | None
    error_count: int


def route_hits(calls: list[LLMCall]) -> list[RouteHit]:
    """Per-route hit share with p95 latency and error counts, busiest first.

    Calls without a route name are excluded entirely; an all-unrouted
    buffer yields an empty list.
    """
    counts: Counter[str] = Counter()
    latencies: dict[str, list[float]] = {}
    errors: dict[str, int] = {}
    for call in calls:
        route = call.route_name
        if not route:
            continue
        counts[route] += 1
        if call.duration_ms is not None:
            latencies.setdefault(route, []).append(call.duration_ms)
        if call.status_code is not None and call.status_code >= 400:
            errors[route] = errors.get(route, 0) + 1
    total = sum(counts.values())
    if total == 0:
        return []
    rows: list[RouteHit] = []
    for route, hit_count in counts.most_common():
        samples = latencies.get(route, [])
        p95: float | None = None
        if samples:
            # Inlined nearest-rank p95 (same clamping as _percentile).
            ordered = sorted(samples)
            rank = int(round((95 / 100.0) * (len(ordered) - 1)))
            p95 = ordered[max(0, min(len(ordered) - 1, rank))]
        rows.append(
            RouteHit(
                route=route,
                hits=hit_count,
                pct=(hit_count / total) * 100.0,
                p95_latency_ms=p95,
                error_count=errors.get(route, 0),
            )
        )
    return rows
|
||||
|
||||
|
||||
def _fmt_cost(v: float | None, *, zero: str = "—") -> str:
|
||||
if v is None:
|
||||
return "—"
|
||||
if v == 0:
|
||||
return zero
|
||||
if abs(v) < 0.0001:
|
||||
return f"${v:.8f}".rstrip("0").rstrip(".")
|
||||
if abs(v) < 0.01:
|
||||
return f"${v:.6f}".rstrip("0").rstrip(".")
|
||||
if abs(v) < 1:
|
||||
return f"${v:.4f}"
|
||||
return f"${v:,.2f}"
|
||||
|
||||
|
||||
def _fmt_ms(v: float | None) -> str:
|
||||
if v is None:
|
||||
return "—"
|
||||
if v >= 1000:
|
||||
return f"{v / 1000:.1f}s"
|
||||
return f"{v:.0f}ms"
|
||||
|
||||
|
||||
def _fmt_int(v: int | None) -> str:
|
||||
if v is None or v == 0:
|
||||
return "—"
|
||||
return f"{v:,}"
|
||||
|
||||
|
||||
def _fmt_tokens(v: int | None) -> str:
|
||||
if v is None:
|
||||
return "—"
|
||||
return f"{v:,}"
|
||||
|
||||
|
||||
def _fmt_tps(v: float | None) -> str:
|
||||
if v is None or v <= 0:
|
||||
return "—"
|
||||
if v >= 100:
|
||||
return f"{v:.0f}/s"
|
||||
return f"{v:.1f}/s"
|
||||
|
||||
|
||||
def _latency_style(v: float | None) -> str:
|
||||
if v is None:
|
||||
return "dim"
|
||||
if v < 500:
|
||||
return "green"
|
||||
if v < 2000:
|
||||
return "yellow"
|
||||
return "red"
|
||||
|
||||
|
||||
def _ttft_style(v: float | None) -> str:
|
||||
if v is None:
|
||||
return "dim"
|
||||
if v < 300:
|
||||
return "green"
|
||||
if v < 1000:
|
||||
return "yellow"
|
||||
return "red"
|
||||
|
||||
|
||||
def _truncate_model(name: str, limit: int = 32) -> str:
|
||||
if len(name) <= limit:
|
||||
return name
|
||||
return name[: limit - 1] + "…"
|
||||
|
||||
|
||||
def _status_text(code: int | None) -> Text:
    """Render an HTTP status as a colored dot; 2xx collapses to "ok"."""
    if code is None:
        return Text("—", style="dim")
    if 200 <= code < 300:
        return Text("● ok", style="green")
    # Severity escalates: 3xx yellow, 4xx yellow bold, anything else red bold.
    style = "red bold"
    if 300 <= code < 400:
        style = "yellow"
    elif 400 <= code < 500:
        style = "yellow bold"
    return Text(f"● {code}", style=style)
|
||||
|
||||
|
||||
def _summary_panel(last: LLMCall | None, stats: AggregateStats) -> Panel:
    """Top panel: latest-request snapshot (left) beside lifetime totals (right)."""
    # Content-sized columns with a fixed gutter keep the two blocks close
    # together instead of stretching across the full terminal on wide screens.
    grid = Table.grid(padding=(0, 4))
    grid.add_column(no_wrap=True)
    grid.add_column(no_wrap=True)

    # Left: latest request snapshot.
    left = Table.grid(padding=(0, 1))
    left.add_column(style="dim", no_wrap=True)
    left.add_column(no_wrap=True)
    if last is None:
        left.add_row("latest", Text("waiting for spans…", style="dim italic"))
    else:
        model_text = Text(_truncate_model(last.model, 48), style="bold cyan")
        if last.is_streaming:
            model_text.append(" ⟳ stream", style="dim")
        left.add_row("model", model_text)
        # Show the requested alias only when routing rewrote it.
        if last.request_model and last.request_model != last.model:
            left.add_row(
                "requested", Text(_truncate_model(last.request_model, 48), style="cyan")
            )
        if last.route_name:
            left.add_row("route", Text(last.route_name, style="yellow"))
        left.add_row("status", _status_text(last.status_code))
        tokens = Text()
        tokens.append(_fmt_tokens(last.prompt_tokens))
        tokens.append(" in", style="dim")
        tokens.append(" · ", style="dim")
        tokens.append(_fmt_tokens(last.completion_tokens), style="green")
        tokens.append(" out", style="dim")
        if last.cached_input_tokens:
            tokens.append(" · ", style="dim")
            tokens.append(_fmt_tokens(last.cached_input_tokens), style="yellow")
            tokens.append(" cached", style="dim")
        left.add_row("tokens", tokens)
        timing = Text()
        timing.append("TTFT ", style="dim")
        timing.append(_fmt_ms(last.ttft_ms), style=_ttft_style(last.ttft_ms))
        timing.append(" · ", style="dim")
        timing.append("lat ", style="dim")
        timing.append(_fmt_ms(last.duration_ms), style=_latency_style(last.duration_ms))
        tps = last.tokens_per_sec
        if tps:
            timing.append(" · ", style="dim")
            timing.append(_fmt_tps(tps), style="green")
        left.add_row("timing", timing)
        left.add_row("cost", Text(_fmt_cost(last.cost_usd), style="green bold"))

    # Right: lifetime totals.
    right = Table.grid(padding=(0, 1))
    right.add_column(style="dim", no_wrap=True)
    right.add_column(no_wrap=True)
    right.add_row(
        "requests",
        Text(f"{stats.count:,}", style="bold"),
    )
    # Errors row only appears once at least one 4xx/5xx has been seen.
    if stats.error_count:
        err_text = Text()
        err_text.append(f"{stats.error_count:,}", style="red bold")
        parts: list[str] = []
        if stats.errors_4xx:
            parts.append(f"{stats.errors_4xx} 4xx")
        if stats.errors_5xx:
            parts.append(f"{stats.errors_5xx} 5xx")
        if parts:
            err_text.append(f" ({' · '.join(parts)})", style="dim")
        right.add_row("errors", err_text)
    # A dash (rather than $0) signals that no pricing data was available.
    cost_str = _fmt_cost(stats.total_cost_usd) if stats.has_cost else "—"
    right.add_row("total cost", Text(cost_str, style="green bold"))
    tokens_total = Text()
    tokens_total.append(_fmt_tokens(stats.total_input_tokens))
    tokens_total.append(" in", style="dim")
    tokens_total.append(" · ", style="dim")
    tokens_total.append(_fmt_tokens(stats.total_output_tokens), style="green")
    tokens_total.append(" out", style="dim")
    right.add_row("tokens", tokens_total)
    lat_text = Text()
    lat_text.append("p50 ", style="dim")
    lat_text.append(
        _fmt_ms(stats.p50_latency_ms), style=_latency_style(stats.p50_latency_ms)
    )
    lat_text.append(" · ", style="dim")
    lat_text.append("p95 ", style="dim")
    lat_text.append(
        _fmt_ms(stats.p95_latency_ms), style=_latency_style(stats.p95_latency_ms)
    )
    lat_text.append(" · ", style="dim")
    lat_text.append("p99 ", style="dim")
    lat_text.append(
        _fmt_ms(stats.p99_latency_ms), style=_latency_style(stats.p99_latency_ms)
    )
    right.add_row("latency", lat_text)
    ttft_text = Text()
    ttft_text.append("p50 ", style="dim")
    ttft_text.append(_fmt_ms(stats.p50_ttft_ms), style=_ttft_style(stats.p50_ttft_ms))
    ttft_text.append(" · ", style="dim")
    ttft_text.append("p95 ", style="dim")
    ttft_text.append(_fmt_ms(stats.p95_ttft_ms), style=_ttft_style(stats.p95_ttft_ms))
    ttft_text.append(" · ", style="dim")
    ttft_text.append("p99 ", style="dim")
    ttft_text.append(_fmt_ms(stats.p99_ttft_ms), style=_ttft_style(stats.p99_ttft_ms))
    right.add_row("TTFT", ttft_text)
    sess = Text()
    sess.append(f"{stats.distinct_sessions}")
    if stats.current_session:
        sess.append(" · current ", style="dim")
        sess.append(stats.current_session, style="magenta")
    right.add_row("sessions", sess)

    grid.add_row(left, right)
    return Panel(
        grid,
        title="[bold]live LLM traffic[/]",
        border_style="cyan",
        box=SIMPLE_HEAVY,
        padding=(0, 1),
    )
|
||||
|
||||
|
||||
def _model_rollup_table(rollups: list[ModelRollup]) -> Table:
    """Build the per-model summary table (requests, tokens, cache traffic, cost)."""
    table = Table(
        title="by model",
        title_justify="left",
        title_style="bold dim",
        caption="cost via DigitalOcean Gradient catalog",
        caption_justify="left",
        caption_style="dim italic",
        box=SIMPLE,
        header_style="bold",
        pad_edge=False,
        padding=(0, 1),
    )
    table.add_column("model", style="cyan", no_wrap=True)
    table.add_column("req", justify="right")
    table.add_column("input", justify="right")
    table.add_column("output", justify="right", style="green")
    table.add_column("cache wr", justify="right", style="yellow")
    table.add_column("cache rd", justify="right", style="yellow")
    table.add_column("tok/s", justify="right")
    table.add_column("cost", justify="right", style="green")
    if not rollups:
        # Placeholder row keeps the table shape stable before traffic arrives.
        table.add_row(
            Text("no requests yet", style="dim italic"),
            *(["—"] * 7),
        )
        return table
    for r in rollups:
        # A dash (rather than $0.00) marks models with no pricing data.
        cost_cell = _fmt_cost(r.cost_usd) if r.has_cost else "—"
        table.add_row(
            _truncate_model(r.model),
            f"{r.requests:,}",
            _fmt_tokens(r.input_tokens),
            _fmt_tokens(r.output_tokens),
            _fmt_int(r.cache_write),
            _fmt_int(r.cache_read),
            _fmt_tps(r.avg_tokens_per_sec),
            cost_cell,
        )
    return table
|
||||
|
||||
|
||||
def _route_hit_table(hits: list[RouteHit]) -> Table:
    """Build the route-share table (hits, share %, p95 latency, error count)."""
    table = Table(
        title="route share",
        title_justify="left",
        title_style="bold dim",
        box=SIMPLE,
        header_style="bold",
        pad_edge=False,
        padding=(0, 1),
    )
    table.add_column("route", style="cyan")
    table.add_column("hits", justify="right")
    table.add_column("%", justify="right")
    table.add_column("p95", justify="right")
    table.add_column("err", justify="right")
    for h in hits:
        # Errors stand out in bold red; zero collapses to a dash.
        err_cell = (
            Text(f"{h.error_count:,}", style="red bold") if h.error_count else "—"
        )
        table.add_row(
            h.route,
            f"{h.hits:,}",
            f"{h.pct:5.1f}%",
            Text(_fmt_ms(h.p95_latency_ms), style=_latency_style(h.p95_latency_ms)),
            err_cell,
        )
    return table
|
||||
|
||||
|
||||
def _recent_table(calls: list[LLMCall], limit: int = 15) -> Table:
    """Table of the newest calls (newest first), with data-driven columns.

    Route, cache, and reasoning columns only appear when at least one
    buffered call carries that data, keeping the table compact; the caption
    legend grows to match.
    """
    show_route = any(c.route_name for c in calls)
    show_cache = any((c.cached_input_tokens or 0) > 0 for c in calls)
    show_rsn = any((c.reasoning_tokens or 0) > 0 for c in calls)

    caption_parts = ["in·new = fresh prompt tokens"]
    if show_cache:
        caption_parts.append("in·cache = cached read")
    if show_rsn:
        caption_parts.append("rsn = reasoning")
    caption_parts.append("lat = total latency")

    table = Table(
        title=f"recent · last {min(limit, len(calls)) if calls else 0}",
        title_justify="left",
        title_style="bold dim",
        caption=" · ".join(caption_parts),
        caption_justify="left",
        caption_style="dim italic",
        box=SIMPLE,
        header_style="bold",
        pad_edge=False,
        padding=(0, 1),
    )
    table.add_column("time", no_wrap=True)
    table.add_column("model", style="cyan", no_wrap=True)
    if show_route:
        table.add_column("route", style="yellow", no_wrap=True)
    table.add_column("in·new", justify="right")
    if show_cache:
        table.add_column("in·cache", justify="right", style="yellow")
    table.add_column("out", justify="right", style="green")
    if show_rsn:
        table.add_column("rsn", justify="right")
    table.add_column("tok/s", justify="right")
    table.add_column("TTFT", justify="right")
    table.add_column("lat", justify="right")
    table.add_column("cost", justify="right", style="green")
    table.add_column("status")

    if not calls:
        # Placeholder row spans however many columns the flags produced.
        cols = len(table.columns)
        table.add_row(
            Text("waiting for spans…", style="dim italic"),
            *(["—"] * (cols - 1)),
        )
        return table

    recent = list(reversed(calls))[:limit]
    for idx, c in enumerate(recent):
        # Highlight the newest row so the eye lands on fresh traffic.
        is_newest = idx == 0
        time_style = "bold white" if is_newest else None
        model_style = "bold cyan" if is_newest else "cyan"
        # Row cells must be appended in the same order the columns were
        # declared above, including the optional ones.
        row: list[object] = [
            (
                Text(c.timestamp.strftime("%H:%M:%S"), style=time_style)
                if time_style
                else c.timestamp.strftime("%H:%M:%S")
            ),
            Text(_truncate_model(c.model), style=model_style),
        ]
        if show_route:
            row.append(c.route_name or "—")
        row.append(_fmt_tokens(c.prompt_tokens))
        if show_cache:
            row.append(_fmt_int(c.cached_input_tokens))
        row.append(_fmt_tokens(c.completion_tokens))
        if show_rsn:
            row.append(_fmt_int(c.reasoning_tokens))
        row.extend(
            [
                _fmt_tps(c.tokens_per_sec),
                Text(_fmt_ms(c.ttft_ms), style=_ttft_style(c.ttft_ms)),
                Text(_fmt_ms(c.duration_ms), style=_latency_style(c.duration_ms)),
                _fmt_cost(c.cost_usd),
                _status_text(c.status_code),
            ]
        )
        table.add_row(*row)
    return table
|
||||
|
||||
|
||||
def _last_error(calls: list[LLMCall]) -> LLMCall | None:
|
||||
for c in reversed(calls):
|
||||
if c.status_code is not None and c.status_code >= 400:
|
||||
return c
|
||||
return None
|
||||
|
||||
|
||||
def _http_reason(code: int) -> str:
|
||||
try:
|
||||
return HTTPStatus(code).phrase
|
||||
except ValueError:
|
||||
return ""
|
||||
|
||||
|
||||
def _fmt_ago(ts: datetime) -> str:
|
||||
# `ts` is produced in collector.py via datetime.now(tz=...), but fall back
|
||||
# gracefully if a naive timestamp ever sneaks in.
|
||||
now = datetime.now(tz=ts.tzinfo) if ts.tzinfo else datetime.now()
|
||||
delta = (now - ts).total_seconds()
|
||||
if delta < 0:
|
||||
delta = 0
|
||||
if delta < 60:
|
||||
return f"{int(delta)}s ago"
|
||||
if delta < 3600:
|
||||
return f"{int(delta // 60)}m ago"
|
||||
return f"{int(delta // 3600)}h ago"
|
||||
|
||||
|
||||
def _error_banner(call: LLMCall) -> Panel:
    """One-line banner describing the most recent failed call."""
    code = call.status_code or 0
    # 5xx gets the red treatment; 4xx (and anything else) stays yellow.
    border = "red" if code >= 500 else "yellow"
    header = Text()
    header.append(f"● {code}", style=f"{border} bold")
    reason = _http_reason(code)
    if reason:
        header.append(f" {reason}", style=border)
    header.append(" · ", style="dim")
    header.append(_truncate_model(call.model, 48), style="cyan")
    if call.route_name:
        header.append(" · ", style="dim")
        header.append(call.route_name, style="yellow")
    header.append(" · ", style="dim")
    header.append(_fmt_ago(call.timestamp), style="dim")
    if call.request_id:
        header.append(" · req ", style="dim")
        header.append(call.request_id, style="magenta")
    return Panel(
        header,
        title="[bold]last error[/]",
        title_align="left",
        border_style=border,
        box=SIMPLE,
        padding=(0, 1),
    )
|
||||
|
||||
|
||||
def _footer(stats: AggregateStats) -> Text:
    """Status line: key hints plus whether spans are flowing yet."""
    waiting = stats.count == 0
    text = Text()
    text.append("Ctrl-C ", style="bold")
    text.append("exit", style="dim")
    text.append(" · OTLP :4317", style="dim")
    text.append(" · pricing: DigitalOcean ", style="dim")
    if waiting:
        # Nudge the user toward the tracing config that feeds this console.
        text.append("waiting for spans", style="yellow")
        text.append(
            " — set tracing.opentracing_grpc_endpoint=localhost:4317", style="dim"
        )
    else:
        text.append(f"receiving · {stats.count:,} call(s) buffered", style="green")
    return text
|
||||
|
||||
|
||||
def render(calls: list[LLMCall]) -> Align:
    """Compose the full console frame from the current call buffer."""
    last = calls[-1] if calls else None
    stats = aggregates(calls)
    rollups = model_rollups(calls)
    hits = route_hits(calls)

    parts: list[object] = [_summary_panel(last, stats)]
    err = _last_error(calls)
    if err is not None:
        parts.append(_error_banner(err))
    if hits:
        # Model and route tables sit side by side when route data exists.
        split = Table.grid(padding=(0, 2))
        split.add_column(no_wrap=False)
        split.add_column(no_wrap=False)
        split.add_row(_model_rollup_table(rollups), _route_hit_table(hits))
        parts.append(split)
    else:
        parts.append(_model_rollup_table(rollups))
    parts.append(_recent_table(calls))
    parts.append(_footer(stats))
    # Cap overall width so wide terminals don't stretch the layout into a
    # mostly-whitespace gap between columns.
    return Align.left(Group(*parts), width=MAX_WIDTH)
|
||||
99
cli/planoai/obs_cmd.py
Normal file
99
cli/planoai/obs_cmd.py
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
"""`planoai obs` — live observability TUI."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
|
||||
import rich_click as click
|
||||
from rich.console import Console
|
||||
from rich.live import Live
|
||||
|
||||
from planoai.consts import PLANO_COLOR
|
||||
from planoai.obs.collector import (
|
||||
DEFAULT_CAPACITY,
|
||||
DEFAULT_GRPC_PORT,
|
||||
LLMCallStore,
|
||||
ObsCollector,
|
||||
)
|
||||
from planoai.obs.pricing import PricingCatalog
|
||||
from planoai.obs.render import render
|
||||
|
||||
|
||||
@click.command(name="obs", help="Live observability console for Plano LLM traffic.")
@click.option(
    "--port",
    type=int,
    default=DEFAULT_GRPC_PORT,
    show_default=True,
    help="OTLP/gRPC port to listen on. Must match the brightstaff tracing endpoint.",
)
@click.option(
    "--host",
    type=str,
    default="0.0.0.0",
    show_default=True,
    help="Host to bind the OTLP listener.",
)
@click.option(
    "--capacity",
    type=int,
    default=DEFAULT_CAPACITY,
    show_default=True,
    help="Max LLM calls kept in memory; older calls evicted FIFO.",
)
@click.option(
    "--refresh-ms",
    type=int,
    default=500,
    show_default=True,
    help="TUI refresh interval.",
)
def obs(port: int, host: str, capacity: int, refresh_ms: int) -> None:
    # Entry point for `planoai obs`: load pricing, start the OTLP collector,
    # then run the Rich Live loop until Ctrl-C.
    console = Console()
    console.print(
        f"[bold {PLANO_COLOR}]planoai obs[/] — loading DO pricing catalog...",
        end="",
    )
    pricing = PricingCatalog.fetch()
    if len(pricing):
        sample = ", ".join(pricing.sample_models(3))
        console.print(
            f" [green]{len(pricing)} models loaded[/] [dim]({sample}, ...)[/]"
        )
    else:
        # Pricing is best-effort: the console still runs without it.
        console.print(
            " [yellow]no pricing loaded[/] — "
            "[dim]cost column will be blank (DO catalog unreachable)[/]"
        )

    store = LLMCallStore(capacity=capacity)
    collector = ObsCollector(store=store, pricing=pricing, host=host, port=port)
    try:
        collector.start()
    except OSError as exc:
        # Typically "address already in use" on the OTLP port.
        console.print(f"[red]{exc}[/]")
        raise SystemExit(1)

    console.print(
        f"Listening for OTLP spans on [bold]{host}:{port}[/]. "
        "Ensure plano config has [cyan]tracing.opentracing_grpc_endpoint: http://localhost:4317[/] "
        "and [cyan]tracing.random_sampling: 100[/] (or run [bold]planoai up[/] "
        "with no config — it wires this automatically)."
    )
    console.print("Press [bold]Ctrl-C[/] to exit.\n")

    # Floor the refresh at 50ms so a tiny --refresh-ms can't spin the CPU.
    refresh = max(0.05, refresh_ms / 1000.0)
    try:
        with Live(
            render(store.snapshot()),
            console=console,
            refresh_per_second=1.0 / refresh,
            screen=False,
        ) as live:
            while True:
                time.sleep(refresh)
                live.update(render(store.snapshot()))
    except KeyboardInterrupt:
        console.print("\n[dim]obs stopped[/]")
    finally:
        # Always shut the gRPC listener down, even on unexpected errors.
        collector.stop()
|
||||
|
|
@ -61,7 +61,7 @@ def configure_rich_click(plano_color: str) -> None:
|
|||
},
|
||||
{
|
||||
"name": "Observability",
|
||||
"commands": ["trace"],
|
||||
"commands": ["trace", "obs"],
|
||||
},
|
||||
{
|
||||
"name": "Utilities",
|
||||
|
|
|
|||
|
|
@ -91,7 +91,12 @@ def convert_legacy_listeners(
|
|||
"type": "model",
|
||||
"port": 12000,
|
||||
"address": "0.0.0.0",
|
||||
"timeout": "30s",
|
||||
# LLM streaming responses routinely exceed 30s (extended thinking,
|
||||
# long tool reasoning, large completions). Match the 300s ceiling
|
||||
# used by the direct upstream-provider routes so Envoy doesn't
|
||||
# abort streams with UT mid-response. Users can override via their
|
||||
# plano_config.yaml `listeners.timeout` field.
|
||||
"timeout": "300s",
|
||||
"model_providers": model_providers or [],
|
||||
}
|
||||
|
||||
|
|
@ -100,7 +105,7 @@ def convert_legacy_listeners(
|
|||
"type": "prompt",
|
||||
"port": 10000,
|
||||
"address": "0.0.0.0",
|
||||
"timeout": "30s",
|
||||
"timeout": "300s",
|
||||
}
|
||||
|
||||
# Handle None case
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[project]
|
||||
name = "planoai"
|
||||
version = "0.4.19"
|
||||
version = "0.4.20"
|
||||
description = "Python-based CLI tool to manage Plano."
|
||||
authors = [{name = "Katanemo Labs, Inc."}]
|
||||
readme = "README.md"
|
||||
|
|
|
|||
86
cli/test/test_defaults.py
Normal file
86
cli/test/test_defaults.py
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
from pathlib import Path
|
||||
|
||||
import jsonschema
|
||||
import yaml
|
||||
|
||||
from planoai.defaults import (
|
||||
PROVIDER_DEFAULTS,
|
||||
detect_providers,
|
||||
synthesize_default_config,
|
||||
)
|
||||
|
||||
# Resolves to <repo>/config/plano_config_schema.yaml relative to this test file.
_SCHEMA_PATH = Path(__file__).parents[2] / "config" / "plano_config_schema.yaml"


def _schema() -> dict:
    """Load the plano config JSON-schema used by the validation test."""
    return yaml.safe_load(_SCHEMA_PATH.read_text())


def test_zero_env_vars_produces_pure_passthrough():
    """With no API keys in the env, every provider is pass-through."""
    cfg = synthesize_default_config(env={})
    assert cfg["version"] == "v0.4.0"
    assert cfg["listeners"][0]["port"] == 12000
    for provider in cfg["model_providers"]:
        assert provider.get("passthrough_auth") is True
        assert "access_key" not in provider
        # No provider should be marked default in pure pass-through mode.
        assert provider.get("default") is not True
    # All known providers should be listed.
    names = {p["name"] for p in cfg["model_providers"]}
    assert "digitalocean" in names
    assert "openai" in names
    assert "anthropic" in names


def test_env_keys_promote_providers_to_env_keyed():
    """Providers whose env var is set get an access_key reference instead."""
    cfg = synthesize_default_config(
        env={"OPENAI_API_KEY": "sk-1", "DO_API_KEY": "do-1"}
    )
    by_name = {p["name"]: p for p in cfg["model_providers"]}
    assert by_name["openai"].get("access_key") == "$OPENAI_API_KEY"
    assert by_name["openai"].get("passthrough_auth") is None
    assert by_name["digitalocean"].get("access_key") == "$DO_API_KEY"
    # Unset env keys remain pass-through.
    assert by_name["anthropic"].get("passthrough_auth") is True


def test_no_default_is_synthesized():
    # Bare model names resolve via brightstaff's wildcard expansion registering
    # bare keys, so the synthesizer intentionally never sets `default: true`.
    cfg = synthesize_default_config(
        env={"OPENAI_API_KEY": "sk-1", "ANTHROPIC_API_KEY": "a-1"}
    )
    assert not any(p.get("default") is True for p in cfg["model_providers"])


def test_listener_port_is_configurable():
    cfg = synthesize_default_config(env={}, listener_port=11000)
    assert cfg["listeners"][0]["port"] == 11000


def test_detection_summary_strings():
    """The human-readable summary names both env-keyed and pass-through sets."""
    det = detect_providers(env={"OPENAI_API_KEY": "sk", "DO_API_KEY": "d"})
    summary = det.summary
    assert "env-keyed" in summary and "openai" in summary and "digitalocean" in summary
    assert "pass-through" in summary


def test_tracing_block_points_at_local_console():
    cfg = synthesize_default_config(env={})
    tracing = cfg["tracing"]
    assert tracing["opentracing_grpc_endpoint"] == "http://localhost:4317"
    # random_sampling is a percentage in the plano config — 100 = every span.
    assert tracing["random_sampling"] == 100


def test_synthesized_config_validates_against_schema():
    cfg = synthesize_default_config(env={"OPENAI_API_KEY": "sk"})
    jsonschema.validate(cfg, _schema())


def test_provider_defaults_digitalocean_is_configured():
    by_name = {p.name: p for p in PROVIDER_DEFAULTS}
    assert "digitalocean" in by_name
    assert by_name["digitalocean"].env_var == "DO_API_KEY"
    assert by_name["digitalocean"].base_url == "https://inference.do-ai.run/v1"
    assert by_name["digitalocean"].model_pattern == "digitalocean/*"
|
||||
145
cli/test/test_obs_collector.py
Normal file
145
cli/test/test_obs_collector.py
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
import time
|
||||
from datetime import datetime, timezone
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from planoai.obs.collector import LLMCall, LLMCallStore, span_to_llm_call
|
||||
|
||||
|
||||
def _mk_attr(key: str, value):
    """Build a mock OTLP KeyValue attribute carrying *value* in the right slot."""
    v = MagicMock()
    # bool must be tested before int — bool is an int subclass in Python.
    if isinstance(value, bool):
        v.WhichOneof.return_value = "bool_value"
        v.bool_value = value
    elif isinstance(value, int):
        v.WhichOneof.return_value = "int_value"
        v.int_value = value
    elif isinstance(value, float):
        v.WhichOneof.return_value = "double_value"
        v.double_value = value
    else:
        v.WhichOneof.return_value = "string_value"
        v.string_value = str(value)
    kv = MagicMock()
    kv.key = key
    kv.value = v
    return kv


def _mk_span(
    attrs: dict, start_ns: int | None = None, span_id_hex: str = "ab"
) -> MagicMock:
    """Build a mock OTLP span with the given attributes and start time."""
    span = MagicMock()
    span.attributes = [_mk_attr(k, v) for k, v in attrs.items()]
    span.start_time_unix_nano = start_ns or int(time.time() * 1_000_000_000)
    span.span_id.hex.return_value = span_id_hex
    return span


def test_span_without_llm_model_is_ignored():
    """Spans lacking an llm.model attribute are not LLM calls."""
    span = _mk_span({"http.method": "POST"})
    assert span_to_llm_call(span, "plano(llm)") is None


def test_span_with_full_llm_attrs_produces_call():
    """Every recognized span attribute maps onto its LLMCall field."""
    span = _mk_span(
        {
            "llm.model": "openai-gpt-5.4",
            "model.requested": "router:software-engineering",
            "plano.session_id": "sess-abc",
            "plano.route.name": "software-engineering",
            "llm.is_streaming": False,
            "llm.duration_ms": 1234,
            "llm.time_to_first_token": 210,
            "llm.usage.prompt_tokens": 100,
            "llm.usage.completion_tokens": 50,
            "llm.usage.total_tokens": 150,
            "llm.usage.cached_input_tokens": 30,
            "llm.usage.cache_creation_tokens": 5,
            "llm.usage.reasoning_tokens": 200,
            "http.status_code": 200,
            "request_id": "req-42",
        }
    )
    call = span_to_llm_call(span, "plano(llm)")
    assert call is not None
    assert call.request_id == "req-42"
    assert call.model == "openai-gpt-5.4"
    assert call.request_model == "router:software-engineering"
    assert call.session_id == "sess-abc"
    assert call.route_name == "software-engineering"
    assert call.is_streaming is False
    assert call.duration_ms == 1234.0
    assert call.ttft_ms == 210.0
    assert call.prompt_tokens == 100
    assert call.completion_tokens == 50
    assert call.total_tokens == 150
    assert call.cached_input_tokens == 30
    assert call.cache_creation_tokens == 5
    assert call.reasoning_tokens == 200
    assert call.status_code == 200
|
||||
|
||||
|
||||
def test_pricing_lookup_attaches_cost():
|
||||
class StubPricing:
|
||||
def cost_for_call(self, call):
|
||||
# Simple: 2 * prompt + 3 * completion, in cents
|
||||
return 0.02 * (call.prompt_tokens or 0) + 0.03 * (
|
||||
call.completion_tokens or 0
|
||||
)
|
||||
|
||||
span = _mk_span(
|
||||
{
|
||||
"llm.model": "do/openai-gpt-5.4",
|
||||
"llm.usage.prompt_tokens": 10,
|
||||
"llm.usage.completion_tokens": 2,
|
||||
}
|
||||
)
|
||||
call = span_to_llm_call(span, "plano(llm)", pricing=StubPricing())
|
||||
assert call is not None
|
||||
assert call.cost_usd == pytest.approx(0.26)
|
||||
|
||||
|
||||
def test_tpt_and_tokens_per_sec_derived():
|
||||
call = LLMCall(
|
||||
request_id="x",
|
||||
timestamp=datetime.now(tz=timezone.utc),
|
||||
model="m",
|
||||
duration_ms=1000,
|
||||
ttft_ms=200,
|
||||
completion_tokens=80,
|
||||
)
|
||||
# (1000 - 200) / 80 = 10ms per token => 100 tokens/sec
|
||||
assert call.tpt_ms == 10.0
|
||||
assert call.tokens_per_sec == 100.0
|
||||
|
||||
|
||||
def test_tpt_returns_none_when_no_completion_tokens():
|
||||
call = LLMCall(
|
||||
request_id="x",
|
||||
timestamp=datetime.now(tz=timezone.utc),
|
||||
model="m",
|
||||
duration_ms=1000,
|
||||
ttft_ms=200,
|
||||
completion_tokens=0,
|
||||
)
|
||||
assert call.tpt_ms is None
|
||||
assert call.tokens_per_sec is None
|
||||
|
||||
|
||||
def test_store_evicts_fifo_at_capacity():
|
||||
store = LLMCallStore(capacity=3)
|
||||
now = datetime.now(tz=timezone.utc)
|
||||
for i in range(5):
|
||||
store.add(
|
||||
LLMCall(
|
||||
request_id=f"r{i}",
|
||||
timestamp=now,
|
||||
model="m",
|
||||
)
|
||||
)
|
||||
snap = store.snapshot()
|
||||
assert len(snap) == 3
|
||||
assert [c.request_id for c in snap] == ["r2", "r3", "r4"]
|
||||
146
cli/test/test_obs_pricing.py
Normal file
146
cli/test/test_obs_pricing.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
from datetime import datetime, timezone
|
||||
|
||||
from planoai.obs.collector import LLMCall
|
||||
from planoai.obs.pricing import ModelPrice, PricingCatalog
|
||||
|
||||
|
||||
def _call(model: str, prompt: int, completion: int, cached: int = 0) -> LLMCall:
|
||||
return LLMCall(
|
||||
request_id="r",
|
||||
timestamp=datetime.now(tz=timezone.utc),
|
||||
model=model,
|
||||
prompt_tokens=prompt,
|
||||
completion_tokens=completion,
|
||||
cached_input_tokens=cached,
|
||||
)
|
||||
|
||||
|
||||
def test_lookup_matches_bare_and_prefixed():
|
||||
prices = {
|
||||
"openai-gpt-5.4": ModelPrice(
|
||||
input_per_token_usd=0.000001, output_per_token_usd=0.000002
|
||||
)
|
||||
}
|
||||
catalog = PricingCatalog(prices)
|
||||
assert catalog.price_for("openai-gpt-5.4") is not None
|
||||
# do/openai-gpt-5.4 should resolve after stripping the provider prefix.
|
||||
assert catalog.price_for("do/openai-gpt-5.4") is not None
|
||||
assert catalog.price_for("unknown-model") is None
|
||||
|
||||
|
||||
def test_cost_computation_without_cache():
|
||||
prices = {
|
||||
"m": ModelPrice(input_per_token_usd=0.000001, output_per_token_usd=0.000002)
|
||||
}
|
||||
cost = PricingCatalog(prices).cost_for_call(_call("m", 1000, 500))
|
||||
assert cost == 0.002 # 1000 * 1e-6 + 500 * 2e-6
|
||||
|
||||
|
||||
def test_cost_computation_with_cached_discount():
|
||||
prices = {
|
||||
"m": ModelPrice(
|
||||
input_per_token_usd=0.000001,
|
||||
output_per_token_usd=0.000002,
|
||||
cached_input_per_token_usd=0.0000001,
|
||||
)
|
||||
}
|
||||
# 800 fresh @ 1e-6 = 8e-4; 200 cached @ 1e-7 = 2e-5; 500 out @ 2e-6 = 1e-3
|
||||
cost = PricingCatalog(prices).cost_for_call(_call("m", 1000, 500, cached=200))
|
||||
assert cost == round(0.0008 + 0.00002 + 0.001, 6)
|
||||
|
||||
|
||||
def test_empty_catalog_returns_none():
|
||||
assert PricingCatalog().cost_for_call(_call("m", 100, 50)) is None
|
||||
|
||||
|
||||
def test_parse_do_catalog_treats_small_values_as_per_token():
|
||||
"""DO's real catalog uses per-token values under the `_per_million` key
|
||||
(e.g. 5E-8 for GPT-oss-20b). We treat values < 1 as already per-token."""
|
||||
from planoai.obs.pricing import _parse_do_pricing
|
||||
|
||||
sample = {
|
||||
"data": [
|
||||
{
|
||||
"model_id": "openai-gpt-oss-20b",
|
||||
"pricing": {
|
||||
"input_price_per_million": 5e-8,
|
||||
"output_price_per_million": 4.5e-7,
|
||||
},
|
||||
},
|
||||
{
|
||||
"model_id": "openai-gpt-oss-120b",
|
||||
"pricing": {
|
||||
"input_price_per_million": 1e-7,
|
||||
"output_price_per_million": 7e-7,
|
||||
},
|
||||
},
|
||||
]
|
||||
}
|
||||
prices = _parse_do_pricing(sample)
|
||||
# Values < 1 are assumed to already be per-token — no extra division.
|
||||
assert prices["openai-gpt-oss-20b"].input_per_token_usd == 5e-8
|
||||
assert prices["openai-gpt-oss-20b"].output_per_token_usd == 4.5e-7
|
||||
assert prices["openai-gpt-oss-120b"].input_per_token_usd == 1e-7
|
||||
|
||||
|
||||
def test_anthropic_aliases_match_plano_emitted_names():
|
||||
"""DO publishes 'anthropic-claude-opus-4.7' and 'anthropic-claude-haiku-4.5';
|
||||
Plano emits 'claude-opus-4-7' and 'claude-haiku-4-5-20251001'. Aliases
|
||||
registered at parse time should bridge the gap."""
|
||||
from planoai.obs.pricing import _parse_do_pricing
|
||||
|
||||
sample = {
|
||||
"data": [
|
||||
{
|
||||
"model_id": "anthropic-claude-opus-4.7",
|
||||
"pricing": {
|
||||
"input_price_per_million": 15.0,
|
||||
"output_price_per_million": 75.0,
|
||||
},
|
||||
},
|
||||
{
|
||||
"model_id": "anthropic-claude-haiku-4.5",
|
||||
"pricing": {
|
||||
"input_price_per_million": 1.0,
|
||||
"output_price_per_million": 5.0,
|
||||
},
|
||||
},
|
||||
{
|
||||
"model_id": "anthropic-claude-4.6-sonnet",
|
||||
"pricing": {
|
||||
"input_price_per_million": 3.0,
|
||||
"output_price_per_million": 15.0,
|
||||
},
|
||||
},
|
||||
]
|
||||
}
|
||||
catalog = PricingCatalog(_parse_do_pricing(sample))
|
||||
# Family-last shapes Plano emits.
|
||||
assert catalog.price_for("claude-opus-4-7") is not None
|
||||
assert catalog.price_for("claude-haiku-4-5") is not None
|
||||
# Date-suffixed name (Anthropic API style).
|
||||
assert catalog.price_for("claude-haiku-4-5-20251001") is not None
|
||||
# Word-order swap: DO has 'claude-4.6-sonnet', Plano emits 'claude-sonnet-4-6'.
|
||||
assert catalog.price_for("claude-sonnet-4-6") is not None
|
||||
# Original DO ids still resolve.
|
||||
assert catalog.price_for("anthropic-claude-opus-4.7") is not None
|
||||
|
||||
|
||||
def test_parse_do_catalog_divides_large_values_as_per_million():
|
||||
"""A provider that genuinely reports $5-per-million in that field gets divided."""
|
||||
from planoai.obs.pricing import _parse_do_pricing
|
||||
|
||||
sample = {
|
||||
"data": [
|
||||
{
|
||||
"model_id": "mystery-model",
|
||||
"pricing": {
|
||||
"input_price_per_million": 5.0, # > 1 → treated as per-million
|
||||
"output_price_per_million": 15.0,
|
||||
},
|
||||
},
|
||||
]
|
||||
}
|
||||
prices = _parse_do_pricing(sample)
|
||||
assert prices["mystery-model"].input_per_token_usd == 5.0 / 1_000_000
|
||||
assert prices["mystery-model"].output_per_token_usd == 15.0 / 1_000_000
|
||||
106
cli/test/test_obs_render.py
Normal file
106
cli/test/test_obs_render.py
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from planoai.obs.collector import LLMCall
|
||||
from planoai.obs.render import aggregates, model_rollups, route_hits
|
||||
|
||||
|
||||
def _call(
|
||||
model: str,
|
||||
ts: datetime,
|
||||
prompt=0,
|
||||
completion=0,
|
||||
cost=None,
|
||||
route=None,
|
||||
session=None,
|
||||
cache_read=0,
|
||||
cache_write=0,
|
||||
):
|
||||
return LLMCall(
|
||||
request_id="r",
|
||||
timestamp=ts,
|
||||
model=model,
|
||||
prompt_tokens=prompt,
|
||||
completion_tokens=completion,
|
||||
cached_input_tokens=cache_read,
|
||||
cache_creation_tokens=cache_write,
|
||||
cost_usd=cost,
|
||||
route_name=route,
|
||||
session_id=session,
|
||||
)
|
||||
|
||||
|
||||
def test_aggregates_sum_and_session_counts():
|
||||
now = datetime.now(tz=timezone.utc).astimezone()
|
||||
calls = [
|
||||
_call(
|
||||
"m1",
|
||||
now - timedelta(seconds=50),
|
||||
prompt=10,
|
||||
completion=5,
|
||||
cost=0.001,
|
||||
session="s1",
|
||||
),
|
||||
_call(
|
||||
"m2",
|
||||
now - timedelta(seconds=40),
|
||||
prompt=20,
|
||||
completion=10,
|
||||
cost=0.002,
|
||||
session="s1",
|
||||
),
|
||||
_call(
|
||||
"m1",
|
||||
now - timedelta(seconds=30),
|
||||
prompt=30,
|
||||
completion=15,
|
||||
cost=0.003,
|
||||
session="s2",
|
||||
),
|
||||
]
|
||||
stats = aggregates(calls)
|
||||
assert stats.count == 3
|
||||
assert stats.total_cost_usd == 0.006
|
||||
assert stats.total_input_tokens == 60
|
||||
assert stats.total_output_tokens == 30
|
||||
assert stats.distinct_sessions == 2
|
||||
assert stats.current_session == "s2"
|
||||
|
||||
|
||||
def test_rollups_split_by_model_and_cache():
|
||||
now = datetime.now(tz=timezone.utc).astimezone()
|
||||
calls = [
|
||||
_call(
|
||||
"m1", now, prompt=10, completion=5, cost=0.001, cache_write=3, cache_read=7
|
||||
),
|
||||
_call("m1", now, prompt=20, completion=10, cost=0.002, cache_read=1),
|
||||
_call("m2", now, prompt=30, completion=15, cost=0.004),
|
||||
]
|
||||
rollups = model_rollups(calls)
|
||||
by_model = {r.model: r for r in rollups}
|
||||
assert by_model["m1"].requests == 2
|
||||
assert by_model["m1"].input_tokens == 30
|
||||
assert by_model["m1"].cache_write == 3
|
||||
assert by_model["m1"].cache_read == 8
|
||||
assert by_model["m2"].input_tokens == 30
|
||||
|
||||
|
||||
def test_route_hits_only_for_routed_calls():
|
||||
now = datetime.now(tz=timezone.utc).astimezone()
|
||||
calls = [
|
||||
_call("m", now, route="code"),
|
||||
_call("m", now, route="code"),
|
||||
_call("m", now, route="summarization"),
|
||||
_call("m", now), # no route
|
||||
]
|
||||
hits = route_hits(calls)
|
||||
# Only calls with route names are counted.
|
||||
assert sum(h.hits for h in hits) == 3
|
||||
hits_by_name = {h.route: h for h in hits}
|
||||
assert hits_by_name["code"].hits == 2
|
||||
assert hits_by_name["summarization"].hits == 1
|
||||
|
||||
|
||||
def test_route_hits_empty_when_no_routes():
|
||||
now = datetime.now(tz=timezone.utc).astimezone()
|
||||
calls = [_call("m", now), _call("m", now)]
|
||||
assert route_hits(calls) == []
|
||||
2
cli/uv.lock
generated
2
cli/uv.lock
generated
|
|
@ -337,7 +337,7 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "planoai"
|
||||
version = "0.4.18"
|
||||
version = "0.4.20"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "click" },
|
||||
|
|
|
|||
|
|
@ -901,6 +901,33 @@ static_resources:
|
|||
validation_context:
|
||||
trusted_ca:
|
||||
filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
|
||||
- name: digitalocean
|
||||
connect_timeout: {{ upstream_connect_timeout | default('5s') }}
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
load_assignment:
|
||||
cluster_name: digitalocean
|
||||
endpoints:
|
||||
- lb_endpoints:
|
||||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: inference.do-ai.run
|
||||
port_value: 443
|
||||
hostname: "inference.do-ai.run"
|
||||
transport_socket:
|
||||
name: envoy.transport_sockets.tls
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
|
||||
sni: inference.do-ai.run
|
||||
common_tls_context:
|
||||
tls_params:
|
||||
tls_minimum_protocol_version: TLSv1_2
|
||||
tls_maximum_protocol_version: TLSv1_3
|
||||
validation_context:
|
||||
trusted_ca:
|
||||
filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
|
||||
- name: xiaomi
|
||||
connect_timeout: {{ upstream_connect_timeout | default('5s') }}
|
||||
type: LOGICAL_DNS
|
||||
|
|
|
|||
541
config/grafana/brightstaff_dashboard.json
Normal file
541
config/grafana/brightstaff_dashboard.json
Normal file
|
|
@ -0,0 +1,541 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "RED, LLM upstream, routing service, and process metrics for brightstaff. Pair with Envoy admin metrics from cluster=bright_staff.",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
|
||||
"id": 100,
|
||||
"panels": [],
|
||||
"title": "HTTP RED",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"axisLabel": "req/s",
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"lineWidth": 1,
|
||||
"showPoints": "never"
|
||||
},
|
||||
"unit": "reqps"
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 1 },
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "sum by (handler) (rate(brightstaff_http_requests_total[1m]))",
|
||||
"legendFormat": "{{handler}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Rate — brightstaff RPS by handler",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"description": "5xx fraction over 5m. Page-worthy when sustained above ~1%.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 0.01 },
|
||||
{ "color": "red", "value": 0.05 }
|
||||
]
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 1 },
|
||||
"id": 2,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "area",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "sum(rate(brightstaff_http_requests_total{status_class=\"5xx\"}[5m])) / clamp_min(sum(rate(brightstaff_http_requests_total[5m])), 1)",
|
||||
"legendFormat": "5xx rate",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Errors — brightstaff 5xx rate",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"description": "p50/p95/p99 by handler, computed from histogram buckets over 5m.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": { "drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never" },
|
||||
"unit": "s"
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 9, "w": 24, "x": 0, "y": 9 },
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "histogram_quantile(0.50, sum by (le, handler) (rate(brightstaff_http_request_duration_seconds_bucket[5m])))",
|
||||
"legendFormat": "p50 {{handler}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "histogram_quantile(0.95, sum by (le, handler) (rate(brightstaff_http_request_duration_seconds_bucket[5m])))",
|
||||
"legendFormat": "p95 {{handler}}",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "histogram_quantile(0.99, sum by (le, handler) (rate(brightstaff_http_request_duration_seconds_bucket[5m])))",
|
||||
"legendFormat": "p99 {{handler}}",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"title": "Duration — p50 / p95 / p99 by handler",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"description": "In-flight requests by handler. Climbs before latency does when brightstaff is saturated.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never" },
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 18 },
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "sum by (handler) (brightstaff_http_in_flight_requests)",
|
||||
"legendFormat": "{{handler}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "In-flight requests by handler",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 26 },
|
||||
"id": 200,
|
||||
"panels": [],
|
||||
"title": "LLM upstream",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": { "drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never" },
|
||||
"unit": "s"
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 9, "w": 12, "x": 0, "y": 27 },
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "histogram_quantile(0.95, sum by (le, provider, model) (rate(brightstaff_llm_upstream_duration_seconds_bucket[5m])))",
|
||||
"legendFormat": "p95 {{provider}}/{{model}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "LLM upstream p95 by provider/model",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"description": "All non-success error classes. timeout/connect = network, 5xx/429 = provider, parse = body shape mismatch, stream = mid-stream disconnect.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": { "drawStyle": "line", "fillOpacity": 30, "lineWidth": 1, "showPoints": "never", "stacking": { "mode": "normal" } },
|
||||
"unit": "reqps"
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 9, "w": 12, "x": 12, "y": 27 },
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "sum by (provider, error_class) (rate(brightstaff_llm_upstream_requests_total{error_class!=\"none\"}[5m]))",
|
||||
"legendFormat": "{{provider}} / {{error_class}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "LLM upstream errors by provider / class",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"description": "Streaming only. Empty if the route never streams.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": { "drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never" },
|
||||
"unit": "s"
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 9, "w": 12, "x": 0, "y": 36 },
|
||||
"id": 7,
|
||||
"options": {
|
||||
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "histogram_quantile(0.95, sum by (le, provider, model) (rate(brightstaff_llm_time_to_first_token_seconds_bucket[5m])))",
|
||||
"legendFormat": "p95 {{provider}}/{{model}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Time-to-first-token p95 (streaming)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"description": "Tokens/sec by provider/model/kind — proxy for cost. Stacked.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": { "drawStyle": "line", "fillOpacity": 30, "lineWidth": 1, "showPoints": "never", "stacking": { "mode": "normal" } },
|
||||
"unit": "tokens/s"
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 9, "w": 12, "x": 12, "y": 36 },
|
||||
"id": 8,
|
||||
"options": {
|
||||
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "sum by (provider, model, kind) (rate(brightstaff_llm_tokens_total[5m]))",
|
||||
"legendFormat": "{{provider}}/{{model}} {{kind}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Token throughput by provider / model / kind",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 45 },
|
||||
"id": 300,
|
||||
"panels": [],
|
||||
"title": "Routing service",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"description": "Which models the orchestrator picked over the last 15 minutes.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 9, "w": 12, "x": 0, "y": 46 },
|
||||
"id": 9,
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "sum by (selected_model) (increase(brightstaff_router_decisions_total[15m]))",
|
||||
"legendFormat": "{{selected_model}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Model selection distribution (last 15m)",
|
||||
"type": "bargauge"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"description": "Fraction of decisions that fell back (orchestrator returned `none` or errored). High = router can't classify intent or no candidates configured.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never" },
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 9, "w": 12, "x": 12, "y": 46 },
|
||||
"id": 10,
|
||||
"options": {
|
||||
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "sum by (route) (rate(brightstaff_router_decisions_total{fallback=\"true\"}[5m])) / clamp_min(sum by (route) (rate(brightstaff_router_decisions_total[5m])), 1)",
|
||||
"legendFormat": "{{route}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Fallback rate by route",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": { "drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never" },
|
||||
"unit": "s"
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 55 },
|
||||
"id": 11,
|
||||
"options": {
|
||||
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "histogram_quantile(0.95, sum by (le, route) (rate(brightstaff_router_decision_duration_seconds_bucket[5m])))",
|
||||
"legendFormat": "p95 {{route}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Router decision p95 latency",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"description": "Hit / (hit + miss). Low ratio = sessions aren't being reused or TTL too short.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "red", "value": null },
|
||||
{ "color": "yellow", "value": 0.5 },
|
||||
{ "color": "green", "value": 0.8 }
|
||||
]
|
||||
},
|
||||
"unit": "percentunit",
|
||||
"min": 0,
|
||||
"max": 1
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 6, "x": 12, "y": 55 },
|
||||
"id": 12,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "area",
|
||||
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "sum(rate(brightstaff_session_cache_events_total{outcome=\"hit\"}[5m])) / clamp_min(sum(rate(brightstaff_session_cache_events_total{outcome=~\"hit|miss\"}[5m])), 1)",
|
||||
"legendFormat": "hit rate",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Session cache hit rate",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"description": "decision_served = a real model picked. no_candidates = sentinel `none` returned. policy_error = orchestrator failed.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": { "drawStyle": "line", "fillOpacity": 30, "lineWidth": 1, "showPoints": "never", "stacking": { "mode": "normal" } },
|
||||
"unit": "reqps"
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 6, "x": 18, "y": 55 },
|
||||
"id": 13,
|
||||
"options": {
|
||||
"legend": { "displayMode": "list", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "sum by (outcome) (rate(brightstaff_routing_service_requests_total[5m]))",
|
||||
"legendFormat": "{{outcome}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "/routing/* outcomes",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 63 },
|
||||
"id": 400,
|
||||
"panels": [],
|
||||
"title": "Process & Envoy link",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"description": "Compare to brightstaff RPS (panel 1) — sustained gap = network or Envoy queueing.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never" },
|
||||
"unit": "reqps"
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 64 },
|
||||
"id": 14,
|
||||
"options": {
|
||||
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "sum(rate(envoy_cluster_upstream_rq_total{envoy_cluster_name=\"bright_staff\"}[1m]))",
|
||||
"legendFormat": "envoy → bright_staff",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "sum(rate(brightstaff_http_requests_total[1m]))",
|
||||
"legendFormat": "brightstaff served",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Envoy → brightstaff link health",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never" }
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": { "id": "byName", "options": "RSS" },
|
||||
"properties": [{ "id": "unit", "value": "bytes" }]
|
||||
},
|
||||
{
|
||||
"matcher": { "id": "byName", "options": "CPU" },
|
||||
"properties": [{ "id": "unit", "value": "percentunit" }]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 64 },
|
||||
"id": 15,
|
||||
"options": {
|
||||
"legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "process_resident_memory_bytes{job=\"brightstaff\"}",
|
||||
"legendFormat": "RSS",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
||||
"expr": "rate(process_cpu_seconds_total{job=\"brightstaff\"}[1m])",
|
||||
"legendFormat": "CPU",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Brightstaff process RSS / CPU",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 39,
|
||||
"tags": ["plano", "brightstaff", "llm"],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"name": "DS_PROMETHEUS",
|
||||
"label": "Prometheus",
|
||||
"type": "datasource",
|
||||
"query": "prometheus",
|
||||
"current": { "selected": false, "text": "Prometheus", "value": "DS_PROMETHEUS" },
|
||||
"hide": 0,
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"includeAll": false,
|
||||
"multi": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": { "from": "now-1h", "to": "now" },
|
||||
"timepicker": {},
|
||||
"timezone": "browser",
|
||||
"title": "Brightstaff (Plano dataplane)",
|
||||
"uid": "brightstaff",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
43
config/grafana/docker-compose.yaml
Normal file
43
config/grafana/docker-compose.yaml
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
# One-command Prometheus + Grafana stack for observing a locally-running
|
||||
# Plano (Envoy admin :9901 + brightstaff :9092 on the host).
|
||||
#
|
||||
# cd config/grafana
|
||||
# docker compose up -d
|
||||
# open http://localhost:3000 (admin / admin)
|
||||
#
|
||||
# Grafana is preloaded with:
|
||||
# - Prometheus datasource (uid=DS_PROMETHEUS) → http://prometheus:9090
|
||||
# - Brightstaff dashboard (auto-imported from brightstaff_dashboard.json)
|
||||
#
|
||||
# Prometheus scrapes the host's :9092 and :9901 via host.docker.internal.
|
||||
# On Linux this works because of the `extra_hosts: host-gateway` mapping
|
||||
# below. On Mac/Win it works natively.
|
||||
|
||||
services:
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
container_name: plano-prometheus
|
||||
ports:
|
||||
- "9090:9090"
|
||||
volumes:
|
||||
- ./prometheus_scrape.yaml:/etc/prometheus/prometheus.yml:ro
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
restart: unless-stopped
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:latest
|
||||
container_name: plano-grafana
|
||||
ports:
|
||||
- "3000:3000"
|
||||
environment:
|
||||
GF_SECURITY_ADMIN_USER: admin
|
||||
GF_SECURITY_ADMIN_PASSWORD: admin
|
||||
GF_AUTH_ANONYMOUS_ENABLED: "true"
|
||||
GF_AUTH_ANONYMOUS_ORG_ROLE: Viewer
|
||||
volumes:
|
||||
- ./provisioning:/etc/grafana/provisioning:ro
|
||||
- ./brightstaff_dashboard.json:/var/lib/grafana/dashboards/brightstaff_dashboard.json:ro
|
||||
depends_on:
|
||||
- prometheus
|
||||
restart: unless-stopped
|
||||
44
config/grafana/prometheus_scrape.yaml
Normal file
44
config/grafana/prometheus_scrape.yaml
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
# Prometheus config that scrapes Plano (Envoy admin + brightstaff). This is
|
||||
# a complete Prometheus config — mount it directly at
|
||||
# /etc/prometheus/prometheus.yml. The included docker-compose.yaml does this
|
||||
# for you.
|
||||
#
|
||||
# Targets:
|
||||
# - envoy:9901 Envoy admin → envoy_cluster_*, envoy_http_*, envoy_server_*.
|
||||
# - brightstaff:9092 Native dataplane → brightstaff_http_*, brightstaff_llm_*,
|
||||
# brightstaff_router_*, process_*.
|
||||
#
|
||||
# Hostname `host.docker.internal` works on Docker Desktop (Mac/Win) and on
|
||||
# Linux when the container is started with `--add-host=host.docker.internal:
|
||||
# host-gateway` (the included compose does this). If Plano runs *inside*
|
||||
# Docker on the same network as Prometheus, replace it with the container
|
||||
# name (e.g. `plano:9092`).
|
||||
#
|
||||
# This file is unrelated to demos/llm_routing/model_routing_service/prometheus.yaml,
|
||||
# which scrapes a fake metrics service to feed the routing engine.
|
||||
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
scrape_timeout: 10s
|
||||
evaluation_interval: 15s
|
||||
|
||||
scrape_configs:
|
||||
- job_name: envoy
|
||||
honor_timestamps: true
|
||||
metrics_path: /stats
|
||||
params:
|
||||
format: ["prometheus"]
|
||||
static_configs:
|
||||
- targets:
|
||||
- host.docker.internal:9901
|
||||
labels:
|
||||
service: plano
|
||||
|
||||
- job_name: brightstaff
|
||||
honor_timestamps: true
|
||||
metrics_path: /metrics
|
||||
static_configs:
|
||||
- targets:
|
||||
- host.docker.internal:9092
|
||||
labels:
|
||||
service: plano
|
||||
15
config/grafana/provisioning/dashboards/brightstaff.yaml
Normal file
15
config/grafana/provisioning/dashboards/brightstaff.yaml
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
# Auto-load the brightstaff dashboard JSON on Grafana startup.
|
||||
|
||||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: brightstaff
|
||||
orgId: 1
|
||||
folder: Plano
|
||||
type: file
|
||||
disableDeletion: false
|
||||
updateIntervalSeconds: 30
|
||||
allowUiUpdates: true
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
||||
foldersFromFilesStructure: false
|
||||
14
config/grafana/provisioning/datasources/prometheus.yaml
Normal file
14
config/grafana/provisioning/datasources/prometheus.yaml
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
# Auto-provision the Prometheus datasource so the bundled dashboard wires up
|
||||
# without any clicks. The `uid: DS_PROMETHEUS` matches the templated input in
|
||||
# brightstaff_dashboard.json.
|
||||
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
uid: DS_PROMETHEUS
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://prometheus:9090
|
||||
isDefault: true
|
||||
editable: true
|
||||
|
|
@ -192,6 +192,7 @@ properties:
|
|||
- gemini
|
||||
- vercel
|
||||
- openrouter
|
||||
- digitalocean
|
||||
routing_preferences:
|
||||
type: array
|
||||
items:
|
||||
|
|
@ -242,6 +243,7 @@ properties:
|
|||
- gemini
|
||||
- vercel
|
||||
- openrouter
|
||||
- digitalocean
|
||||
routing_preferences:
|
||||
type: array
|
||||
items:
|
||||
|
|
@ -280,6 +282,9 @@ properties:
|
|||
type: boolean
|
||||
use_agent_orchestrator:
|
||||
type: boolean
|
||||
disable_signals:
|
||||
type: boolean
|
||||
description: "Disable agentic signal analysis (frustration, repetition, escalation, etc.) on LLM responses to save CPU. Default false."
|
||||
upstream_connect_timeout:
|
||||
type: string
|
||||
description: "Connect timeout for upstream provider clusters (e.g., '5s', '10s'). Default is '5s'."
|
||||
|
|
|
|||
372
crates/Cargo.lock
generated
372
crates/Cargo.lock
generated
|
|
@ -23,6 +23,18 @@ version = "0.3.8"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217"
|
||||
|
||||
[[package]]
|
||||
name = "ahash"
|
||||
version = "0.8.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.1.4"
|
||||
|
|
@ -257,6 +269,24 @@ dependencies = [
|
|||
"vsimd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bindgen"
|
||||
version = "0.72.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"cexpr",
|
||||
"clang-sys",
|
||||
"itertools 0.13.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
"rustc-hash 2.1.2",
|
||||
"shlex",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bit-set"
|
||||
version = "0.5.3"
|
||||
|
|
@ -316,6 +346,9 @@ dependencies = [
|
|||
"hyper 1.9.0",
|
||||
"hyper-util",
|
||||
"lru",
|
||||
"metrics 0.23.1",
|
||||
"metrics-exporter-prometheus",
|
||||
"metrics-process",
|
||||
"mockito",
|
||||
"opentelemetry",
|
||||
"opentelemetry-http",
|
||||
|
|
@ -325,6 +358,7 @@ dependencies = [
|
|||
"pretty_assertions",
|
||||
"rand 0.9.4",
|
||||
"redis",
|
||||
"regex",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
|
@ -332,6 +366,8 @@ dependencies = [
|
|||
"serde_yaml",
|
||||
"strsim",
|
||||
"thiserror 2.0.18",
|
||||
"tikv-jemalloc-ctl",
|
||||
"tikv-jemallocator",
|
||||
"time",
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
|
|
@ -391,6 +427,15 @@ dependencies = [
|
|||
"shlex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cexpr"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
||||
dependencies = [
|
||||
"nom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.4"
|
||||
|
|
@ -428,6 +473,17 @@ dependencies = [
|
|||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clang-sys"
|
||||
version = "1.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
|
||||
dependencies = [
|
||||
"glob",
|
||||
"libc",
|
||||
"libloading",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cmov"
|
||||
version = "0.5.3"
|
||||
|
|
@ -574,6 +630,21 @@ dependencies = [
|
|||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-epoch"
|
||||
version = "0.9.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
|
||||
dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
||||
|
||||
[[package]]
|
||||
name = "crypto-common"
|
||||
version = "0.1.7"
|
||||
|
|
@ -1070,6 +1141,12 @@ dependencies = [
|
|||
"wasip3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
|
||||
|
||||
[[package]]
|
||||
name = "governor"
|
||||
version = "0.6.3"
|
||||
|
|
@ -1128,7 +1205,7 @@ version = "0.8.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e91b62f79061a0bc2e046024cb7ba44b08419ed238ecbd9adbd787434b9e8c25"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"ahash 0.3.8",
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
|
|
@ -1138,6 +1215,15 @@ version = "0.12.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.14.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.15.5"
|
||||
|
|
@ -1189,6 +1275,12 @@ dependencies = [
|
|||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
|
||||
|
||||
[[package]]
|
||||
name = "hex"
|
||||
version = "0.4.3"
|
||||
|
|
@ -1665,6 +1757,27 @@ version = "0.2.185"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f"
|
||||
|
||||
[[package]]
|
||||
name = "libloading"
|
||||
version = "0.8.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libproc"
|
||||
version = "0.14.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a54ad7278b8bc5301d5ffd2a94251c004feb971feba96c971ea4063645990757"
|
||||
dependencies = [
|
||||
"bindgen",
|
||||
"errno",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libredox"
|
||||
version = "0.1.16"
|
||||
|
|
@ -1745,6 +1858,12 @@ version = "0.1.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
|
||||
|
||||
[[package]]
|
||||
name = "mach2"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dae608c151f68243f2b000364e1f7b186d9c29845f7d2d85bd31b9ad77ad552b"
|
||||
|
||||
[[package]]
|
||||
name = "matchers"
|
||||
version = "0.2.0"
|
||||
|
|
@ -1782,6 +1901,77 @@ version = "2.8.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
||||
|
||||
[[package]]
|
||||
name = "metrics"
|
||||
version = "0.23.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3045b4193fbdc5b5681f32f11070da9be3609f189a79f3390706d42587f46bb5"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"portable-atomic",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metrics"
|
||||
version = "0.24.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5d5312e9ba3771cfa961b585728215e3d972c950a3eed9252aa093d6301277e8"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"portable-atomic",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metrics-exporter-prometheus"
|
||||
version = "0.15.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4f0c8427b39666bf970460908b213ec09b3b350f20c0c2eabcbba51704a08e6"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"http-body-util",
|
||||
"hyper 1.9.0",
|
||||
"hyper-util",
|
||||
"indexmap 2.14.0",
|
||||
"ipnet",
|
||||
"metrics 0.23.1",
|
||||
"metrics-util",
|
||||
"quanta",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metrics-process"
|
||||
version = "2.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4268d87f64a752f5a651314fc683f04da10be65701ea3e721ba4d74f79163cac"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"libproc",
|
||||
"mach2",
|
||||
"metrics 0.24.3",
|
||||
"once_cell",
|
||||
"procfs",
|
||||
"rlimit",
|
||||
"windows",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metrics-util"
|
||||
version = "0.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4259040465c955f9f2f1a4a8a16dc46726169bca0f88e8fb2dbeced487c3e828"
|
||||
dependencies = [
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-utils",
|
||||
"hashbrown 0.14.5",
|
||||
"metrics 0.23.1",
|
||||
"num_cpus",
|
||||
"quanta",
|
||||
"sketches-ddsketch",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mime"
|
||||
version = "0.3.17"
|
||||
|
|
@ -1935,6 +2125,16 @@ dependencies = [
|
|||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num_cpus"
|
||||
version = "1.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "objc2-core-foundation"
|
||||
version = "0.3.2"
|
||||
|
|
@ -2125,6 +2325,12 @@ dependencies = [
|
|||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "paste"
|
||||
version = "1.0.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
|
||||
|
||||
[[package]]
|
||||
name = "percent-encoding"
|
||||
version = "2.3.2"
|
||||
|
|
@ -2278,6 +2484,27 @@ dependencies = [
|
|||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "procfs"
|
||||
version = "0.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "25485360a54d6861439d60facef26de713b1e126bf015ec8f98239467a2b82f7"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"procfs-core",
|
||||
"rustix",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "procfs-core"
|
||||
version = "0.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6401bf7b6af22f78b563665d15a22e9aef27775b79b149a66ca022468a4e405"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"hex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prompt_gateway"
|
||||
version = "0.1.0"
|
||||
|
|
@ -2333,6 +2560,21 @@ dependencies = [
|
|||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quanta"
|
||||
version = "0.12.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f3ab5a9d756f0d97bdc89019bd2e4ea098cf9cde50ee7564dde6b81ccc8f06c7"
|
||||
dependencies = [
|
||||
"crossbeam-utils",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"raw-cpuid",
|
||||
"wasi 0.11.1+wasi-snapshot-preview1",
|
||||
"web-sys",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quinn"
|
||||
version = "0.11.9"
|
||||
|
|
@ -2485,6 +2727,15 @@ version = "0.10.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69"
|
||||
|
||||
[[package]]
|
||||
name = "raw-cpuid"
|
||||
version = "11.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redis"
|
||||
version = "0.27.6"
|
||||
|
|
@ -2646,6 +2897,15 @@ dependencies = [
|
|||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rlimit"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f35ee2729c56bb610f6dba436bf78135f728b7373bdffae2ec815b2d3eb98cc3"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-hash"
|
||||
version = "1.1.0"
|
||||
|
|
@ -3098,6 +3358,12 @@ version = "1.0.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e"
|
||||
|
||||
[[package]]
|
||||
name = "sketches-ddsketch"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "85636c14b73d81f541e525f585c0a2109e6744e1565b5c1668e31c70c10ed65c"
|
||||
|
||||
[[package]]
|
||||
name = "slab"
|
||||
version = "0.4.12"
|
||||
|
|
@ -3308,6 +3574,37 @@ dependencies = [
|
|||
"rustc-hash 1.1.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tikv-jemalloc-ctl"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "661f1f6a57b3a36dc9174a2c10f19513b4866816e13425d3e418b11cc37bc24c"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"paste",
|
||||
"tikv-jemalloc-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tikv-jemalloc-sys"
|
||||
version = "0.6.1+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cd8aa5b2ab86a2cefa406d889139c162cbb230092f7d1d7cbc1716405d852a3b"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tikv-jemallocator"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0359b4327f954e0567e69fb191cf1436617748813819c94b8cd4a431422d053a"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"tikv-jemalloc-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.3.47"
|
||||
|
|
@ -4003,6 +4300,49 @@ dependencies = [
|
|||
"web-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "windows"
|
||||
version = "0.62.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580"
|
||||
dependencies = [
|
||||
"windows-collections",
|
||||
"windows-core",
|
||||
"windows-future",
|
||||
"windows-numerics",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-collections"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610"
|
||||
dependencies = [
|
||||
"windows-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-core"
|
||||
version = "0.62.2"
|
||||
|
|
@ -4016,6 +4356,17 @@ dependencies = [
|
|||
"windows-strings",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-future"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb"
|
||||
dependencies = [
|
||||
"windows-core",
|
||||
"windows-link",
|
||||
"windows-threading",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-implement"
|
||||
version = "0.60.2"
|
||||
|
|
@ -4044,6 +4395,16 @@ version = "0.2.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||
|
||||
[[package]]
|
||||
name = "windows-numerics"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26"
|
||||
dependencies = [
|
||||
"windows-core",
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-registry"
|
||||
version = "0.6.1"
|
||||
|
|
@ -4133,6 +4494,15 @@ dependencies = [
|
|||
"windows_x86_64_msvc 0.53.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-threading"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37"
|
||||
dependencies = [
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.52.6"
|
||||
|
|
|
|||
|
|
@ -3,6 +3,18 @@ name = "brightstaff"
|
|||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[features]
|
||||
default = ["jemalloc"]
|
||||
jemalloc = ["tikv-jemallocator", "tikv-jemalloc-ctl"]
|
||||
|
||||
[[bin]]
|
||||
name = "brightstaff"
|
||||
path = "src/main.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "signals_replay"
|
||||
path = "src/bin/signals_replay.rs"
|
||||
|
||||
[dependencies]
|
||||
async-openai = "0.30.1"
|
||||
async-trait = "0.1"
|
||||
|
|
@ -26,7 +38,11 @@ opentelemetry-stdout = "0.31"
|
|||
opentelemetry_sdk = { version = "0.31", features = ["rt-tokio"] }
|
||||
pretty_assertions = "1.4.1"
|
||||
rand = "0.9.2"
|
||||
regex = "1.10"
|
||||
lru = "0.12"
|
||||
metrics = "0.23"
|
||||
metrics-exporter-prometheus = { version = "0.15", default-features = false, features = ["http-listener"] }
|
||||
metrics-process = "2.1"
|
||||
redis = { version = "0.27", features = ["tokio-comp"] }
|
||||
reqwest = { version = "0.12.15", features = ["stream"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
|
|
@ -35,6 +51,8 @@ serde_with = "3.13.0"
|
|||
strsim = "0.11"
|
||||
serde_yaml = "0.9.34"
|
||||
thiserror = "2.0.12"
|
||||
tikv-jemallocator = { version = "0.6", optional = true }
|
||||
tikv-jemalloc-ctl = { version = "0.6", features = ["stats"], optional = true }
|
||||
tokio = { version = "1.44.2", features = ["full"] }
|
||||
tokio-postgres = { version = "0.7", features = ["with-serde_json-1"] }
|
||||
tokio-stream = "0.1"
|
||||
|
|
|
|||
|
|
@ -24,4 +24,7 @@ pub struct AppState {
|
|||
/// Shared HTTP client for upstream LLM requests (connection pooling / keep-alive).
|
||||
pub http_client: reqwest::Client,
|
||||
pub filter_pipeline: Arc<FilterPipeline>,
|
||||
/// When false, agentic signal analysis is skipped on LLM responses to save CPU.
|
||||
/// Controlled by `overrides.disable_signals` in plano config.
|
||||
pub signals_enabled: bool,
|
||||
}
|
||||
|
|
|
|||
175
crates/brightstaff/src/bin/signals_replay.rs
Normal file
175
crates/brightstaff/src/bin/signals_replay.rs
Normal file
|
|
@ -0,0 +1,175 @@
|
|||
//! `signals-replay` — batch driver for the `brightstaff` signal analyzer.
|
||||
//!
|
||||
//! Reads JSONL conversations from stdin (one per line) and emits matching
|
||||
//! JSONL reports on stdout, one per input conversation, in the same order.
|
||||
//!
|
||||
//! Input shape (per line):
|
||||
//! ```json
|
||||
//! {"id": "convo-42", "messages": [{"from": "human", "value": "..."}, ...]}
|
||||
//! ```
|
||||
//!
|
||||
//! Output shape (per line, success):
|
||||
//! ```json
|
||||
//! {"id": "convo-42", "report": { ...python-compatible SignalReport dict... }}
|
||||
//! ```
|
||||
//!
|
||||
//! On per-line failure (parse / analyzer error), emits:
|
||||
//! ```json
|
||||
//! {"id": "convo-42", "error": "..."}
|
||||
//! ```
|
||||
//!
|
||||
//! The output report dict is shaped to match the Python reference's
|
||||
//! `SignalReport.to_dict()` byte-for-byte so the parity comparator can do a
|
||||
//! direct structural diff.
|
||||
|
||||
use std::io::{self, BufRead, BufWriter, Write};
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde_json::{json, Map, Value};
|
||||
|
||||
use brightstaff::signals::{SignalAnalyzer, SignalGroup, SignalReport};
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct InputLine {
|
||||
id: Value,
|
||||
messages: Vec<MessageRow>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct MessageRow {
|
||||
#[serde(default)]
|
||||
from: String,
|
||||
#[serde(default)]
|
||||
value: String,
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let stdin = io::stdin();
|
||||
let stdout = io::stdout();
|
||||
let mut out = BufWriter::new(stdout.lock());
|
||||
let analyzer = SignalAnalyzer::default();
|
||||
|
||||
for line in stdin.lock().lines() {
|
||||
let line = match line {
|
||||
Ok(l) => l,
|
||||
Err(e) => {
|
||||
eprintln!("read error: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
let trimmed = line.trim();
|
||||
if trimmed.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let result = process_line(&analyzer, trimmed);
|
||||
// Always emit one line per input line so id ordering stays aligned.
|
||||
if let Err(e) = writeln!(out, "{result}") {
|
||||
eprintln!("write error: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
// Flush periodically isn't strictly needed — BufWriter handles it,
|
||||
// and the parent process reads the whole stream when we're done.
|
||||
}
|
||||
let _ = out.flush();
|
||||
}
|
||||
|
||||
fn process_line(analyzer: &SignalAnalyzer, line: &str) -> Value {
|
||||
let parsed: InputLine = match serde_json::from_str(line) {
|
||||
Ok(p) => p,
|
||||
Err(e) => {
|
||||
return json!({
|
||||
"id": Value::Null,
|
||||
"error": format!("input parse: {e}"),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let id = parsed.id.clone();
|
||||
|
||||
let view: Vec<brightstaff::signals::analyzer::ShareGptMessage<'_>> = parsed
|
||||
.messages
|
||||
.iter()
|
||||
.map(|m| brightstaff::signals::analyzer::ShareGptMessage {
|
||||
from: m.from.as_str(),
|
||||
value: m.value.as_str(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let report = analyzer.analyze_sharegpt(&view);
|
||||
let report_dict = report_to_python_dict(&report);
|
||||
json!({
|
||||
"id": id,
|
||||
"report": report_dict,
|
||||
})
|
||||
}
|
||||
|
||||
/// Convert a `SignalReport` into the Python reference's `to_dict()` shape.
|
||||
///
|
||||
/// Ordering of category keys in each layer dict follows the Python source
|
||||
/// exactly so even string-equality comparisons behave deterministically.
|
||||
fn report_to_python_dict(r: &SignalReport) -> Value {
|
||||
let mut interaction = Map::new();
|
||||
interaction.insert(
|
||||
"misalignment".to_string(),
|
||||
signal_group_to_python(&r.interaction.misalignment),
|
||||
);
|
||||
interaction.insert(
|
||||
"stagnation".to_string(),
|
||||
signal_group_to_python(&r.interaction.stagnation),
|
||||
);
|
||||
interaction.insert(
|
||||
"disengagement".to_string(),
|
||||
signal_group_to_python(&r.interaction.disengagement),
|
||||
);
|
||||
interaction.insert(
|
||||
"satisfaction".to_string(),
|
||||
signal_group_to_python(&r.interaction.satisfaction),
|
||||
);
|
||||
|
||||
let mut execution = Map::new();
|
||||
execution.insert(
|
||||
"failure".to_string(),
|
||||
signal_group_to_python(&r.execution.failure),
|
||||
);
|
||||
execution.insert(
|
||||
"loops".to_string(),
|
||||
signal_group_to_python(&r.execution.loops),
|
||||
);
|
||||
|
||||
let mut environment = Map::new();
|
||||
environment.insert(
|
||||
"exhaustion".to_string(),
|
||||
signal_group_to_python(&r.environment.exhaustion),
|
||||
);
|
||||
|
||||
json!({
|
||||
"interaction_signals": Value::Object(interaction),
|
||||
"execution_signals": Value::Object(execution),
|
||||
"environment_signals": Value::Object(environment),
|
||||
"overall_quality": r.overall_quality.as_str(),
|
||||
"summary": r.summary,
|
||||
})
|
||||
}
|
||||
|
||||
fn signal_group_to_python(g: &SignalGroup) -> Value {
|
||||
let signals: Vec<Value> = g
|
||||
.signals
|
||||
.iter()
|
||||
.map(|s| {
|
||||
json!({
|
||||
"signal_type": s.signal_type.as_str(),
|
||||
"message_index": s.message_index,
|
||||
"snippet": s.snippet,
|
||||
"confidence": s.confidence,
|
||||
"metadata": s.metadata,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
json!({
|
||||
"category": g.category,
|
||||
"count": g.count,
|
||||
"severity": g.severity,
|
||||
"signals": signals,
|
||||
})
|
||||
}
|
||||
53
crates/brightstaff/src/handlers/debug.rs
Normal file
53
crates/brightstaff/src/handlers/debug.rs
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
use bytes::Bytes;
|
||||
use http_body_util::combinators::BoxBody;
|
||||
use hyper::{Response, StatusCode};
|
||||
|
||||
use super::full;
|
||||
|
||||
#[derive(serde::Serialize)]
|
||||
struct MemStats {
|
||||
allocated_bytes: usize,
|
||||
resident_bytes: usize,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
error: Option<String>,
|
||||
}
|
||||
|
||||
/// Returns jemalloc memory statistics as JSON.
|
||||
/// Falls back to a stub when the jemalloc feature is disabled.
|
||||
pub async fn memstats() -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||
let stats = get_jemalloc_stats();
|
||||
let json = serde_json::to_string(&stats).unwrap();
|
||||
Ok(Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header("Content-Type", "application/json")
|
||||
.body(full(json))
|
||||
.unwrap())
|
||||
}
|
||||
|
||||
#[cfg(feature = "jemalloc")]
|
||||
fn get_jemalloc_stats() -> MemStats {
|
||||
use tikv_jemalloc_ctl::{epoch, stats};
|
||||
|
||||
if let Err(e) = epoch::advance() {
|
||||
return MemStats {
|
||||
allocated_bytes: 0,
|
||||
resident_bytes: 0,
|
||||
error: Some(format!("failed to advance jemalloc epoch: {e}")),
|
||||
};
|
||||
}
|
||||
|
||||
MemStats {
|
||||
allocated_bytes: stats::allocated::read().unwrap_or(0),
|
||||
resident_bytes: stats::resident::read().unwrap_or(0),
|
||||
error: None,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "jemalloc"))]
|
||||
fn get_jemalloc_stats() -> MemStats {
|
||||
MemStats {
|
||||
allocated_bytes: 0,
|
||||
resident_bytes: 0,
|
||||
error: Some("jemalloc feature not enabled".to_string()),
|
||||
}
|
||||
}
|
||||
|
|
@ -441,10 +441,8 @@ impl ArchFunctionHandler {
|
|||
}
|
||||
}
|
||||
// Handle str/string conversions
|
||||
"str" | "string" => {
|
||||
if !value.is_string() {
|
||||
return Ok(json!(value.to_string()));
|
||||
}
|
||||
"str" | "string" if !value.is_string() => {
|
||||
return Ok(json!(value.to_string()));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,16 +24,18 @@ use crate::app_state::AppState;
|
|||
use crate::handlers::agents::pipeline::PipelineProcessor;
|
||||
use crate::handlers::extract_request_id;
|
||||
use crate::handlers::full;
|
||||
use crate::metrics as bs_metrics;
|
||||
use crate::state::response_state_processor::ResponsesStateProcessor;
|
||||
use crate::state::{
|
||||
extract_input_items, retrieve_and_combine_input, StateStorage, StateStorageError,
|
||||
};
|
||||
use crate::streaming::{
|
||||
create_streaming_response, create_streaming_response_with_output_filter, truncate_message,
|
||||
ObservableStreamProcessor, StreamProcessor,
|
||||
LlmMetricsCtx, ObservableStreamProcessor, StreamProcessor,
|
||||
};
|
||||
use crate::tracing::{
|
||||
collect_custom_trace_attributes, llm as tracing_llm, operation_component, set_service_name,
|
||||
collect_custom_trace_attributes, llm as tracing_llm, operation_component,
|
||||
plano as tracing_plano, set_service_name,
|
||||
};
|
||||
use model_selection::router_chat_get_upstream_model;
|
||||
|
||||
|
|
@ -102,15 +104,36 @@ async fn llm_chat_inner(
|
|||
.and_then(|hdr| request_headers.get(hdr))
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.map(|s| s.to_string());
|
||||
let pinned_model: Option<String> = if let Some(ref sid) = session_id {
|
||||
let cached_route = if let Some(ref sid) = session_id {
|
||||
state
|
||||
.orchestrator_service
|
||||
.get_cached_route(sid, tenant_id.as_deref())
|
||||
.await
|
||||
.map(|c| c.model_name)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let (pinned_model, pinned_route_name): (Option<String>, Option<String>) = match cached_route {
|
||||
Some(c) => (Some(c.model_name), c.route_name),
|
||||
None => (None, None),
|
||||
};
|
||||
|
||||
// Record session id on the LLM span for the observability console.
|
||||
if let Some(ref sid) = session_id {
|
||||
get_active_span(|span| {
|
||||
span.set_attribute(opentelemetry::KeyValue::new(
|
||||
tracing_plano::SESSION_ID,
|
||||
sid.clone(),
|
||||
));
|
||||
});
|
||||
}
|
||||
if let Some(ref route_name) = pinned_route_name {
|
||||
get_active_span(|span| {
|
||||
span.set_attribute(opentelemetry::KeyValue::new(
|
||||
tracing_plano::ROUTE_NAME,
|
||||
route_name.clone(),
|
||||
));
|
||||
});
|
||||
}
|
||||
|
||||
let full_qualified_llm_provider_url = format!("{}{}", state.llm_provider_url, request_path);
|
||||
|
||||
|
|
@ -120,6 +143,7 @@ async fn llm_chat_inner(
|
|||
&request_path,
|
||||
&state.model_aliases,
|
||||
&state.llm_providers,
|
||||
state.signals_enabled,
|
||||
)
|
||||
.await
|
||||
{
|
||||
|
|
@ -311,6 +335,18 @@ async fn llm_chat_inner(
|
|||
alias_resolved_model.clone()
|
||||
};
|
||||
|
||||
// Record route name on the LLM span (only when the orchestrator produced one).
|
||||
if let Some(ref rn) = route_name {
|
||||
if !rn.is_empty() && rn != "none" {
|
||||
get_active_span(|span| {
|
||||
span.set_attribute(opentelemetry::KeyValue::new(
|
||||
tracing_plano::ROUTE_NAME,
|
||||
rn.clone(),
|
||||
));
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ref sid) = session_id {
|
||||
state
|
||||
.orchestrator_service
|
||||
|
|
@ -373,6 +409,7 @@ async fn parse_and_validate_request(
|
|||
request_path: &str,
|
||||
model_aliases: &Option<HashMap<String, ModelAlias>>,
|
||||
llm_providers: &Arc<RwLock<LlmProviders>>,
|
||||
signals_enabled: bool,
|
||||
) -> Result<PreparedRequest, Response<BoxBody<Bytes, hyper::Error>>> {
|
||||
let raw_bytes = request
|
||||
.collect()
|
||||
|
|
@ -451,7 +488,11 @@ async fn parse_and_validate_request(
|
|||
let user_message_preview = client_request
|
||||
.get_recent_user_message()
|
||||
.map(|msg| truncate_message(&msg, 50));
|
||||
let messages_for_signals = Some(client_request.get_messages());
|
||||
let messages_for_signals = if signals_enabled {
|
||||
Some(client_request.get_messages())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Set the upstream model name and strip routing metadata
|
||||
client_request.set_model(model_name_only.clone());
|
||||
|
|
@ -652,6 +693,13 @@ async fn send_upstream(
|
|||
|
||||
let request_start_time = std::time::Instant::now();
|
||||
|
||||
// Labels for LLM upstream metrics. We prefer `resolved_model` (post-routing)
|
||||
// and derive the provider from its `provider/model` prefix. This matches the
|
||||
// same model id the cost/latency router keys off.
|
||||
let (metric_provider_raw, metric_model_raw) = bs_metrics::split_provider_model(resolved_model);
|
||||
let metric_provider = metric_provider_raw.to_string();
|
||||
let metric_model = metric_model_raw.to_string();
|
||||
|
||||
let llm_response = match http_client
|
||||
.post(upstream_url)
|
||||
.headers(request_headers.clone())
|
||||
|
|
@ -661,6 +709,14 @@ async fn send_upstream(
|
|||
{
|
||||
Ok(res) => res,
|
||||
Err(err) => {
|
||||
let err_class = bs_metrics::llm_error_class_from_reqwest(&err);
|
||||
bs_metrics::record_llm_upstream(
|
||||
&metric_provider,
|
||||
&metric_model,
|
||||
0,
|
||||
err_class,
|
||||
request_start_time.elapsed(),
|
||||
);
|
||||
let err_msg = format!("Failed to send request: {}", err);
|
||||
let mut internal_error = Response::new(full(err_msg));
|
||||
*internal_error.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
|
||||
|
|
@ -671,6 +727,36 @@ async fn send_upstream(
|
|||
// Propagate upstream headers and status
|
||||
let response_headers = llm_response.headers().clone();
|
||||
let upstream_status = llm_response.status();
|
||||
|
||||
// Upstream routers (e.g. DigitalOcean Gradient) may return an
|
||||
// `x-model-router-selected-route` header indicating which task-level
|
||||
// route the request was classified into (e.g. "Code Generation"). Surface
|
||||
// it as `plano.route.name` so the obs console's Route hit % panel can
|
||||
// show the breakdown even when Plano's own orchestrator wasn't in the
|
||||
// routing path. Any value from Plano's orchestrator already set earlier
|
||||
// takes precedence — this only fires when the span doesn't already have
|
||||
// a route name.
|
||||
if let Some(upstream_route) = response_headers
|
||||
.get("x-model-router-selected-route")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
{
|
||||
if !upstream_route.is_empty() {
|
||||
get_active_span(|span| {
|
||||
span.set_attribute(opentelemetry::KeyValue::new(
|
||||
crate::tracing::plano::ROUTE_NAME,
|
||||
upstream_route.to_string(),
|
||||
));
|
||||
});
|
||||
}
|
||||
}
|
||||
// Record the upstream HTTP status on the span for the obs console.
|
||||
get_active_span(|span| {
|
||||
span.set_attribute(opentelemetry::KeyValue::new(
|
||||
crate::tracing::http::STATUS_CODE,
|
||||
upstream_status.as_u16() as i64,
|
||||
));
|
||||
});
|
||||
|
||||
let mut response = Response::builder().status(upstream_status);
|
||||
if let Some(headers) = response.headers_mut() {
|
||||
for (name, value) in response_headers.iter() {
|
||||
|
|
@ -686,7 +772,12 @@ async fn send_upstream(
|
|||
span_name,
|
||||
request_start_time,
|
||||
messages_for_signals,
|
||||
);
|
||||
)
|
||||
.with_llm_metrics(LlmMetricsCtx {
|
||||
provider: metric_provider.clone(),
|
||||
model: metric_model.clone(),
|
||||
upstream_status: upstream_status.as_u16(),
|
||||
});
|
||||
|
||||
let output_filter_request_headers = if filter_pipeline.has_output_filters() {
|
||||
Some(request_headers.clone())
|
||||
|
|
|
|||
|
|
@ -5,10 +5,24 @@ use hyper::StatusCode;
|
|||
use std::sync::Arc;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::metrics as bs_metrics;
|
||||
use crate::metrics::labels as metric_labels;
|
||||
use crate::router::orchestrator::OrchestratorService;
|
||||
use crate::streaming::truncate_message;
|
||||
use crate::tracing::routing;
|
||||
|
||||
/// Classify a request path (already stripped of `/agents` or `/routing` by
|
||||
/// the caller) into the fixed `route` label used on routing metrics.
|
||||
fn route_label_for_path(request_path: &str) -> &'static str {
|
||||
if request_path.starts_with("/agents") {
|
||||
metric_labels::ROUTE_AGENT
|
||||
} else if request_path.starts_with("/routing") {
|
||||
metric_labels::ROUTE_ROUTING
|
||||
} else {
|
||||
metric_labels::ROUTE_LLM
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RoutingResult {
|
||||
/// Primary model to use (first in the ranked list).
|
||||
pub model_name: String,
|
||||
|
|
@ -106,15 +120,23 @@ pub async fn router_chat_get_upstream_model(
|
|||
)
|
||||
.await;
|
||||
|
||||
let determination_ms = routing_start_time.elapsed().as_millis() as i64;
|
||||
let determination_elapsed = routing_start_time.elapsed();
|
||||
let determination_ms = determination_elapsed.as_millis() as i64;
|
||||
let current_span = tracing::Span::current();
|
||||
current_span.record(routing::ROUTE_DETERMINATION_MS, determination_ms);
|
||||
let route_label = route_label_for_path(request_path);
|
||||
|
||||
match routing_result {
|
||||
Ok(route) => match route {
|
||||
Some((route_name, ranked_models)) => {
|
||||
let model_name = ranked_models.first().cloned().unwrap_or_default();
|
||||
current_span.record("route.selected_model", model_name.as_str());
|
||||
bs_metrics::record_router_decision(
|
||||
route_label,
|
||||
&model_name,
|
||||
false,
|
||||
determination_elapsed,
|
||||
);
|
||||
Ok(RoutingResult {
|
||||
model_name,
|
||||
models: ranked_models,
|
||||
|
|
@ -126,6 +148,12 @@ pub async fn router_chat_get_upstream_model(
|
|||
// This signals to llm.rs to use the original validated request model
|
||||
current_span.record("route.selected_model", "none");
|
||||
info!("no route determined, using default model");
|
||||
bs_metrics::record_router_decision(
|
||||
route_label,
|
||||
"none",
|
||||
true,
|
||||
determination_elapsed,
|
||||
);
|
||||
|
||||
Ok(RoutingResult {
|
||||
model_name: "none".to_string(),
|
||||
|
|
@ -136,6 +164,7 @@ pub async fn router_chat_get_upstream_model(
|
|||
},
|
||||
Err(err) => {
|
||||
current_span.record("route.selected_model", "unknown");
|
||||
bs_metrics::record_router_decision(route_label, "unknown", true, determination_elapsed);
|
||||
Err(RoutingError::internal_error(format!(
|
||||
"Failed to determine route: {}",
|
||||
err
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
pub mod agents;
|
||||
pub mod debug;
|
||||
pub mod function_calling;
|
||||
pub mod llm;
|
||||
pub mod models;
|
||||
|
|
|
|||
|
|
@ -12,6 +12,8 @@ use tracing::{debug, info, info_span, warn, Instrument};
|
|||
|
||||
use super::extract_or_generate_traceparent;
|
||||
use crate::handlers::llm::model_selection::router_chat_get_upstream_model;
|
||||
use crate::metrics as bs_metrics;
|
||||
use crate::metrics::labels as metric_labels;
|
||||
use crate::router::orchestrator::OrchestratorService;
|
||||
use crate::tracing::{collect_custom_trace_attributes, operation_component, set_service_name};
|
||||
|
||||
|
|
@ -230,6 +232,17 @@ async fn routing_decision_inner(
|
|||
pinned: false,
|
||||
};
|
||||
|
||||
// Distinguish "decision served" (a concrete model picked) from
|
||||
// "no_candidates" (the sentinel "none" returned when nothing
|
||||
// matched). The handler still responds 200 in both cases, so RED
|
||||
// metrics alone can't tell them apart.
|
||||
let outcome = if response.models.first().map(|m| m == "none").unwrap_or(true) {
|
||||
metric_labels::ROUTING_SVC_NO_CANDIDATES
|
||||
} else {
|
||||
metric_labels::ROUTING_SVC_DECISION_SERVED
|
||||
};
|
||||
bs_metrics::record_routing_service_outcome(outcome);
|
||||
|
||||
info!(
|
||||
primary_model = %response.models.first().map(|s| s.as_str()).unwrap_or("none"),
|
||||
total_models = response.models.len(),
|
||||
|
|
@ -249,6 +262,7 @@ async fn routing_decision_inner(
|
|||
.unwrap())
|
||||
}
|
||||
Err(err) => {
|
||||
bs_metrics::record_routing_service_outcome(metric_labels::ROUTING_SVC_POLICY_ERROR);
|
||||
warn!(error = %err.message, "routing decision failed");
|
||||
Ok(BrightStaffError::InternalServerError(err.message).into_response())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
pub mod app_state;
|
||||
pub mod handlers;
|
||||
pub mod metrics;
|
||||
pub mod router;
|
||||
pub mod session_cache;
|
||||
pub mod signals;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,17 @@
|
|||
#[cfg(feature = "jemalloc")]
|
||||
#[global_allocator]
|
||||
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||
|
||||
use brightstaff::app_state::AppState;
|
||||
use brightstaff::handlers::agents::orchestrator::agent_chat;
|
||||
use brightstaff::handlers::debug;
|
||||
use brightstaff::handlers::empty;
|
||||
use brightstaff::handlers::function_calling::function_calling_chat_handler;
|
||||
use brightstaff::handlers::llm::llm_chat;
|
||||
use brightstaff::handlers::models::list_models;
|
||||
use brightstaff::handlers::routing_service::routing_decision;
|
||||
use brightstaff::metrics as bs_metrics;
|
||||
use brightstaff::metrics::labels as metric_labels;
|
||||
use brightstaff::router::model_metrics::ModelMetricsService;
|
||||
use brightstaff::router::orchestrator::OrchestratorService;
|
||||
use brightstaff::session_cache::init_session_cache;
|
||||
|
|
@ -326,6 +333,8 @@ async fn init_app_state(
|
|||
.as_ref()
|
||||
.and_then(|tracing| tracing.span_attributes.clone());
|
||||
|
||||
let signals_enabled = !overrides.disable_signals.unwrap_or(false);
|
||||
|
||||
Ok(AppState {
|
||||
orchestrator_service,
|
||||
model_aliases: config.model_aliases.clone(),
|
||||
|
|
@ -337,6 +346,7 @@ async fn init_app_state(
|
|||
span_attributes,
|
||||
http_client: reqwest::Client::new(),
|
||||
filter_pipeline,
|
||||
signals_enabled,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -384,10 +394,79 @@ async fn init_state_storage(
|
|||
// Request routing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Normalized method label — limited set so we never emit a free-form string.
|
||||
fn method_label(method: &Method) -> &'static str {
|
||||
match *method {
|
||||
Method::GET => "GET",
|
||||
Method::POST => "POST",
|
||||
Method::PUT => "PUT",
|
||||
Method::DELETE => "DELETE",
|
||||
Method::PATCH => "PATCH",
|
||||
Method::HEAD => "HEAD",
|
||||
Method::OPTIONS => "OPTIONS",
|
||||
_ => "OTHER",
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the fixed `handler` metric label from the request's path+method.
|
||||
/// Returning `None` for fall-through means `route()` will hand the request to
|
||||
/// the catch-all 404 branch.
|
||||
fn handler_label_for(method: &Method, path: &str) -> &'static str {
|
||||
if let Some(stripped) = path.strip_prefix("/agents") {
|
||||
if matches!(
|
||||
stripped,
|
||||
CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH
|
||||
) {
|
||||
return metric_labels::HANDLER_AGENT_CHAT;
|
||||
}
|
||||
}
|
||||
if let Some(stripped) = path.strip_prefix("/routing") {
|
||||
if matches!(
|
||||
stripped,
|
||||
CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH
|
||||
) {
|
||||
return metric_labels::HANDLER_ROUTING_DECISION;
|
||||
}
|
||||
}
|
||||
match (method, path) {
|
||||
(&Method::POST, CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH) => {
|
||||
metric_labels::HANDLER_LLM_CHAT
|
||||
}
|
||||
(&Method::POST, "/function_calling") => metric_labels::HANDLER_FUNCTION_CALLING,
|
||||
(&Method::GET, "/v1/models" | "/agents/v1/models") => metric_labels::HANDLER_LIST_MODELS,
|
||||
(&Method::OPTIONS, "/v1/models" | "/agents/v1/models") => {
|
||||
metric_labels::HANDLER_CORS_PREFLIGHT
|
||||
}
|
||||
_ => metric_labels::HANDLER_NOT_FOUND,
|
||||
}
|
||||
}
|
||||
|
||||
/// Route an incoming HTTP request to the appropriate handler.
|
||||
async fn route(
|
||||
req: Request<Incoming>,
|
||||
state: Arc<AppState>,
|
||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||
let handler = handler_label_for(req.method(), req.uri().path());
|
||||
let method = method_label(req.method());
|
||||
let started = std::time::Instant::now();
|
||||
let _in_flight = bs_metrics::InFlightGuard::new(handler);
|
||||
|
||||
let result = dispatch(req, state).await;
|
||||
|
||||
let status = match &result {
|
||||
Ok(resp) => resp.status().as_u16(),
|
||||
// hyper::Error here means the body couldn't be produced; conventionally 500.
|
||||
Err(_) => 500,
|
||||
};
|
||||
bs_metrics::record_http(handler, method, status, started);
|
||||
result
|
||||
}
|
||||
|
||||
/// Inner dispatcher split out so `route()` can wrap it with metrics without
|
||||
/// duplicating the match tree.
|
||||
async fn dispatch(
|
||||
req: Request<Incoming>,
|
||||
state: Arc<AppState>,
|
||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||
let parent_cx = global::get_text_map_propagator(|p| p.extract(&HeaderExtractor(req.headers())));
|
||||
let path = req.uri().path().to_string();
|
||||
|
|
@ -439,6 +518,7 @@ async fn route(
|
|||
Ok(list_models(Arc::clone(&state.llm_providers)).await)
|
||||
}
|
||||
(&Method::OPTIONS, "/v1/models" | "/agents/v1/models") => cors_preflight(),
|
||||
(&Method::GET, "/debug/memstats") => debug::memstats().await,
|
||||
_ => {
|
||||
debug!(method = %req.method(), path = %path, "no route found");
|
||||
let mut not_found = Response::new(empty());
|
||||
|
|
@ -503,6 +583,7 @@ async fn run_server(state: Arc<AppState>) -> Result<(), Box<dyn std::error::Erro
|
|||
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
let config = load_config()?;
|
||||
let _tracer_provider = init_tracer(config.tracing.as_ref());
|
||||
bs_metrics::init();
|
||||
info!("loaded plano_config.yaml");
|
||||
let state = Arc::new(init_app_state(&config).await?);
|
||||
run_server(state).await
|
||||
|
|
|
|||
38
crates/brightstaff/src/metrics/labels.rs
Normal file
38
crates/brightstaff/src/metrics/labels.rs
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
//! Fixed label-value constants so callers never emit free-form strings
|
||||
//! (which would blow up cardinality).
|
||||
|
||||
// Handler enum — derived from the path+method match in `route()`.
|
||||
pub const HANDLER_AGENT_CHAT: &str = "agent_chat";
|
||||
pub const HANDLER_ROUTING_DECISION: &str = "routing_decision";
|
||||
pub const HANDLER_LLM_CHAT: &str = "llm_chat";
|
||||
pub const HANDLER_FUNCTION_CALLING: &str = "function_calling";
|
||||
pub const HANDLER_LIST_MODELS: &str = "list_models";
|
||||
pub const HANDLER_CORS_PREFLIGHT: &str = "cors_preflight";
|
||||
pub const HANDLER_NOT_FOUND: &str = "not_found";
|
||||
|
||||
// Router "route" class — which brightstaff endpoint prompted the decision.
|
||||
pub const ROUTE_AGENT: &str = "agent";
|
||||
pub const ROUTE_ROUTING: &str = "routing";
|
||||
pub const ROUTE_LLM: &str = "llm";
|
||||
|
||||
// Token kind for brightstaff_llm_tokens_total.
|
||||
pub const TOKEN_KIND_PROMPT: &str = "prompt";
|
||||
pub const TOKEN_KIND_COMPLETION: &str = "completion";
|
||||
|
||||
// LLM error_class values (match docstring in metrics/mod.rs).
|
||||
pub const LLM_ERR_NONE: &str = "none";
|
||||
pub const LLM_ERR_TIMEOUT: &str = "timeout";
|
||||
pub const LLM_ERR_CONNECT: &str = "connect";
|
||||
pub const LLM_ERR_PARSE: &str = "parse";
|
||||
pub const LLM_ERR_OTHER: &str = "other";
|
||||
pub const LLM_ERR_STREAM: &str = "stream";
|
||||
|
||||
// Routing service outcome values.
|
||||
pub const ROUTING_SVC_DECISION_SERVED: &str = "decision_served";
|
||||
pub const ROUTING_SVC_NO_CANDIDATES: &str = "no_candidates";
|
||||
pub const ROUTING_SVC_POLICY_ERROR: &str = "policy_error";
|
||||
|
||||
// Session cache outcome values.
|
||||
pub const SESSION_CACHE_HIT: &str = "hit";
|
||||
pub const SESSION_CACHE_MISS: &str = "miss";
|
||||
pub const SESSION_CACHE_STORE: &str = "store";
|
||||
377
crates/brightstaff/src/metrics/mod.rs
Normal file
377
crates/brightstaff/src/metrics/mod.rs
Normal file
|
|
@ -0,0 +1,377 @@
|
|||
//! Prometheus metrics for brightstaff.
|
||||
//!
|
||||
//! Installs the `metrics` global recorder backed by
|
||||
//! `metrics-exporter-prometheus` and exposes a `/metrics` HTTP endpoint on a
|
||||
//! dedicated admin port (default `0.0.0.0:9092`, overridable via
|
||||
//! `METRICS_BIND_ADDRESS`).
|
||||
//!
|
||||
//! Emitted metric families (see `describe_all` for full list):
|
||||
//! - HTTP RED: `brightstaff_http_requests_total`,
|
||||
//! `brightstaff_http_request_duration_seconds`,
|
||||
//! `brightstaff_http_in_flight_requests`.
|
||||
//! - LLM upstream: `brightstaff_llm_upstream_requests_total`,
|
||||
//! `brightstaff_llm_upstream_duration_seconds`,
|
||||
//! `brightstaff_llm_time_to_first_token_seconds`,
|
||||
//! `brightstaff_llm_tokens_total`,
|
||||
//! `brightstaff_llm_tokens_usage_missing_total`.
|
||||
//! - Routing: `brightstaff_router_decisions_total`,
|
||||
//! `brightstaff_router_decision_duration_seconds`,
|
||||
//! `brightstaff_routing_service_requests_total`,
|
||||
//! `brightstaff_session_cache_events_total`.
|
||||
//! - Process: via `metrics-process`.
|
||||
//! - Build: `brightstaff_build_info`.
|
||||
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::OnceLock;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use metrics::{counter, describe_counter, describe_gauge, describe_histogram, gauge, histogram};
|
||||
use metrics_exporter_prometheus::{Matcher, PrometheusBuilder};
|
||||
use tracing::{info, warn};
|
||||
|
||||
pub mod labels;
|
||||
|
||||
/// Guard flag so tests don't re-install the global recorder.
|
||||
static INIT: OnceLock<()> = OnceLock::new();
|
||||
|
||||
const DEFAULT_METRICS_BIND: &str = "0.0.0.0:9092";
|
||||
|
||||
/// HTTP request duration buckets (seconds). Capped at 60s.
|
||||
const HTTP_BUCKETS: &[f64] = &[
|
||||
0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0,
|
||||
];
|
||||
|
||||
/// LLM upstream / TTFT buckets (seconds). Capped at 120s because provider
|
||||
/// completions routinely run that long.
|
||||
const LLM_BUCKETS: &[f64] = &[0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0, 120.0];
|
||||
|
||||
/// Router decision buckets (seconds). The orchestrator call itself is usually
|
||||
/// sub-second but bucketed generously in case of upstream slowness.
|
||||
const ROUTER_BUCKETS: &[f64] = &[
|
||||
0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0,
|
||||
];
|
||||
|
||||
/// Install the global recorder and spawn the `/metrics` HTTP listener.
|
||||
///
|
||||
/// Safe to call more than once; subsequent calls are no-ops so tests that
|
||||
/// construct their own recorder still work.
|
||||
pub fn init() {
|
||||
if INIT.get().is_some() {
|
||||
return;
|
||||
}
|
||||
|
||||
let bind: SocketAddr = std::env::var("METRICS_BIND_ADDRESS")
|
||||
.unwrap_or_else(|_| DEFAULT_METRICS_BIND.to_string())
|
||||
.parse()
|
||||
.unwrap_or_else(|err| {
|
||||
warn!(error = %err, default = DEFAULT_METRICS_BIND, "invalid METRICS_BIND_ADDRESS, falling back to default");
|
||||
DEFAULT_METRICS_BIND.parse().expect("default bind parses")
|
||||
});
|
||||
|
||||
let builder = PrometheusBuilder::new()
|
||||
.with_http_listener(bind)
|
||||
.set_buckets_for_metric(
|
||||
Matcher::Full("brightstaff_http_request_duration_seconds".to_string()),
|
||||
HTTP_BUCKETS,
|
||||
)
|
||||
.and_then(|b| {
|
||||
b.set_buckets_for_metric(Matcher::Prefix("brightstaff_llm_".to_string()), LLM_BUCKETS)
|
||||
})
|
||||
.and_then(|b| {
|
||||
b.set_buckets_for_metric(
|
||||
Matcher::Full("brightstaff_router_decision_duration_seconds".to_string()),
|
||||
ROUTER_BUCKETS,
|
||||
)
|
||||
});
|
||||
|
||||
let builder = match builder {
|
||||
Ok(b) => b,
|
||||
Err(err) => {
|
||||
warn!(error = %err, "failed to configure metrics buckets, using defaults");
|
||||
PrometheusBuilder::new().with_http_listener(bind)
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(err) = builder.install() {
|
||||
warn!(error = %err, "failed to install Prometheus recorder; metrics disabled");
|
||||
return;
|
||||
}
|
||||
|
||||
let _ = INIT.set(());
|
||||
|
||||
describe_all();
|
||||
emit_build_info();
|
||||
|
||||
// Register process-level collector (RSS, CPU, FDs).
|
||||
let collector = metrics_process::Collector::default();
|
||||
collector.describe();
|
||||
// Prime once at startup; subsequent scrapes refresh via the exporter's
|
||||
// per-scrape render, so we additionally refresh on a short interval to
|
||||
// keep gauges moving between scrapes without requiring client pull.
|
||||
collector.collect();
|
||||
tokio::spawn(async move {
|
||||
let mut tick = tokio::time::interval(Duration::from_secs(10));
|
||||
tick.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
|
||||
loop {
|
||||
tick.tick().await;
|
||||
collector.collect();
|
||||
}
|
||||
});
|
||||
|
||||
info!(address = %bind, "metrics listener started");
|
||||
}
|
||||
|
||||
fn describe_all() {
|
||||
describe_counter!(
|
||||
"brightstaff_http_requests_total",
|
||||
"Total HTTP requests served by brightstaff, by handler and status class."
|
||||
);
|
||||
describe_histogram!(
|
||||
"brightstaff_http_request_duration_seconds",
|
||||
"Wall-clock duration of HTTP requests served by brightstaff, by handler."
|
||||
);
|
||||
describe_gauge!(
|
||||
"brightstaff_http_in_flight_requests",
|
||||
"Number of HTTP requests currently being served by brightstaff, by handler."
|
||||
);
|
||||
|
||||
describe_counter!(
|
||||
"brightstaff_llm_upstream_requests_total",
|
||||
"LLM upstream request outcomes, by provider, model, status class and error class."
|
||||
);
|
||||
describe_histogram!(
|
||||
"brightstaff_llm_upstream_duration_seconds",
|
||||
"Wall-clock duration of LLM upstream calls (stream close for streaming), by provider and model."
|
||||
);
|
||||
describe_histogram!(
|
||||
"brightstaff_llm_time_to_first_token_seconds",
|
||||
"Time from request start to first streamed byte, by provider and model (streaming only)."
|
||||
);
|
||||
describe_counter!(
|
||||
"brightstaff_llm_tokens_total",
|
||||
"Tokens reported in the provider `usage` field, by provider, model and kind (prompt/completion)."
|
||||
);
|
||||
describe_counter!(
|
||||
"brightstaff_llm_tokens_usage_missing_total",
|
||||
"LLM responses that completed without a usable `usage` block (so token counts are unknown)."
|
||||
);
|
||||
|
||||
describe_counter!(
|
||||
"brightstaff_router_decisions_total",
|
||||
"Routing decisions made by the orchestrator, by route, selected model, and whether a fallback was used."
|
||||
);
|
||||
describe_histogram!(
|
||||
"brightstaff_router_decision_duration_seconds",
|
||||
"Time spent in the orchestrator deciding a route, by route."
|
||||
);
|
||||
describe_counter!(
|
||||
"brightstaff_routing_service_requests_total",
|
||||
"Outcomes of /routing/* decision requests: decision_served, no_candidates, policy_error."
|
||||
);
|
||||
describe_counter!(
|
||||
"brightstaff_session_cache_events_total",
|
||||
"Session affinity cache lookups and stores, by outcome."
|
||||
);
|
||||
|
||||
describe_gauge!(
|
||||
"brightstaff_build_info",
|
||||
"Build metadata. Always 1; labels carry version and git SHA."
|
||||
);
|
||||
}
|
||||
|
||||
fn emit_build_info() {
|
||||
let version = env!("CARGO_PKG_VERSION");
|
||||
let git_sha = option_env!("GIT_SHA").unwrap_or("unknown");
|
||||
gauge!(
|
||||
"brightstaff_build_info",
|
||||
"version" => version.to_string(),
|
||||
"git_sha" => git_sha.to_string(),
|
||||
)
|
||||
.set(1.0);
|
||||
}
|
||||
|
||||
/// Split a provider-qualified model id like `"openai/gpt-4o"` into
|
||||
/// `(provider, model)`. Returns `("unknown", raw)` when there is no `/`.
|
||||
pub fn split_provider_model(full: &str) -> (&str, &str) {
|
||||
match full.split_once('/') {
|
||||
Some((p, m)) => (p, m),
|
||||
None => ("unknown", full),
|
||||
}
|
||||
}
|
||||
|
||||
/// Bucket an HTTP status code into `"2xx"` / `"4xx"` / `"5xx"` / `"1xx"` / `"3xx"`.
|
||||
pub fn status_class(status: u16) -> &'static str {
|
||||
match status {
|
||||
100..=199 => "1xx",
|
||||
200..=299 => "2xx",
|
||||
300..=399 => "3xx",
|
||||
400..=499 => "4xx",
|
||||
500..=599 => "5xx",
|
||||
_ => "other",
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// HTTP RED helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// RAII guard that increments the in-flight gauge on construction and
|
||||
/// decrements on drop. Pair with [`HttpTimer`] in the `route()` wrapper so the
|
||||
/// gauge drops even on error paths.
|
||||
pub struct InFlightGuard {
|
||||
handler: &'static str,
|
||||
}
|
||||
|
||||
impl InFlightGuard {
|
||||
pub fn new(handler: &'static str) -> Self {
|
||||
gauge!(
|
||||
"brightstaff_http_in_flight_requests",
|
||||
"handler" => handler,
|
||||
)
|
||||
.increment(1.0);
|
||||
Self { handler }
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for InFlightGuard {
|
||||
fn drop(&mut self) {
|
||||
gauge!(
|
||||
"brightstaff_http_in_flight_requests",
|
||||
"handler" => self.handler,
|
||||
)
|
||||
.decrement(1.0);
|
||||
}
|
||||
}
|
||||
|
||||
/// Record the HTTP request counter + duration histogram.
|
||||
pub fn record_http(handler: &'static str, method: &'static str, status: u16, started: Instant) {
|
||||
let class = status_class(status);
|
||||
counter!(
|
||||
"brightstaff_http_requests_total",
|
||||
"handler" => handler,
|
||||
"method" => method,
|
||||
"status_class" => class,
|
||||
)
|
||||
.increment(1);
|
||||
histogram!(
|
||||
"brightstaff_http_request_duration_seconds",
|
||||
"handler" => handler,
|
||||
)
|
||||
.record(started.elapsed().as_secs_f64());
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// LLM upstream helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Classify an outcome of an LLM upstream call for the `error_class` label.
|
||||
pub fn llm_error_class_from_reqwest(err: &reqwest::Error) -> &'static str {
|
||||
if err.is_timeout() {
|
||||
"timeout"
|
||||
} else if err.is_connect() {
|
||||
"connect"
|
||||
} else if err.is_decode() {
|
||||
"parse"
|
||||
} else {
|
||||
"other"
|
||||
}
|
||||
}
|
||||
|
||||
/// Record the outcome of an LLM upstream call. `status` is the HTTP status
|
||||
/// the upstream returned (0 if the call never produced one, e.g. send failure).
|
||||
/// `error_class` is `"none"` on success, or a discriminated error label.
|
||||
pub fn record_llm_upstream(
|
||||
provider: &str,
|
||||
model: &str,
|
||||
status: u16,
|
||||
error_class: &str,
|
||||
duration: Duration,
|
||||
) {
|
||||
let class = if status == 0 {
|
||||
"error"
|
||||
} else {
|
||||
status_class(status)
|
||||
};
|
||||
counter!(
|
||||
"brightstaff_llm_upstream_requests_total",
|
||||
"provider" => provider.to_string(),
|
||||
"model" => model.to_string(),
|
||||
"status_class" => class,
|
||||
"error_class" => error_class.to_string(),
|
||||
)
|
||||
.increment(1);
|
||||
histogram!(
|
||||
"brightstaff_llm_upstream_duration_seconds",
|
||||
"provider" => provider.to_string(),
|
||||
"model" => model.to_string(),
|
||||
)
|
||||
.record(duration.as_secs_f64());
|
||||
}
|
||||
|
||||
pub fn record_llm_ttft(provider: &str, model: &str, ttft: Duration) {
|
||||
histogram!(
|
||||
"brightstaff_llm_time_to_first_token_seconds",
|
||||
"provider" => provider.to_string(),
|
||||
"model" => model.to_string(),
|
||||
)
|
||||
.record(ttft.as_secs_f64());
|
||||
}
|
||||
|
||||
pub fn record_llm_tokens(provider: &str, model: &str, kind: &'static str, count: u64) {
|
||||
counter!(
|
||||
"brightstaff_llm_tokens_total",
|
||||
"provider" => provider.to_string(),
|
||||
"model" => model.to_string(),
|
||||
"kind" => kind,
|
||||
)
|
||||
.increment(count);
|
||||
}
|
||||
|
||||
pub fn record_llm_tokens_usage_missing(provider: &str, model: &str) {
|
||||
counter!(
|
||||
"brightstaff_llm_tokens_usage_missing_total",
|
||||
"provider" => provider.to_string(),
|
||||
"model" => model.to_string(),
|
||||
)
|
||||
.increment(1);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Router helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
pub fn record_router_decision(
|
||||
route: &'static str,
|
||||
selected_model: &str,
|
||||
fallback: bool,
|
||||
duration: Duration,
|
||||
) {
|
||||
counter!(
|
||||
"brightstaff_router_decisions_total",
|
||||
"route" => route,
|
||||
"selected_model" => selected_model.to_string(),
|
||||
"fallback" => if fallback { "true" } else { "false" },
|
||||
)
|
||||
.increment(1);
|
||||
histogram!(
|
||||
"brightstaff_router_decision_duration_seconds",
|
||||
"route" => route,
|
||||
)
|
||||
.record(duration.as_secs_f64());
|
||||
}
|
||||
|
||||
pub fn record_routing_service_outcome(outcome: &'static str) {
|
||||
counter!(
|
||||
"brightstaff_routing_service_requests_total",
|
||||
"outcome" => outcome,
|
||||
)
|
||||
.increment(1);
|
||||
}
|
||||
|
||||
pub fn record_session_cache_event(outcome: &'static str) {
|
||||
counter!(
|
||||
"brightstaff_session_cache_events_total",
|
||||
"outcome" => outcome,
|
||||
)
|
||||
.increment(1);
|
||||
}
|
||||
|
|
@ -1,8 +1,14 @@
|
|||
use hermesllm::apis::openai::ChatCompletionsResponse;
|
||||
use hyper::header;
|
||||
use serde::Deserialize;
|
||||
use thiserror::Error;
|
||||
use tracing::warn;
|
||||
|
||||
/// Max bytes of raw upstream body we include in a log message or error text
|
||||
/// when the body is not a recognizable error envelope. Keeps logs from being
|
||||
/// flooded by huge HTML error pages.
|
||||
const RAW_BODY_LOG_LIMIT: usize = 512;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum HttpError {
|
||||
#[error("Failed to send request: {0}")]
|
||||
|
|
@ -10,13 +16,64 @@ pub enum HttpError {
|
|||
|
||||
#[error("Failed to parse JSON response: {0}")]
|
||||
Json(serde_json::Error, String),
|
||||
|
||||
#[error("Upstream returned {status}: {message}")]
|
||||
Upstream { status: u16, message: String },
|
||||
}
|
||||
|
||||
/// Shape of an OpenAI-style error response body, e.g.
|
||||
/// `{"error": {"message": "...", "type": "...", "param": "...", "code": ...}}`.
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct UpstreamErrorEnvelope {
|
||||
error: UpstreamErrorBody,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct UpstreamErrorBody {
|
||||
message: String,
|
||||
#[serde(default, rename = "type")]
|
||||
err_type: Option<String>,
|
||||
#[serde(default)]
|
||||
param: Option<String>,
|
||||
}
|
||||
|
||||
/// Extract a human-readable error message from an upstream response body.
|
||||
/// Tries to parse an OpenAI-style `{"error": {"message": ...}}` envelope; if
|
||||
/// that fails, falls back to the first `RAW_BODY_LOG_LIMIT` bytes of the raw
|
||||
/// body (UTF-8 safe).
|
||||
fn extract_upstream_error_message(body: &str) -> String {
|
||||
if let Ok(env) = serde_json::from_str::<UpstreamErrorEnvelope>(body) {
|
||||
let mut msg = env.error.message;
|
||||
if let Some(param) = env.error.param {
|
||||
msg.push_str(&format!(" (param={param})"));
|
||||
}
|
||||
if let Some(err_type) = env.error.err_type {
|
||||
msg.push_str(&format!(" [type={err_type}]"));
|
||||
}
|
||||
return msg;
|
||||
}
|
||||
truncate_for_log(body).to_string()
|
||||
}
|
||||
|
||||
fn truncate_for_log(s: &str) -> &str {
|
||||
if s.len() <= RAW_BODY_LOG_LIMIT {
|
||||
return s;
|
||||
}
|
||||
let mut end = RAW_BODY_LOG_LIMIT;
|
||||
while end > 0 && !s.is_char_boundary(end) {
|
||||
end -= 1;
|
||||
}
|
||||
&s[..end]
|
||||
}
|
||||
|
||||
/// Sends a POST request to the given URL and extracts the text content
|
||||
/// from the first choice of the `ChatCompletionsResponse`.
|
||||
///
|
||||
/// Returns `Some((content, elapsed))` on success, or `None` if the response
|
||||
/// had no choices or the first choice had no content.
|
||||
/// Returns `Some((content, elapsed))` on success, `None` if the response
|
||||
/// had no choices or the first choice had no content. Returns
|
||||
/// `HttpError::Upstream` for any non-2xx status, carrying a message
|
||||
/// extracted from the OpenAI-style error envelope (or a truncated raw body
|
||||
/// if the body is not in that shape).
|
||||
pub async fn post_and_extract_content(
|
||||
client: &reqwest::Client,
|
||||
url: &str,
|
||||
|
|
@ -26,17 +83,36 @@ pub async fn post_and_extract_content(
|
|||
let start_time = std::time::Instant::now();
|
||||
|
||||
let res = client.post(url).headers(headers).body(body).send().await?;
|
||||
let status = res.status();
|
||||
|
||||
let body = res.text().await?;
|
||||
let elapsed = start_time.elapsed();
|
||||
|
||||
if !status.is_success() {
|
||||
let message = extract_upstream_error_message(&body);
|
||||
warn!(
|
||||
status = status.as_u16(),
|
||||
message = %message,
|
||||
body_size = body.len(),
|
||||
"upstream returned error response"
|
||||
);
|
||||
return Err(HttpError::Upstream {
|
||||
status: status.as_u16(),
|
||||
message,
|
||||
});
|
||||
}
|
||||
|
||||
let response: ChatCompletionsResponse = serde_json::from_str(&body).map_err(|err| {
|
||||
warn!(error = %err, body = %body, "failed to parse json response");
|
||||
warn!(
|
||||
error = %err,
|
||||
body = %truncate_for_log(&body),
|
||||
"failed to parse json response",
|
||||
);
|
||||
HttpError::Json(err, format!("Failed to parse JSON: {}", body))
|
||||
})?;
|
||||
|
||||
if response.choices.is_empty() {
|
||||
warn!(body = %body, "no choices in response");
|
||||
warn!(body = %truncate_for_log(&body), "no choices in response");
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
|
|
@ -46,3 +122,52 @@ pub async fn post_and_extract_content(
|
|||
.as_ref()
|
||||
.map(|c| (c.clone(), elapsed)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn extracts_message_from_openai_style_error_envelope() {
|
||||
let body = r#"{"error":{"code":400,"message":"This model's maximum context length is 32768 tokens. However, you requested 0 output tokens and your prompt contains at least 32769 input tokens, for a total of at least 32769 tokens.","param":"input_tokens","type":"BadRequestError"}}"#;
|
||||
let msg = extract_upstream_error_message(body);
|
||||
assert!(
|
||||
msg.starts_with("This model's maximum context length is 32768 tokens."),
|
||||
"unexpected message: {msg}"
|
||||
);
|
||||
assert!(msg.contains("(param=input_tokens)"));
|
||||
assert!(msg.contains("[type=BadRequestError]"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extracts_message_without_optional_fields() {
|
||||
let body = r#"{"error":{"message":"something broke"}}"#;
|
||||
let msg = extract_upstream_error_message(body);
|
||||
assert_eq!(msg, "something broke");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn falls_back_to_raw_body_when_not_error_envelope() {
|
||||
let body = "<html><body>502 Bad Gateway</body></html>";
|
||||
let msg = extract_upstream_error_message(body);
|
||||
assert_eq!(msg, body);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncates_non_envelope_bodies_in_logs() {
|
||||
let body = "x".repeat(RAW_BODY_LOG_LIMIT * 3);
|
||||
let msg = extract_upstream_error_message(&body);
|
||||
assert_eq!(msg.len(), RAW_BODY_LOG_LIMIT);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_for_log_respects_utf8_boundaries() {
|
||||
// 2-byte characters; picking a length that would split mid-char.
|
||||
let body = "é".repeat(RAW_BODY_LOG_LIMIT);
|
||||
let out = truncate_for_log(&body);
|
||||
// Should be a valid &str (implicit — would panic if we returned
|
||||
// a non-boundary slice) and at most RAW_BODY_LOG_LIMIT bytes.
|
||||
assert!(out.len() <= RAW_BODY_LOG_LIMIT);
|
||||
assert!(out.chars().all(|c| c == 'é'));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,3 +3,5 @@ pub mod model_metrics;
|
|||
pub mod orchestrator;
|
||||
pub mod orchestrator_model;
|
||||
pub mod orchestrator_model_v1;
|
||||
#[cfg(test)]
|
||||
mod stress_tests;
|
||||
|
|
|
|||
|
|
@ -15,6 +15,8 @@ use super::http::{self, post_and_extract_content};
|
|||
use super::model_metrics::ModelMetricsService;
|
||||
use super::orchestrator_model::OrchestratorModel;
|
||||
|
||||
use crate::metrics as bs_metrics;
|
||||
use crate::metrics::labels as metric_labels;
|
||||
use crate::router::orchestrator_model_v1;
|
||||
use crate::session_cache::SessionCache;
|
||||
|
||||
|
|
@ -130,7 +132,13 @@ impl OrchestratorService {
|
|||
tenant_id: Option<&str>,
|
||||
) -> Option<CachedRoute> {
|
||||
let cache = self.session_cache.as_ref()?;
|
||||
cache.get(&Self::session_key(tenant_id, session_id)).await
|
||||
let result = cache.get(&Self::session_key(tenant_id, session_id)).await;
|
||||
bs_metrics::record_session_cache_event(if result.is_some() {
|
||||
metric_labels::SESSION_CACHE_HIT
|
||||
} else {
|
||||
metric_labels::SESSION_CACHE_MISS
|
||||
});
|
||||
result
|
||||
}
|
||||
|
||||
pub async fn cache_route(
|
||||
|
|
@ -151,6 +159,7 @@ impl OrchestratorService {
|
|||
self.session_ttl,
|
||||
)
|
||||
.await;
|
||||
bs_metrics::record_session_cache_event(metric_labels::SESSION_CACHE_STORE);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -10,6 +10,18 @@ use super::orchestrator_model::{OrchestratorModel, OrchestratorModelError};
|
|||
|
||||
pub const MAX_TOKEN_LEN: usize = 8192; // Default max token length for the orchestration model
|
||||
|
||||
/// Hard cap on the number of recent messages considered when building the
|
||||
/// routing prompt. Bounds prompt growth for long-running conversations and
|
||||
/// acts as an outer guardrail before the token-budget loop runs. The most
|
||||
/// recent `MAX_ROUTING_TURNS` filtered messages are kept; older turns are
|
||||
/// dropped entirely.
|
||||
pub const MAX_ROUTING_TURNS: usize = 16;
|
||||
|
||||
/// Unicode ellipsis used to mark where content was trimmed out of a long
|
||||
/// message. Helps signal to the downstream router model that the message was
|
||||
/// truncated.
|
||||
const TRIM_MARKER: &str = "…";
|
||||
|
||||
/// Custom JSON formatter that produces spaced JSON (space after colons and commas), same as JSON in python
|
||||
struct SpacedJsonFormatter;
|
||||
|
||||
|
|
@ -176,10 +188,9 @@ impl OrchestratorModel for OrchestratorModelV1 {
|
|||
messages: &[Message],
|
||||
usage_preferences_from_request: &Option<Vec<AgentUsagePreference>>,
|
||||
) -> ChatCompletionsRequest {
|
||||
// remove system prompt, tool calls, tool call response and messages without content
|
||||
// if content is empty its likely a tool call
|
||||
// when role == tool its tool call response
|
||||
let messages_vec = messages
|
||||
// Remove system/developer/tool messages and messages without extractable
|
||||
// text (tool calls have no text content we can classify against).
|
||||
let filtered: Vec<&Message> = messages
|
||||
.iter()
|
||||
.filter(|m| {
|
||||
m.role != Role::System
|
||||
|
|
@ -187,37 +198,72 @@ impl OrchestratorModel for OrchestratorModelV1 {
|
|||
&& m.role != Role::Tool
|
||||
&& !m.content.extract_text().is_empty()
|
||||
})
|
||||
.collect::<Vec<&Message>>();
|
||||
.collect();
|
||||
|
||||
// Following code is to ensure that the conversation does not exceed max token length
|
||||
// Note: we use a simple heuristic to estimate token count based on character length to optimize for performance
|
||||
// Outer guardrail: only consider the last `MAX_ROUTING_TURNS` filtered
|
||||
// messages when building the routing prompt. Keeps prompt growth
|
||||
// predictable for long conversations regardless of per-message size.
|
||||
let start = filtered.len().saturating_sub(MAX_ROUTING_TURNS);
|
||||
let messages_vec: &[&Message] = &filtered[start..];
|
||||
|
||||
// Ensure the conversation does not exceed the configured token budget.
|
||||
// We use `len() / TOKEN_LENGTH_DIVISOR` as a cheap token estimate to
|
||||
// avoid running a real tokenizer on the hot path.
|
||||
let mut token_count = ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT.len() / TOKEN_LENGTH_DIVISOR;
|
||||
let mut selected_messages_list_reversed: Vec<&Message> = vec![];
|
||||
let mut selected_messages_list_reversed: Vec<Message> = vec![];
|
||||
for (selected_messsage_count, message) in messages_vec.iter().rev().enumerate() {
|
||||
let message_token_count = message.content.extract_text().len() / TOKEN_LENGTH_DIVISOR;
|
||||
token_count += message_token_count;
|
||||
if token_count > self.max_token_length {
|
||||
let message_text = message.content.extract_text();
|
||||
let message_token_count = message_text.len() / TOKEN_LENGTH_DIVISOR;
|
||||
if token_count + message_token_count > self.max_token_length {
|
||||
let remaining_tokens = self.max_token_length.saturating_sub(token_count);
|
||||
debug!(
|
||||
token_count = token_count,
|
||||
attempted_total_tokens = token_count + message_token_count,
|
||||
max_tokens = self.max_token_length,
|
||||
remaining_tokens,
|
||||
selected = selected_messsage_count,
|
||||
total = messages_vec.len(),
|
||||
"token count exceeds max, truncating conversation"
|
||||
);
|
||||
if message.role == Role::User {
|
||||
// If message that exceeds max token length is from user, we need to keep it
|
||||
selected_messages_list_reversed.push(message);
|
||||
// If the overflow message is from the user we need to keep
|
||||
// some of it so the orchestrator still sees the latest user
|
||||
// intent. Use a middle-trim (head + ellipsis + tail): users
|
||||
// often frame the task at the start AND put the actual ask
|
||||
// at the end of a long pasted block, so preserving both is
|
||||
// better than a head-only cut. The ellipsis also signals to
|
||||
// the router model that content was dropped.
|
||||
if message.role == Role::User && remaining_tokens > 0 {
|
||||
let max_bytes = remaining_tokens.saturating_mul(TOKEN_LENGTH_DIVISOR);
|
||||
let truncated = trim_middle_utf8(&message_text, max_bytes);
|
||||
selected_messages_list_reversed.push(Message {
|
||||
role: Role::User,
|
||||
content: Some(MessageContent::Text(truncated)),
|
||||
name: None,
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
});
|
||||
}
|
||||
break;
|
||||
}
|
||||
// If we are here, it means that the message is within the max token length
|
||||
selected_messages_list_reversed.push(message);
|
||||
token_count += message_token_count;
|
||||
selected_messages_list_reversed.push(Message {
|
||||
role: message.role.clone(),
|
||||
content: Some(MessageContent::Text(message_text)),
|
||||
name: None,
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
});
|
||||
}
|
||||
|
||||
if selected_messages_list_reversed.is_empty() {
|
||||
debug!("no messages selected, using last message");
|
||||
if let Some(last_message) = messages_vec.last() {
|
||||
selected_messages_list_reversed.push(last_message);
|
||||
selected_messages_list_reversed.push(Message {
|
||||
role: last_message.role.clone(),
|
||||
content: Some(MessageContent::Text(last_message.content.extract_text())),
|
||||
name: None,
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -237,22 +283,8 @@ impl OrchestratorModel for OrchestratorModelV1 {
|
|||
}
|
||||
|
||||
// Reverse the selected messages to maintain the conversation order
|
||||
let selected_conversation_list = selected_messages_list_reversed
|
||||
.iter()
|
||||
.rev()
|
||||
.map(|message| Message {
|
||||
role: message.role.clone(),
|
||||
content: Some(MessageContent::Text(
|
||||
message
|
||||
.content
|
||||
.as_ref()
|
||||
.map_or(String::new(), |c| c.to_string()),
|
||||
)),
|
||||
name: None,
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
})
|
||||
.collect::<Vec<Message>>();
|
||||
let selected_conversation_list: Vec<Message> =
|
||||
selected_messages_list_reversed.into_iter().rev().collect();
|
||||
|
||||
// Generate the orchestrator request message based on the usage preferences.
|
||||
// If preferences are passed in request then we use them;
|
||||
|
|
@ -405,6 +437,45 @@ fn fix_json_response(body: &str) -> String {
|
|||
body.replace("'", "\"").replace("\\n", "")
|
||||
}
|
||||
|
||||
/// Truncate `s` so the result is at most `max_bytes` bytes long, keeping
|
||||
/// roughly 60% from the start and 40% from the end, with a Unicode ellipsis
|
||||
/// separating the two. All splits respect UTF-8 character boundaries. When
|
||||
/// `max_bytes` is too small to fit the marker at all, falls back to a
|
||||
/// head-only truncation.
|
||||
fn trim_middle_utf8(s: &str, max_bytes: usize) -> String {
|
||||
if s.len() <= max_bytes {
|
||||
return s.to_string();
|
||||
}
|
||||
if max_bytes <= TRIM_MARKER.len() {
|
||||
// Not enough room even for the marker — just keep the start.
|
||||
let mut end = max_bytes;
|
||||
while end > 0 && !s.is_char_boundary(end) {
|
||||
end -= 1;
|
||||
}
|
||||
return s[..end].to_string();
|
||||
}
|
||||
|
||||
let available = max_bytes - TRIM_MARKER.len();
|
||||
// Bias toward the start (60%) where task framing typically lives, while
|
||||
// still preserving ~40% of the tail where the user's actual ask often
|
||||
// appears after a long paste.
|
||||
let mut start_len = available * 3 / 5;
|
||||
while start_len > 0 && !s.is_char_boundary(start_len) {
|
||||
start_len -= 1;
|
||||
}
|
||||
let end_len = available - start_len;
|
||||
let mut end_start = s.len().saturating_sub(end_len);
|
||||
while end_start < s.len() && !s.is_char_boundary(end_start) {
|
||||
end_start += 1;
|
||||
}
|
||||
|
||||
let mut out = String::with_capacity(start_len + TRIM_MARKER.len() + (s.len() - end_start));
|
||||
out.push_str(&s[..start_len]);
|
||||
out.push_str(TRIM_MARKER);
|
||||
out.push_str(&s[end_start..]);
|
||||
out
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for dyn OrchestratorModel {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "OrchestratorModel")
|
||||
|
|
@ -777,6 +848,10 @@ If no routes are needed, return an empty list for `route`.
|
|||
|
||||
#[test]
|
||||
fn test_conversation_trim_upto_user_message() {
|
||||
// With max_token_length=230, the older user message "given the image
|
||||
// In style of Andy Warhol" overflows the remaining budget and gets
|
||||
// middle-trimmed (head + ellipsis + tail) until it fits. Newer turns
|
||||
// are kept in full.
|
||||
let expected_prompt = r#"
|
||||
You are a helpful assistant that selects the most suitable routes based on user intent.
|
||||
You are provided with a list of available routes enclosed within <routes></routes> XML tags:
|
||||
|
|
@ -789,7 +864,7 @@ You are also given the conversation context enclosed within <conversation></conv
|
|||
[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "given the image In style of Andy Warhol"
|
||||
"content": "given…rhol"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
|
|
@ -862,6 +937,190 @@ If no routes are needed, return an empty list for `route`.
|
|||
assert_eq!(expected_prompt, prompt);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_huge_single_user_message_is_middle_trimmed() {
|
||||
// Regression test for the case where a single, extremely large user
|
||||
// message was being passed to the orchestrator verbatim and blowing
|
||||
// past the upstream model's context window. The trimmer must now
|
||||
// middle-trim (head + ellipsis + tail) the oversized message so the
|
||||
// resulting request stays within the configured budget, and the
|
||||
// trim marker must be present so the router model knows content
|
||||
// was dropped.
|
||||
let orchestrations_str = r#"
|
||||
{
|
||||
"gpt-4o": [
|
||||
{"name": "Image generation", "description": "generating image"}
|
||||
]
|
||||
}
|
||||
"#;
|
||||
let agent_orchestrations = serde_json::from_str::<
|
||||
HashMap<String, Vec<OrchestrationPreference>>,
|
||||
>(orchestrations_str)
|
||||
.unwrap();
|
||||
|
||||
let max_token_length = 2048;
|
||||
let orchestrator = OrchestratorModelV1::new(
|
||||
agent_orchestrations,
|
||||
"test-model".to_string(),
|
||||
max_token_length,
|
||||
);
|
||||
|
||||
// ~500KB of content — same scale as the real payload that triggered
|
||||
// the production upstream 400.
|
||||
let head = "HEAD_MARKER_START ";
|
||||
let tail = " TAIL_MARKER_END";
|
||||
let filler = "A".repeat(500_000);
|
||||
let huge_user_content = format!("{head}{filler}{tail}");
|
||||
|
||||
let conversation = vec![Message {
|
||||
role: Role::User,
|
||||
content: Some(MessageContent::Text(huge_user_content.clone())),
|
||||
name: None,
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
}];
|
||||
|
||||
let req = orchestrator.generate_request(&conversation, &None);
|
||||
let prompt = req.messages[0].content.extract_text();
|
||||
|
||||
// Prompt must stay bounded. Generous ceiling = budget-in-bytes +
|
||||
// scaffolding + slack. Real result should be well under this.
|
||||
let byte_ceiling = max_token_length * TOKEN_LENGTH_DIVISOR
|
||||
+ ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT.len()
|
||||
+ 1024;
|
||||
assert!(
|
||||
prompt.len() < byte_ceiling,
|
||||
"prompt length {} exceeded ceiling {} — truncation did not apply",
|
||||
prompt.len(),
|
||||
byte_ceiling,
|
||||
);
|
||||
|
||||
// Not all 500k filler chars survive.
|
||||
let a_count = prompt.chars().filter(|c| *c == 'A').count();
|
||||
assert!(
|
||||
a_count < filler.len(),
|
||||
"expected user message to be truncated; all {} 'A's survived",
|
||||
a_count
|
||||
);
|
||||
assert!(
|
||||
a_count > 0,
|
||||
"expected some of the user message to survive truncation"
|
||||
);
|
||||
|
||||
// Head and tail of the message must both be preserved (that's the
|
||||
// whole point of middle-trim over head-only).
|
||||
assert!(
|
||||
prompt.contains(head),
|
||||
"head marker missing — head was not preserved"
|
||||
);
|
||||
assert!(
|
||||
prompt.contains(tail),
|
||||
"tail marker missing — tail was not preserved"
|
||||
);
|
||||
|
||||
// Trim marker must be present so the router model can see that
|
||||
// content was omitted.
|
||||
assert!(
|
||||
prompt.contains(TRIM_MARKER),
|
||||
"ellipsis trim marker missing from truncated prompt"
|
||||
);
|
||||
|
||||
// Routing prompt scaffolding remains intact.
|
||||
assert!(prompt.contains("<conversation>"));
|
||||
assert!(prompt.contains("<routes>"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_turn_cap_limits_routing_history() {
|
||||
// The outer turn-cap guardrail should keep only the last
|
||||
// `MAX_ROUTING_TURNS` filtered messages regardless of how long the
|
||||
// conversation is. We build a conversation with alternating
|
||||
// user/assistant turns tagged with their index and verify that only
|
||||
// the tail of the conversation makes it into the prompt.
|
||||
let orchestrations_str = r#"
|
||||
{
|
||||
"gpt-4o": [
|
||||
{"name": "Image generation", "description": "generating image"}
|
||||
]
|
||||
}
|
||||
"#;
|
||||
let agent_orchestrations = serde_json::from_str::<
|
||||
HashMap<String, Vec<OrchestrationPreference>>,
|
||||
>(orchestrations_str)
|
||||
.unwrap();
|
||||
|
||||
let orchestrator =
|
||||
OrchestratorModelV1::new(agent_orchestrations, "test-model".to_string(), usize::MAX);
|
||||
|
||||
let mut conversation: Vec<Message> = Vec::new();
|
||||
let total_turns = MAX_ROUTING_TURNS * 2; // well past the cap
|
||||
for i in 0..total_turns {
|
||||
let role = if i % 2 == 0 {
|
||||
Role::User
|
||||
} else {
|
||||
Role::Assistant
|
||||
};
|
||||
conversation.push(Message {
|
||||
role,
|
||||
content: Some(MessageContent::Text(format!("turn-{i:03}"))),
|
||||
name: None,
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
});
|
||||
}
|
||||
|
||||
let req = orchestrator.generate_request(&conversation, &None);
|
||||
let prompt = req.messages[0].content.extract_text();
|
||||
|
||||
// The last MAX_ROUTING_TURNS messages (indexes total-cap..total)
|
||||
// must all appear.
|
||||
for i in (total_turns - MAX_ROUTING_TURNS)..total_turns {
|
||||
let tag = format!("turn-{i:03}");
|
||||
assert!(
|
||||
prompt.contains(&tag),
|
||||
"expected recent turn tag {tag} to be present"
|
||||
);
|
||||
}
|
||||
|
||||
// And earlier turns (indexes 0..total-cap) must all be dropped.
|
||||
for i in 0..(total_turns - MAX_ROUTING_TURNS) {
|
||||
let tag = format!("turn-{i:03}");
|
||||
assert!(
|
||||
!prompt.contains(&tag),
|
||||
"old turn tag {tag} leaked past turn cap into the prompt"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_trim_middle_utf8_helper() {
|
||||
// No-op when already small enough.
|
||||
assert_eq!(trim_middle_utf8("hello", 100), "hello");
|
||||
assert_eq!(trim_middle_utf8("hello", 5), "hello");
|
||||
|
||||
// 60/40 split with ellipsis when too long.
|
||||
let long = "a".repeat(20);
|
||||
let out = trim_middle_utf8(&long, 10);
|
||||
assert!(out.len() <= 10);
|
||||
assert!(out.contains(TRIM_MARKER));
|
||||
// Exactly one ellipsis, rest are 'a's.
|
||||
assert_eq!(out.matches(TRIM_MARKER).count(), 1);
|
||||
assert!(out.chars().filter(|c| *c == 'a').count() > 0);
|
||||
|
||||
// When max_bytes is smaller than the marker, falls back to
|
||||
// head-only truncation (no marker).
|
||||
let out = trim_middle_utf8("abcdefgh", 2);
|
||||
assert_eq!(out, "ab");
|
||||
|
||||
// UTF-8 boundary safety: 2-byte chars.
|
||||
let s = "é".repeat(50); // 100 bytes
|
||||
let out = trim_middle_utf8(&s, 25);
|
||||
assert!(out.len() <= 25);
|
||||
// Must still be valid UTF-8 that only contains 'é' and the marker.
|
||||
let ok = out.chars().all(|c| c == 'é' || c == '…');
|
||||
assert!(ok, "unexpected char in trimmed output: {out:?}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_non_text_input() {
|
||||
let expected_prompt = r#"
|
||||
|
|
|
|||
264
crates/brightstaff/src/router/stress_tests.rs
Normal file
264
crates/brightstaff/src/router/stress_tests.rs
Normal file
|
|
@ -0,0 +1,264 @@
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::router::orchestrator::OrchestratorService;
|
||||
use crate::session_cache::memory::MemorySessionCache;
|
||||
use common::configuration::{SelectionPolicy, SelectionPreference, TopLevelRoutingPreference};
|
||||
use hermesllm::apis::openai::{Message, MessageContent, Role};
|
||||
use std::sync::Arc;
|
||||
|
||||
fn make_messages(n: usize) -> Vec<Message> {
|
||||
(0..n)
|
||||
.map(|i| Message {
|
||||
role: if i % 2 == 0 {
|
||||
Role::User
|
||||
} else {
|
||||
Role::Assistant
|
||||
},
|
||||
content: Some(MessageContent::Text(format!(
|
||||
"This is message number {i} with some padding text to make it realistic."
|
||||
))),
|
||||
name: None,
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn make_routing_prefs() -> Vec<TopLevelRoutingPreference> {
|
||||
vec![
|
||||
TopLevelRoutingPreference {
|
||||
name: "code_generation".to_string(),
|
||||
description: "Code generation and debugging tasks".to_string(),
|
||||
models: vec![
|
||||
"openai/gpt-4o".to_string(),
|
||||
"openai/gpt-4o-mini".to_string(),
|
||||
],
|
||||
selection_policy: SelectionPolicy {
|
||||
prefer: SelectionPreference::None,
|
||||
},
|
||||
},
|
||||
TopLevelRoutingPreference {
|
||||
name: "summarization".to_string(),
|
||||
description: "Summarizing documents and text".to_string(),
|
||||
models: vec![
|
||||
"anthropic/claude-3-sonnet".to_string(),
|
||||
"openai/gpt-4o-mini".to_string(),
|
||||
],
|
||||
selection_policy: SelectionPolicy {
|
||||
prefer: SelectionPreference::None,
|
||||
},
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
/// Stress test: exercise the full routing code path N times using a mock
|
||||
/// HTTP server and measure jemalloc allocated bytes before/after.
|
||||
///
|
||||
/// This catches:
|
||||
/// - Memory leaks in generate_request / parse_response
|
||||
/// - Leaks in reqwest connection handling
|
||||
/// - String accumulation in the orchestrator model
|
||||
/// - Fragmentation (jemalloc allocated vs resident)
|
||||
#[tokio::test]
|
||||
async fn stress_test_routing_determine_route() {
|
||||
let mut server = mockito::Server::new_async().await;
|
||||
let router_url = format!("{}/v1/chat/completions", server.url());
|
||||
|
||||
let mock_response = serde_json::json!({
|
||||
"id": "chatcmpl-mock",
|
||||
"object": "chat.completion",
|
||||
"created": 1234567890,
|
||||
"model": "plano-orchestrator",
|
||||
"choices": [{
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "{\"route\": \"code_generation\"}"
|
||||
},
|
||||
"finish_reason": "stop"
|
||||
}],
|
||||
"usage": {"prompt_tokens": 100, "completion_tokens": 10, "total_tokens": 110}
|
||||
});
|
||||
|
||||
let _mock = server
|
||||
.mock("POST", "/v1/chat/completions")
|
||||
.with_status(200)
|
||||
.with_header("content-type", "application/json")
|
||||
.with_body(mock_response.to_string())
|
||||
.expect_at_least(1)
|
||||
.create_async()
|
||||
.await;
|
||||
|
||||
let prefs = make_routing_prefs();
|
||||
let session_cache = Arc::new(MemorySessionCache::new(1000));
|
||||
let orchestrator_service = Arc::new(OrchestratorService::with_routing(
|
||||
router_url,
|
||||
"Plano-Orchestrator".to_string(),
|
||||
"plano-orchestrator".to_string(),
|
||||
Some(prefs.clone()),
|
||||
None,
|
||||
None,
|
||||
session_cache,
|
||||
None,
|
||||
2048,
|
||||
));
|
||||
|
||||
// Warm up: a few requests to stabilize allocator state
|
||||
for _ in 0..10 {
|
||||
let msgs = make_messages(5);
|
||||
let _ = orchestrator_service
|
||||
.determine_route(&msgs, None, "warmup")
|
||||
.await;
|
||||
}
|
||||
|
||||
// Snapshot memory after warmup
|
||||
let baseline = get_allocated();
|
||||
|
||||
let num_iterations = 2000;
|
||||
|
||||
for i in 0..num_iterations {
|
||||
let msgs = make_messages(5 + (i % 10));
|
||||
let inline = if i % 3 == 0 {
|
||||
Some(make_routing_prefs())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let _ = orchestrator_service
|
||||
.determine_route(&msgs, inline, &format!("req-{i}"))
|
||||
.await;
|
||||
}
|
||||
|
||||
let after = get_allocated();
|
||||
|
||||
let growth = after.saturating_sub(baseline);
|
||||
let growth_mb = growth as f64 / (1024.0 * 1024.0);
|
||||
let per_request = if num_iterations > 0 {
|
||||
growth / num_iterations
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
eprintln!("=== Routing Stress Test Results ===");
|
||||
eprintln!(" Iterations: {num_iterations}");
|
||||
eprintln!(" Baseline alloc: {} bytes", baseline);
|
||||
eprintln!(" Final alloc: {} bytes", after);
|
||||
eprintln!(" Growth: {} bytes ({growth_mb:.2} MB)", growth);
|
||||
eprintln!(" Per-request: {} bytes", per_request);
|
||||
|
||||
// Allow up to 256 bytes per request of retained growth (connection pool, etc.)
|
||||
// A true leak would show thousands of bytes per request.
|
||||
assert!(
|
||||
per_request < 256,
|
||||
"Possible memory leak: {per_request} bytes/request retained after {num_iterations} iterations"
|
||||
);
|
||||
}
|
||||
|
||||
/// Stress test with high concurrency: many parallel determine_route calls.
///
/// Spawns 50 tasks x 100 requests each against a mocked router endpoint,
/// then compares jemalloc's allocated-bytes counter before and after to
/// detect retained per-request memory growth (a leak signature).
#[tokio::test]
async fn stress_test_routing_concurrent() {
    let mut server = mockito::Server::new_async().await;
    let router_url = format!("{}/v1/chat/completions", server.url());

    // Canned OpenAI-style completion whose content names the chosen route.
    let mock_response = serde_json::json!({
        "id": "chatcmpl-mock",
        "object": "chat.completion",
        "created": 1234567890,
        "model": "plano-orchestrator",
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": "{\"route\": \"summarization\"}"
            },
            "finish_reason": "stop"
        }],
        "usage": {"prompt_tokens": 100, "completion_tokens": 10, "total_tokens": 110}
    });

    let _mock = server
        .mock("POST", "/v1/chat/completions")
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(mock_response.to_string())
        .expect_at_least(1)
        .create_async()
        .await;

    let prefs = make_routing_prefs();
    let session_cache = Arc::new(MemorySessionCache::new(1000));
    let orchestrator_service = Arc::new(OrchestratorService::with_routing(
        router_url,
        "Plano-Orchestrator".to_string(),
        "plano-orchestrator".to_string(),
        Some(prefs),
        None,
        None,
        session_cache,
        None,
        2048,
    ));

    // Warm up
    // (lets connection pools, caches and lazy statics reach steady state so
    // their one-time allocations are not counted as "growth" below)
    for _ in 0..20 {
        let msgs = make_messages(3);
        let _ = orchestrator_service
            .determine_route(&msgs, None, "warmup")
            .await;
    }

    let baseline = get_allocated();

    let concurrency = 50;
    let requests_per_task = 100;
    let total = concurrency * requests_per_task;

    let mut handles = vec![];
    for t in 0..concurrency {
        let svc = Arc::clone(&orchestrator_service);
        let handle = tokio::spawn(async move {
            for r in 0..requests_per_task {
                // Vary message count (3..=10) so requests are not identical.
                let msgs = make_messages(3 + (r % 8));
                // Result intentionally ignored: only memory behavior is under test.
                let _ = svc
                    .determine_route(&msgs, None, &format!("req-{t}-{r}"))
                    .await;
            }
        });
        handles.push(handle);
    }

    for h in handles {
        h.await.unwrap();
    }

    let after = get_allocated();
    // saturating_sub: the allocator may legitimately report *less* memory
    // after the run; treat that as zero growth rather than underflowing.
    let growth = after.saturating_sub(baseline);
    let per_request = growth / total;

    eprintln!("=== Concurrent Routing Stress Test Results ===");
    eprintln!(" Tasks: {concurrency} x {requests_per_task} = {total}");
    eprintln!(" Baseline: {} bytes", baseline);
    eprintln!(" Final: {} bytes", after);
    eprintln!(
        " Growth: {} bytes ({:.2} MB)",
        growth,
        growth as f64 / 1_048_576.0
    );
    eprintln!(" Per-request: {} bytes", per_request);

    // 512 bytes/request of retained growth is the tolerance under
    // concurrency (twice the serial test's 256 — pools grow per-connection);
    // a real leak shows thousands of bytes per request.
    assert!(
        per_request < 512,
        "Possible memory leak under concurrency: {per_request} bytes/request retained after {total} requests"
    );
}
|
||||
|
||||
/// Current total of live allocated bytes, as reported by jemalloc.
///
/// `epoch::advance()` forces jemalloc to refresh its cached statistics so
/// the subsequent `stats::allocated` read is current rather than stale.
/// A failed read degrades to 0 instead of panicking.
#[cfg(feature = "jemalloc")]
fn get_allocated() -> usize {
    tikv_jemalloc_ctl::epoch::advance().unwrap();
    tikv_jemalloc_ctl::stats::allocated::read().unwrap_or(0)
}
|
||||
|
||||
/// Fallback when the `jemalloc` feature is disabled: no allocator stats are
/// available, so always report zero. Growth computed from two zero readings
/// is zero, which makes the leak assertions trivially pass (the stress tests
/// still exercise the code paths, they just cannot measure memory).
#[cfg(not(feature = "jemalloc"))]
fn get_allocated() -> usize {
    0
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load diff
347
crates/brightstaff/src/signals/environment/exhaustion.rs
Normal file
347
crates/brightstaff/src/signals/environment/exhaustion.rs
Normal file
|
|
@ -0,0 +1,347 @@
|
|||
//! Environment exhaustion detector. Direct port of
|
||||
//! `signals/environment/exhaustion.py`.
|
||||
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use regex::Regex;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::signals::analyzer::ShareGptMessage;
|
||||
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
|
||||
|
||||
/// Server-side failure phrases (5xx statuses and generic "server error"
/// wording) emitted by an upstream API. Fragments are combined into one
/// case-insensitive alternation by `compile`.
pub const API_ERROR_PATTERNS: &[&str] = &[
    r"500\s*(internal\s+)?server\s+error",
    r"502\s*bad\s+gateway",
    r"503\s*service\s+unavailable",
    r"504\s*gateway\s+timeout",
    r"internal\s+server\s+error",
    r"service\s+unavailable",
    r"server\s+error",
    r"backend\s+error",
    r"upstream\s+error",
    r"service\s+temporarily\s+unavailable",
    r"maintenance\s+mode",
    r"under\s+maintenance",
    r"try\s+again\s+later",
    r"temporarily\s+unavailable",
    r"system\s+error",
    r"unexpected\s+error",
    r"unhandled\s+exception",
];

/// Timeout / deadline phrases. `timed?\s*out` covers "timeout", "timed out"
/// and "time out" spellings.
pub const TIMEOUT_PATTERNS: &[&str] = &[
    r"timeout",
    r"timed?\s*out",
    r"etimedout",
    r"connection\s+timed?\s*out",
    r"read\s+timed?\s*out",
    r"request\s+timed?\s*out",
    r"gateway\s+timeout",
    r"deadline\s+exceeded",
    r"took\s+too\s+long",
    r"operation\s+timed?\s*out",
    r"socket\s+timeout",
];

/// Rate-limit / quota / throttling phrases, including HTTP 429 variants.
pub const RATE_LIMIT_PATTERNS: &[&str] = &[
    r"rate\s+limit",
    r"rate.limited",
    r"(status|error|http)\s*:?\s*429",
    r"429\s+(too\s+many|rate|limit)",
    r"too\s+many\s+requests?",
    r"quota\s+exceeded",
    r"quota\s+limit",
    r"throttl(ed|ing)",
    r"request\s+limit",
    r"api\s+limit",
    r"calls?\s+per\s+(second|minute|hour|day)",
    r"exceeded\s+.*\s+limit",
    r"slow\s+down",
    r"retry\s+after",
    r"requests?\s+exceeded",
];

/// Connectivity failures: refused/reset connections, DNS, routing, SSL/TLS.
pub const NETWORK_PATTERNS: &[&str] = &[
    r"connection\s+refused",
    r"econnrefused",
    r"econnreset",
    r"connection\s+reset",
    r"enotfound",
    r"dns\s+(error|failure|lookup)",
    r"host\s+not\s+found",
    r"network\s+(error|failure|unreachable)",
    r"no\s+route\s+to\s+host",
    r"socket\s+error",
    r"connection\s+failed",
    r"unable\s+to\s+connect",
    r"cannot\s+connect",
    r"could\s+not\s+connect",
    r"connect\s+error",
    r"ssl\s+(error|handshake|certificate)",
    r"certificate\s+(error|invalid|expired)",
];

/// Responses that arrived but could not be parsed or validated.
pub const MALFORMED_PATTERNS: &[&str] = &[
    r"json\s+parse\s+error",
    r"invalid\s+json",
    r"unexpected\s+token",
    r"syntax\s+error.*json",
    r"malformed\s+(response|json|data)",
    r"unexpected\s+end\s+of",
    r"parse\s+error",
    r"parsing\s+failed",
    r"invalid\s+response",
    r"unexpected\s+response",
    r"response\s+format",
    r"missing\s+field.*response",
    r"unexpected\s+schema",
    r"schema\s+validation",
    r"deserialization\s+error",
    r"failed\s+to\s+decode",
];

/// Context-window / token-limit overflow phrases.
pub const CONTEXT_OVERFLOW_PATTERNS: &[&str] = &[
    r"context\s+(length|limit|overflow|exceeded)",
    r"token\s+(limit|overflow|exceeded)",
    r"max(imum)?\s+tokens?",
    r"input\s+too\s+(long|large)",
    r"exceeds?\s+(context|token|character|input)\s+limit",
    r"message\s+too\s+(long|large)",
    r"content\s+too\s+(long|large)",
    r"truncat(ed|ion)\s+(due\s+to|because|for)\s+(length|size|limit)",
    r"maximum\s+context",
    r"prompt\s+too\s+(long|large)",
];
|
||||
|
||||
fn compile(patterns: &[&str]) -> Regex {
|
||||
let combined = patterns
|
||||
.iter()
|
||||
.map(|p| format!("({})", p))
|
||||
.collect::<Vec<_>>()
|
||||
.join("|");
|
||||
Regex::new(&format!("(?i){}", combined)).expect("exhaustion pattern regex must compile")
|
||||
}
|
||||
|
||||
fn api_error_re() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| compile(API_ERROR_PATTERNS))
|
||||
}
|
||||
fn timeout_re() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| compile(TIMEOUT_PATTERNS))
|
||||
}
|
||||
fn rate_limit_re() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| compile(RATE_LIMIT_PATTERNS))
|
||||
}
|
||||
fn network_re() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| compile(NETWORK_PATTERNS))
|
||||
}
|
||||
fn malformed_re() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| compile(MALFORMED_PATTERNS))
|
||||
}
|
||||
fn context_overflow_re() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| compile(CONTEXT_OVERFLOW_PATTERNS))
|
||||
}
|
||||
|
||||
fn snippet_around(text: &str, m: regex::Match<'_>, context: usize) -> String {
|
||||
let start = m.start().saturating_sub(context);
|
||||
let end = (m.end() + context).min(text.len());
|
||||
let start = align_char_boundary(text, start, false);
|
||||
let end = align_char_boundary(text, end, true);
|
||||
let mut snippet = String::new();
|
||||
if start > 0 {
|
||||
snippet.push_str("...");
|
||||
}
|
||||
snippet.push_str(&text[start..end]);
|
||||
if end < text.len() {
|
||||
snippet.push_str("...");
|
||||
}
|
||||
snippet
|
||||
}
|
||||
|
||||
/// Move `idx` onto the nearest UTF-8 character boundary of `s`.
///
/// `forward` picks the scan direction; an index at or past the end clamps
/// to `s.len()`, which is always a valid boundary. Index 0 is likewise
/// always a boundary, so the backward scan cannot underflow.
fn align_char_boundary(s: &str, idx: usize, forward: bool) -> usize {
    if idx >= s.len() {
        return s.len();
    }
    let mut pos = idx;
    loop {
        if s.is_char_boundary(pos) {
            return pos;
        }
        if forward {
            pos += 1;
        } else if pos == 0 {
            return pos;
        } else {
            pos -= 1;
        }
    }
}
|
||||
|
||||
pub fn analyze_exhaustion(messages: &[ShareGptMessage<'_>]) -> SignalGroup {
|
||||
let mut group = SignalGroup::new("exhaustion");
|
||||
|
||||
for (i, msg) in messages.iter().enumerate() {
|
||||
if msg.from != "observation" {
|
||||
continue;
|
||||
}
|
||||
let value = msg.value;
|
||||
let lower = value.to_lowercase();
|
||||
|
||||
if let Some(m) = rate_limit_re().find(&lower) {
|
||||
group.add_signal(emit(
|
||||
SignalType::EnvironmentExhaustionRateLimit,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
0.95,
|
||||
"rate_limit",
|
||||
m.as_str(),
|
||||
));
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(m) = api_error_re().find(&lower) {
|
||||
group.add_signal(emit(
|
||||
SignalType::EnvironmentExhaustionApiError,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
0.9,
|
||||
"api_error",
|
||||
m.as_str(),
|
||||
));
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(m) = timeout_re().find(&lower) {
|
||||
group.add_signal(emit(
|
||||
SignalType::EnvironmentExhaustionTimeout,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
0.9,
|
||||
"timeout",
|
||||
m.as_str(),
|
||||
));
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(m) = network_re().find(&lower) {
|
||||
group.add_signal(emit(
|
||||
SignalType::EnvironmentExhaustionNetwork,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
0.9,
|
||||
"network",
|
||||
m.as_str(),
|
||||
));
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(m) = malformed_re().find(&lower) {
|
||||
group.add_signal(emit(
|
||||
SignalType::EnvironmentExhaustionMalformed,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
0.85,
|
||||
"malformed_response",
|
||||
m.as_str(),
|
||||
));
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(m) = context_overflow_re().find(&lower) {
|
||||
group.add_signal(emit(
|
||||
SignalType::EnvironmentExhaustionContextOverflow,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
0.9,
|
||||
"context_overflow",
|
||||
m.as_str(),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
group
|
||||
}
|
||||
|
||||
fn emit(
|
||||
t: SignalType,
|
||||
idx: usize,
|
||||
snippet: String,
|
||||
confidence: f32,
|
||||
kind: &str,
|
||||
matched: &str,
|
||||
) -> SignalInstance {
|
||||
SignalInstance::new(t, idx, snippet)
|
||||
.with_confidence(confidence)
|
||||
.with_metadata(json!({
|
||||
"exhaustion_type": kind,
|
||||
"matched": matched,
|
||||
}))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand: build an `observation` (tool/environment output) message.
    fn obs(value: &str) -> ShareGptMessage<'_> {
        ShareGptMessage {
            from: "observation",
            value,
        }
    }

    // Each test feeds one observation containing a category's telltale
    // phrase and asserts the corresponding signal type is produced.

    #[test]
    fn detects_rate_limit() {
        let g = analyze_exhaustion(&[obs("HTTP 429: too many requests, retry after 30s")]);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionRateLimit)));
    }

    #[test]
    fn detects_api_error() {
        let g = analyze_exhaustion(&[obs("503 service unavailable - try again later")]);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionApiError)));
    }

    #[test]
    fn detects_timeout() {
        let g = analyze_exhaustion(&[obs("Connection timed out after 30 seconds")]);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionTimeout)));
    }

    #[test]
    fn detects_network_failure() {
        let g = analyze_exhaustion(&[obs("ECONNREFUSED: connection refused by remote host")]);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionNetwork)));
    }

    #[test]
    fn detects_malformed_response() {
        let g = analyze_exhaustion(&[obs("Invalid JSON: unexpected token at position 42")]);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionMalformed)));
    }

    #[test]
    fn detects_context_overflow() {
        let g = analyze_exhaustion(&[obs("Maximum context length exceeded for this model")]);
        assert!(g.signals.iter().any(|s| matches!(
            s.signal_type,
            SignalType::EnvironmentExhaustionContextOverflow
        )));
    }
}
|
||||
3
crates/brightstaff/src/signals/environment/mod.rs
Normal file
3
crates/brightstaff/src/signals/environment/mod.rs
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
//! Environment signals: exhaustion (external system failures and constraints).
//!
//! Currently the only detector is [`exhaustion`], which scans observation
//! messages for rate limits, timeouts, network/server errors, malformed
//! responses, and context-window overflow.

pub mod exhaustion;
|
||||
388
crates/brightstaff/src/signals/execution/failure.rs
Normal file
388
crates/brightstaff/src/signals/execution/failure.rs
Normal file
|
|
@ -0,0 +1,388 @@
|
|||
//! Execution failure detector. Direct port of `signals/execution/failure.py`.
|
||||
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use regex::Regex;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::signals::analyzer::ShareGptMessage;
|
||||
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
|
||||
|
||||
/// Argument-validation failures: wrong types, missing/invalid fields,
/// out-of-range values reported by the called tool.
pub const INVALID_ARGS_PATTERNS: &[&str] = &[
    r"invalid\s+argument",
    r"invalid\s+parameter",
    r"invalid\s+type",
    r"type\s*error",
    r"expected\s+\w+\s*,?\s*got\s+\w+",
    r"required\s+field",
    r"required\s+parameter",
    r"missing\s+required",
    r"missing\s+argument",
    r"validation\s+failed",
    r"validation\s+error",
    r"invalid\s+value",
    r"invalid\s+format",
    r"must\s+be\s+(a|an)\s+\w+",
    r"cannot\s+be\s+(null|empty|none)",
    r"is\s+not\s+valid",
    r"does\s+not\s+match",
    r"out\s+of\s+range",
    r"invalid\s+date",
    r"invalid\s+json",
    r"malformed\s+request",
];

/// Query/search/ID errors — well-formed call, but the lookup itself is bad.
pub const BAD_QUERY_PATTERNS: &[&str] = &[
    r"invalid\s+query",
    r"query\s+syntax\s+error",
    r"malformed\s+query",
    r"unknown\s+field",
    r"invalid\s+field",
    r"invalid\s+filter",
    r"invalid\s+search",
    r"unknown\s+id",
    r"invalid\s+id",
    r"id\s+format\s+error",
    r"invalid\s+identifier",
    r"query\s+failed",
    r"search\s+error",
    r"invalid\s+operator",
    r"unsupported\s+query",
];

/// The agent called a tool/function the environment does not know about.
pub const TOOL_NOT_FOUND_PATTERNS: &[&str] = &[
    r"unknown\s+function",
    r"unknown\s+tool",
    r"function\s+not\s+found",
    r"tool\s+not\s+found",
    r"no\s+such\s+function",
    r"no\s+such\s+tool",
    r"undefined\s+function",
    r"action\s+not\s+supported",
    r"invalid\s+tool",
    r"invalid\s+function",
    r"unrecognized\s+function",
];

/// Authentication/authorization failures (401/403, credentials, tokens).
pub const AUTH_MISUSE_PATTERNS: &[&str] = &[
    r"\bunauthorized\b",
    r"(status|error|http|code)\s*:?\s*401",
    r"401\s+unauthorized",
    r"403\s+forbidden",
    r"permission\s+denied",
    r"access\s+denied",
    r"authentication\s+required",
    r"invalid\s+credentials",
    r"invalid\s+token",
    r"token\s+expired",
    r"missing\s+authorization",
    r"\bforbidden\b",
    r"not\s+authorized",
    r"insufficient\s+permissions?",
];

/// Operation-sequencing errors: preconditions, conflicts (409), calls made
/// in the wrong order or repeated against an already-changed state.
pub const STATE_ERROR_PATTERNS: &[&str] = &[
    r"invalid\s+state",
    r"illegal\s+state",
    r"must\s+call\s+\w+\s+first",
    r"must\s+\w+\s+before",
    r"cannot\s+\w+\s+before",
    r"already\s+(exists?|created|started|finished)",
    r"not\s+initialized",
    r"not\s+started",
    r"already\s+in\s+progress",
    r"operation\s+in\s+progress",
    r"sequence\s+error",
    r"precondition\s+failed",
    r"(status|error|http)\s*:?\s*409",
    r"409\s+conflict",
    r"\bconflict\b",
];
|
||||
|
||||
fn compile(patterns: &[&str]) -> Regex {
|
||||
// Use `(?i)` flag for case-insensitive matching, matching Python's `re.IGNORECASE`.
|
||||
let combined = patterns
|
||||
.iter()
|
||||
.map(|p| format!("({})", p))
|
||||
.collect::<Vec<_>>()
|
||||
.join("|");
|
||||
Regex::new(&format!("(?i){}", combined)).expect("failure pattern regex must compile")
|
||||
}
|
||||
|
||||
// Lazily-compiled combined regex per failure category. `OnceLock` compiles
// each alternation once on first use; subsequent calls return the cached
// `&'static Regex`.
fn invalid_args_re() -> &'static Regex {
    static R: OnceLock<Regex> = OnceLock::new();
    R.get_or_init(|| compile(INVALID_ARGS_PATTERNS))
}
fn bad_query_re() -> &'static Regex {
    static R: OnceLock<Regex> = OnceLock::new();
    R.get_or_init(|| compile(BAD_QUERY_PATTERNS))
}
fn tool_not_found_re() -> &'static Regex {
    static R: OnceLock<Regex> = OnceLock::new();
    R.get_or_init(|| compile(TOOL_NOT_FOUND_PATTERNS))
}
fn auth_misuse_re() -> &'static Regex {
    static R: OnceLock<Regex> = OnceLock::new();
    R.get_or_init(|| compile(AUTH_MISUSE_PATTERNS))
}
fn state_error_re() -> &'static Regex {
    static R: OnceLock<Regex> = OnceLock::new();
    R.get_or_init(|| compile(STATE_ERROR_PATTERNS))
}
|
||||
|
||||
/// Pull tool name + args from a `function_call` message. Mirrors
|
||||
/// `_extract_tool_info` in the reference.
|
||||
pub(crate) fn extract_tool_info(value: &str) -> (String, String) {
|
||||
if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(value) {
|
||||
if let Some(obj) = parsed.as_object() {
|
||||
let name = obj
|
||||
.get("name")
|
||||
.or_else(|| obj.get("function"))
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_else(|| "unknown".to_string());
|
||||
let args = match obj.get("arguments").or_else(|| obj.get("args")) {
|
||||
Some(serde_json::Value::Object(o)) => {
|
||||
serde_json::to_string(&serde_json::Value::Object(o.clone())).unwrap_or_default()
|
||||
}
|
||||
Some(other) => other
|
||||
.as_str()
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_else(|| serde_json::to_string(other).unwrap_or_default()),
|
||||
None => String::new(),
|
||||
};
|
||||
return (name, args);
|
||||
}
|
||||
}
|
||||
let mut snippet: String = value.chars().take(200).collect();
|
||||
snippet.shrink_to_fit();
|
||||
("unknown".to_string(), snippet)
|
||||
}
|
||||
|
||||
/// Build a context-window snippet around a regex match, with leading/trailing
|
||||
/// ellipses when truncated. Mirrors `_get_snippet`.
|
||||
fn snippet_around(text: &str, m: regex::Match<'_>, context: usize) -> String {
|
||||
let start = m.start().saturating_sub(context);
|
||||
let end = (m.end() + context).min(text.len());
|
||||
// Ensure we cut on UTF-8 boundaries.
|
||||
let start = align_char_boundary(text, start, false);
|
||||
let end = align_char_boundary(text, end, true);
|
||||
let mut snippet = String::new();
|
||||
if start > 0 {
|
||||
snippet.push_str("...");
|
||||
}
|
||||
snippet.push_str(&text[start..end]);
|
||||
if end < text.len() {
|
||||
snippet.push_str("...");
|
||||
}
|
||||
snippet
|
||||
}
|
||||
|
||||
/// Move `idx` onto the nearest UTF-8 character boundary of `s`.
///
/// `forward` picks the scan direction; indices at or past the end clamp to
/// `s.len()` (always a valid boundary). Index 0 is also always a boundary,
/// so the backward scan cannot underflow.
fn align_char_boundary(s: &str, idx: usize, forward: bool) -> usize {
    if idx >= s.len() {
        return s.len();
    }
    let mut pos = idx;
    loop {
        if s.is_char_boundary(pos) {
            return pos;
        }
        if forward {
            pos += 1;
        } else if pos == 0 {
            return pos;
        } else {
            pos -= 1;
        }
    }
}
|
||||
|
||||
pub fn analyze_failure(messages: &[ShareGptMessage<'_>]) -> SignalGroup {
|
||||
let mut group = SignalGroup::new("failure");
|
||||
let mut last_call: Option<(usize, String, String)> = None;
|
||||
|
||||
for (i, msg) in messages.iter().enumerate() {
|
||||
match msg.from {
|
||||
"function_call" => {
|
||||
let (name, args) = extract_tool_info(msg.value);
|
||||
last_call = Some((i, name, args));
|
||||
continue;
|
||||
}
|
||||
"observation" => {}
|
||||
_ => continue,
|
||||
}
|
||||
|
||||
let value = msg.value;
|
||||
let lower = value.to_lowercase();
|
||||
let (call_index, tool_name) = match &last_call {
|
||||
Some((idx, name, _)) => (*idx, name.clone()),
|
||||
None => (i.saturating_sub(1), "unknown".to_string()),
|
||||
};
|
||||
|
||||
if let Some(m) = invalid_args_re().find(&lower) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::ExecutionFailureInvalidArgs,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
)
|
||||
.with_confidence(0.9)
|
||||
.with_metadata(json!({
|
||||
"tool_name": tool_name,
|
||||
"call_index": call_index,
|
||||
"error_type": "invalid_args",
|
||||
"matched": m.as_str(),
|
||||
})),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(m) = tool_not_found_re().find(&lower) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::ExecutionFailureToolNotFound,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
)
|
||||
.with_confidence(0.95)
|
||||
.with_metadata(json!({
|
||||
"tool_name": tool_name,
|
||||
"call_index": call_index,
|
||||
"error_type": "tool_not_found",
|
||||
"matched": m.as_str(),
|
||||
})),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(m) = auth_misuse_re().find(&lower) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::ExecutionFailureAuthMisuse,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
)
|
||||
.with_confidence(0.8)
|
||||
.with_metadata(json!({
|
||||
"tool_name": tool_name,
|
||||
"call_index": call_index,
|
||||
"error_type": "auth_misuse",
|
||||
"matched": m.as_str(),
|
||||
})),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(m) = state_error_re().find(&lower) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::ExecutionFailureStateError,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
)
|
||||
.with_confidence(0.85)
|
||||
.with_metadata(json!({
|
||||
"tool_name": tool_name,
|
||||
"call_index": call_index,
|
||||
"error_type": "state_error",
|
||||
"matched": m.as_str(),
|
||||
})),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(m) = bad_query_re().find(&lower) {
|
||||
let confidence = if ["error", "invalid", "failed"]
|
||||
.iter()
|
||||
.any(|w| lower.contains(w))
|
||||
{
|
||||
0.8
|
||||
} else {
|
||||
0.6
|
||||
};
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::ExecutionFailureBadQuery,
|
||||
i,
|
||||
snippet_around(value, m, 50),
|
||||
)
|
||||
.with_confidence(confidence)
|
||||
.with_metadata(json!({
|
||||
"tool_name": tool_name,
|
||||
"call_index": call_index,
|
||||
"error_type": "bad_query",
|
||||
"matched": m.as_str(),
|
||||
})),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
group
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand: build a `function_call` (tool invocation) message.
    fn fc(value: &str) -> ShareGptMessage<'_> {
        ShareGptMessage {
            from: "function_call",
            value,
        }
    }
    /// Shorthand: build an `observation` (tool response) message.
    fn obs(value: &str) -> ShareGptMessage<'_> {
        ShareGptMessage {
            from: "observation",
            value,
        }
    }

    // Each test pairs a call with an observation carrying a category's
    // telltale error phrase and asserts the matching signal type appears.

    #[test]
    fn detects_invalid_args() {
        let msgs = vec![
            fc(r#"{"name":"create_user","arguments":{"age":"twelve"}}"#),
            obs("Error: validation failed - expected integer got string for field age"),
        ];
        let g = analyze_failure(&msgs);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::ExecutionFailureInvalidArgs)));
    }

    #[test]
    fn detects_tool_not_found() {
        let msgs = vec![
            fc(r#"{"name":"send_thought","arguments":{}}"#),
            obs("Error: unknown function 'send_thought'"),
        ];
        let g = analyze_failure(&msgs);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::ExecutionFailureToolNotFound)));
    }

    #[test]
    fn detects_auth_misuse() {
        let msgs = vec![
            fc(r#"{"name":"get_secret","arguments":{}}"#),
            obs("HTTP 401 Unauthorized"),
        ];
        let g = analyze_failure(&msgs);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::ExecutionFailureAuthMisuse)));
    }

    #[test]
    fn detects_state_error() {
        let msgs = vec![
            fc(r#"{"name":"commit_tx","arguments":{}}"#),
            obs("must call begin_tx first"),
        ];
        let g = analyze_failure(&msgs);
        assert!(g
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::ExecutionFailureStateError)));
    }
}
|
||||
433
crates/brightstaff/src/signals/execution/loops.rs
Normal file
433
crates/brightstaff/src/signals/execution/loops.rs
Normal file
|
|
@ -0,0 +1,433 @@
|
|||
//! Execution loops detector. Direct port of `signals/execution/loops.py`.
|
||||
|
||||
use serde_json::json;
|
||||
|
||||
use crate::signals::analyzer::ShareGptMessage;
|
||||
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
|
||||
|
||||
/// Minimum run of identical consecutive calls to flag a retry loop.
pub const RETRY_THRESHOLD: usize = 3;
/// Minimum run of same-tool calls (with varying args) to flag drift.
pub const PARAMETER_DRIFT_THRESHOLD: usize = 3;
/// Minimum back-to-back repeats of a call-name cycle to flag oscillation.
pub const OSCILLATION_CYCLES_THRESHOLD: usize = 3;
|
||||
|
||||
/// One parsed tool invocation extracted from a `function_call` message.
#[derive(Debug, Clone)]
pub struct ToolCall {
    // Index of the originating message within the transcript.
    pub index: usize,
    // Tool/function name; "unknown" when the payload was unparseable.
    pub name: String,
    /// Canonical JSON string of arguments (sorted keys when parseable).
    pub args: String,
    // Parsed argument object (keys sorted); `None` when the arguments were
    // not a JSON object.
    pub args_dict: Option<serde_json::Map<String, serde_json::Value>>,
}

impl ToolCall {
    /// Structural argument equality: compare the parsed objects when both
    /// sides have one, otherwise fall back to comparing the canonical
    /// argument strings.
    pub fn args_equal(&self, other: &ToolCall) -> bool {
        match (&self.args_dict, &other.args_dict) {
            (Some(a), Some(b)) => a == b,
            _ => self.args == other.args,
        }
    }
}
|
||||
|
||||
fn parse_tool_call(index: usize, msg: &ShareGptMessage<'_>) -> Option<ToolCall> {
|
||||
if msg.from != "function_call" {
|
||||
return None;
|
||||
}
|
||||
let value = msg.value;
|
||||
|
||||
if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(value) {
|
||||
if let Some(obj) = parsed.as_object() {
|
||||
let name = obj
|
||||
.get("name")
|
||||
.or_else(|| obj.get("function"))
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_else(|| "unknown".to_string());
|
||||
let raw_args = obj.get("arguments").or_else(|| obj.get("args"));
|
||||
let (args_str, args_dict) = match raw_args {
|
||||
Some(serde_json::Value::Object(o)) => {
|
||||
let mut keys: Vec<&String> = o.keys().collect();
|
||||
keys.sort();
|
||||
let mut canon = serde_json::Map::new();
|
||||
for k in keys {
|
||||
canon.insert(k.clone(), o[k].clone());
|
||||
}
|
||||
(
|
||||
serde_json::to_string(&serde_json::Value::Object(canon.clone()))
|
||||
.unwrap_or_default(),
|
||||
Some(canon),
|
||||
)
|
||||
}
|
||||
Some(other) => (
|
||||
other
|
||||
.as_str()
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_else(|| serde_json::to_string(other).unwrap_or_default()),
|
||||
None,
|
||||
),
|
||||
None => (String::new(), None),
|
||||
};
|
||||
return Some(ToolCall {
|
||||
index,
|
||||
name,
|
||||
args: args_str,
|
||||
args_dict,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(paren) = value.find('(') {
|
||||
if paren > 0 {
|
||||
let name = value[..paren].trim().to_string();
|
||||
let args_part = &value[paren..];
|
||||
if args_part.starts_with('(') && args_part.ends_with(')') {
|
||||
let inner = args_part[1..args_part.len() - 1].trim();
|
||||
if let Ok(serde_json::Value::Object(o)) =
|
||||
serde_json::from_str::<serde_json::Value>(inner)
|
||||
{
|
||||
let mut keys: Vec<&String> = o.keys().collect();
|
||||
keys.sort();
|
||||
let mut canon = serde_json::Map::new();
|
||||
for k in keys {
|
||||
canon.insert(k.clone(), o[k].clone());
|
||||
}
|
||||
return Some(ToolCall {
|
||||
index,
|
||||
name,
|
||||
args: serde_json::to_string(&serde_json::Value::Object(canon.clone()))
|
||||
.unwrap_or_default(),
|
||||
args_dict: Some(canon),
|
||||
});
|
||||
}
|
||||
return Some(ToolCall {
|
||||
index,
|
||||
name,
|
||||
args: inner.to_string(),
|
||||
args_dict: None,
|
||||
});
|
||||
}
|
||||
return Some(ToolCall {
|
||||
index,
|
||||
name,
|
||||
args: args_part.to_string(),
|
||||
args_dict: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Some(ToolCall {
|
||||
index,
|
||||
name: value.trim().to_string(),
|
||||
args: String::new(),
|
||||
args_dict: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn extract_tool_calls(messages: &[ShareGptMessage<'_>]) -> Vec<ToolCall> {
|
||||
let mut out = Vec::new();
|
||||
for (i, msg) in messages.iter().enumerate() {
|
||||
if let Some(c) = parse_tool_call(i, msg) {
|
||||
out.push(c);
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Find runs of at least `RETRY_THRESHOLD` consecutive calls to the same
/// tool with structurally identical arguments. Returns one
/// `(start_msg_index, end_msg_index, tool_name)` tuple per run.
fn detect_retry(calls: &[ToolCall]) -> Vec<(usize, usize, String)> {
    if calls.len() < RETRY_THRESHOLD {
        return Vec::new();
    }
    let mut patterns = Vec::new();
    let mut i = 0;
    while i < calls.len() {
        let current = &calls[i];
        let mut j = i + 1;
        let mut run_length = 1;
        // Extend the run while both the name and the args keep matching.
        while j < calls.len() {
            if calls[j].name == current.name && calls[j].args_equal(current) {
                run_length += 1;
                j += 1;
            } else {
                break;
            }
        }
        if run_length >= RETRY_THRESHOLD {
            patterns.push((calls[i].index, calls[j - 1].index, current.name.clone()));
            // Jump past the whole run so overlapping sub-runs are not reported.
            i = j;
        } else {
            i += 1;
        }
    }
    patterns
}
|
||||
|
||||
fn detect_parameter_drift(calls: &[ToolCall]) -> Vec<(usize, usize, String, usize)> {
|
||||
if calls.len() < PARAMETER_DRIFT_THRESHOLD {
|
||||
return Vec::new();
|
||||
}
|
||||
let mut patterns = Vec::new();
|
||||
let mut i = 0;
|
||||
while i < calls.len() {
|
||||
let current_name = calls[i].name.clone();
|
||||
let mut seen_args: Vec<String> = vec![calls[i].args.clone()];
|
||||
let mut unique_args = 1;
|
||||
let mut j = i + 1;
|
||||
while j < calls.len() {
|
||||
if calls[j].name != current_name {
|
||||
break;
|
||||
}
|
||||
if !seen_args.iter().any(|a| a == &calls[j].args) {
|
||||
seen_args.push(calls[j].args.clone());
|
||||
unique_args += 1;
|
||||
}
|
||||
j += 1;
|
||||
}
|
||||
let run_length = j - i;
|
||||
if run_length >= PARAMETER_DRIFT_THRESHOLD && unique_args >= 2 {
|
||||
patterns.push((
|
||||
calls[i].index,
|
||||
calls[j - 1].index,
|
||||
current_name,
|
||||
unique_args,
|
||||
));
|
||||
i = j;
|
||||
} else {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
patterns
|
||||
}
|
||||
|
||||
/// Detect oscillation: a short cycle of *distinct* tool names (length 2-5)
/// repeated back-to-back at least `OSCILLATION_CYCLES_THRESHOLD` times,
/// e.g. A,B,A,B,A,B. Returns
/// `(start_msg_index, end_msg_index, pattern_names, cycle_count)` tuples.
fn detect_oscillation(calls: &[ToolCall]) -> Vec<(usize, usize, Vec<String>, usize)> {
    // The shortest possible hit is a length-2 cycle repeated threshold times.
    let min_calls = 2 * OSCILLATION_CYCLES_THRESHOLD;
    if calls.len() < min_calls {
        return Vec::new();
    }
    let mut patterns = Vec::new();
    let mut i: usize = 0;
    while i + min_calls <= calls.len() {
        // Candidate cycle lengths 2..=5, capped by the remaining calls.
        let max_pat_len = (5usize).min(calls.len() - i);
        let mut found_for_i = false;
        for pat_len in 2..=max_pat_len {
            let pattern_names: Vec<String> =
                (0..pat_len).map(|k| calls[i + k].name.clone()).collect();
            // A cycle of one repeated name is a retry, not an oscillation.
            let unique: std::collections::HashSet<&String> = pattern_names.iter().collect();
            if unique.len() < 2 {
                continue;
            }
            // Count contiguous repetitions of the candidate cycle.
            let mut cycles = 1;
            let mut pos = i + pat_len;
            while pos + pat_len <= calls.len() {
                let mut all_match = true;
                for k in 0..pat_len {
                    if calls[pos + k].name != pattern_names[k] {
                        all_match = false;
                        break;
                    }
                }
                if all_match {
                    cycles += 1;
                    pos += pat_len;
                } else {
                    break;
                }
            }
            if cycles >= OSCILLATION_CYCLES_THRESHOLD {
                let end_idx_in_calls = i + (cycles * pat_len) - 1;
                patterns.push((
                    calls[i].index,
                    calls[end_idx_in_calls].index,
                    pattern_names,
                    cycles,
                ));
                // Mirror Python: `i = end_idx + 1 - pattern_len`. We set `i` so that
                // the next outer iteration begins after we account for overlap.
                i = end_idx_in_calls + 1 - pat_len;
                found_for_i = true;
                break;
            }
        }
        if !found_for_i {
            i += 1;
        } else {
            // Match Python's `i = end_idx + 1 - pattern_len; break` then loop.
            // We'll continue; the outer while re-checks i.
        }
    }
    // Different pattern lengths can produce overlapping hits; keep only the
    // earliest/longest non-overlapping set.
    if patterns.len() > 1 {
        patterns = deduplicate_patterns(patterns);
    }
    patterns
}
|
||||
|
||||
/// Collapse overlapping `(start, end, names, cycles)` patterns: sort by
/// start (ties broken in favor of the longer span) and greedily keep each
/// pattern that begins strictly after the last kept pattern ends.
fn deduplicate_patterns(
    mut patterns: Vec<(usize, usize, Vec<String>, usize)>,
) -> Vec<(usize, usize, Vec<String>, usize)> {
    if patterns.is_empty() {
        return patterns;
    }
    patterns.sort_by(|lhs, rhs| {
        lhs.0
            .cmp(&rhs.0)
            .then_with(|| (rhs.1 - rhs.0).cmp(&(lhs.1 - lhs.0)))
    });
    let mut kept = Vec::new();
    // -1 so a pattern starting at index 0 is always accepted first.
    let mut last_end: i64 = -1;
    for candidate in patterns {
        if (candidate.0 as i64) > last_end {
            last_end = candidate.1 as i64;
            kept.push(candidate);
        }
    }
    kept
}
|
||||
|
||||
/// Analyze a conversation for repetitive tool-call behavior and emit one
/// signal per detected run: retry (identical args), parameter drift (same
/// tool, varying args), and oscillation (repeating multi-tool cycle).
///
/// Signal order, confidences (0.95 / 0.85 / 0.9) and the metadata JSON keys
/// are the contract consumed downstream — do not reorder or rename.
pub fn analyze_loops(messages: &[ShareGptMessage<'_>]) -> SignalGroup {
    let mut group = SignalGroup::new("loops");
    let calls = extract_tool_calls(messages);
    // Fewer calls than the retry threshold cannot trigger any detector.
    if calls.len() < RETRY_THRESHOLD {
        return group;
    }

    // --- Retry: same tool, identical arguments. ---
    let retries = detect_retry(&calls);
    for (start_idx, end_idx, tool_name) in &retries {
        // Count calls whose message index falls inside the detected span.
        let call_count = calls
            .iter()
            .filter(|c| *start_idx <= c.index && c.index <= *end_idx)
            .count();
        group.add_signal(
            SignalInstance::new(
                SignalType::ExecutionLoopsRetry,
                *start_idx,
                format!(
                    "Tool '{}' called {} times with identical arguments",
                    tool_name, call_count
                ),
            )
            .with_confidence(0.95)
            .with_metadata(json!({
                "tool_name": tool_name,
                "start_index": start_idx,
                "end_index": end_idx,
                "call_count": call_count,
                "loop_type": "retry",
            })),
        );
    }

    // --- Parameter drift: same tool, arguments varying across the run. ---
    let drifts = detect_parameter_drift(&calls);
    for (start_idx, end_idx, tool_name, variation_count) in &drifts {
        // A drift span overlapping any retry span is suppressed — retry is
        // the more specific finding.
        let overlaps_retry = retries
            .iter()
            .any(|r| !(*end_idx < r.0 || *start_idx > r.1));
        if overlaps_retry {
            continue;
        }
        let call_count = calls
            .iter()
            .filter(|c| *start_idx <= c.index && c.index <= *end_idx)
            .count();
        group.add_signal(
            SignalInstance::new(
                SignalType::ExecutionLoopsParameterDrift,
                *start_idx,
                format!(
                    "Tool '{}' called {} times with {} different argument variations",
                    tool_name, call_count, variation_count
                ),
            )
            .with_confidence(0.85)
            .with_metadata(json!({
                "tool_name": tool_name,
                "start_index": start_idx,
                "end_index": end_idx,
                "call_count": call_count,
                "variation_count": variation_count,
                "loop_type": "parameter_drift",
            })),
        );
    }

    // --- Oscillation: a repeating cycle of different tools. ---
    let oscillations = detect_oscillation(&calls);
    for (start_idx, end_idx, tool_names, cycle_count) in &oscillations {
        // Render the cycle as "toolA → toolB" for the human-readable message.
        let pattern_str = tool_names.join(" \u{2192} ");
        group.add_signal(
            SignalInstance::new(
                SignalType::ExecutionLoopsOscillation,
                *start_idx,
                format!(
                    "Oscillation pattern [{}] repeated {} times",
                    pattern_str, cycle_count
                ),
            )
            .with_confidence(0.9)
            .with_metadata(json!({
                "pattern": tool_names,
                "start_index": start_idx,
                "end_index": end_idx,
                "cycle_count": cycle_count,
                "loop_type": "oscillation",
            })),
        );
    }

    group
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Wrap a raw JSON payload as a `function_call` conversation message.
    fn call_msg(value: &str) -> ShareGptMessage<'_> {
        ShareGptMessage {
            from: "function_call",
            value,
        }
    }

    /// True when the group contains at least one signal accepted by `wanted`.
    fn emitted(group: &SignalGroup, wanted: fn(&SignalType) -> bool) -> bool {
        group.signals.iter().any(|s| wanted(&s.signal_type))
    }

    #[test]
    fn detects_retry_loop() {
        let payload = r#"{"name":"check_status","arguments":{"id":"abc"}}"#;
        let msgs: Vec<_> = (0..4).map(|_| call_msg(payload)).collect();
        let group = analyze_loops(&msgs);
        assert!(emitted(&group, |t| matches!(
            t,
            SignalType::ExecutionLoopsRetry
        )));
    }

    #[test]
    fn detects_parameter_drift() {
        let msgs = vec![
            call_msg(r#"{"name":"search","arguments":{"q":"a"}}"#),
            call_msg(r#"{"name":"search","arguments":{"q":"ab"}}"#),
            call_msg(r#"{"name":"search","arguments":{"q":"abc"}}"#),
            call_msg(r#"{"name":"search","arguments":{"q":"abcd"}}"#),
        ];
        let group = analyze_loops(&msgs);
        assert!(emitted(&group, |t| matches!(
            t,
            SignalType::ExecutionLoopsParameterDrift
        )));
    }

    #[test]
    fn detects_oscillation() {
        let tool_a = r#"{"name":"toolA","arguments":{}}"#;
        let tool_b = r#"{"name":"toolB","arguments":{}}"#;
        let mut msgs = Vec::new();
        for _ in 0..3 {
            msgs.push(call_msg(tool_a));
            msgs.push(call_msg(tool_b));
        }
        let group = analyze_loops(&msgs);
        assert!(emitted(&group, |t| matches!(
            t,
            SignalType::ExecutionLoopsOscillation
        )));
    }

    #[test]
    fn no_signals_when_few_calls() {
        let msgs = vec![call_msg(r#"{"name":"only_once","arguments":{}}"#)];
        let group = analyze_loops(&msgs);
        assert!(group.signals.is_empty());
    }
}
|
||||
5
crates/brightstaff/src/signals/execution/mod.rs
Normal file
5
crates/brightstaff/src/signals/execution/mod.rs
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
//! Execution signals: failure (agent-caused tool errors) and loops
//! (repetitive tool-call behavior).

// Agent-caused tool errors.
pub mod failure;
// Repetitive tool-call behavior (retry, parameter drift, oscillation).
pub mod loops;
|
||||
193
crates/brightstaff/src/signals/interaction/constants.rs
Normal file
193
crates/brightstaff/src/signals/interaction/constants.rs
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
//! Shared constants for the interaction layer detectors.
|
||||
//!
|
||||
//! Direct port of `signals/interaction/constants.py`.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
/// Message openers that signal an agreeable/positive user message.
/// Checked via `starts_with_prefix`, so order does not matter.
pub const POSITIVE_PREFIXES: &[&str] = &[
    "yes", "yeah", "yep", "yup", "sure", "ok", "okay", "great", "awesome",
    "perfect", "thanks", "thank", "wonderful", "excellent", "amazing", "nice",
    "good", "cool", "absolutely", "definitely", "please",
];
|
||||
|
||||
/// Openers marking a message as a confirmation/agreement ("yes, that's
/// right"), including common apostrophe-less spellings.
pub const CONFIRMATION_PREFIXES: &[&str] = &[
    "yes", "yeah", "yep", "yup", "correct", "right", "that's correct",
    "thats correct", "that's right", "thats right", "that is correct",
    "that is right",
];
|
||||
|
||||
/// Standard English stopword list (NLTK-style); filtered out of token sets
/// before overlap comparisons. Backing data for `stopwords()`.
const STOPWORD_LIST: &[&str] = &[
    "a", "about", "above", "after", "again", "against", "all", "am", "an",
    "and", "any", "are", "as", "at", "be", "because", "been", "before",
    "being", "below", "between", "both", "but", "by", "can", "could", "did",
    "do", "does", "doing", "down", "during", "each", "few", "for", "from",
    "further", "had", "has", "have", "having", "he", "her", "here", "hers",
    "herself", "him", "himself", "his", "how", "i", "if", "in", "into", "is",
    "it", "its", "itself", "just", "me", "more", "most", "my", "myself",
    "no", "nor", "not", "now", "of", "off", "on", "once", "only", "or",
    "other", "our", "ours", "ourselves", "out", "over", "own", "same", "she",
    "should", "so", "some", "such", "than", "that", "the", "their", "theirs",
    "them", "themselves", "then", "there", "these", "they", "this", "those",
    "through", "to", "too", "under", "until", "up", "very", "was", "we",
    "were", "what", "when", "where", "which", "while", "who", "whom", "why",
    "with", "would", "you", "your", "yours", "yourself", "yourselves",
];
|
||||
|
||||
pub fn stopwords() -> &'static HashSet<&'static str> {
|
||||
static SET: OnceLock<HashSet<&'static str>> = OnceLock::new();
|
||||
SET.get_or_init(|| STOPWORD_LIST.iter().copied().collect())
|
||||
}
|
||||
|
||||
/// Returns true if `text` (case-insensitive, leading-whitespace-trimmed)
/// starts with any of the given prefixes treated as **whole tokens or token
/// sequences**: the character following the matched prefix must not be
/// alphanumeric (or the prefix must end the string). This implements the
/// documented intent that `"please"` shouldn't fire on `"pleased"` — the
/// previous body was a raw `starts_with`, so `"pleased"`/`"thankfully"`
/// incorrectly matched.
pub fn starts_with_prefix(text: &str, prefixes: &[&str]) -> bool {
    let lowered = text.to_lowercase();
    let trimmed = lowered.trim_start();
    for prefix in prefixes {
        if let Some(rest) = trimmed.strip_prefix(prefix) {
            // Whole-token boundary: end of string, punctuation, or space —
            // but not a letter/digit continuing the word.
            if !rest.chars().next().map_or(false, |c| c.is_alphanumeric()) {
                return true;
            }
        }
    }
    false
}
|
||||
445
crates/brightstaff/src/signals/interaction/disengagement.rs
Normal file
445
crates/brightstaff/src/signals/interaction/disengagement.rs
Normal file
|
|
@ -0,0 +1,445 @@
|
|||
//! Disengagement signals: escalation, quit, negative stance.
|
||||
//!
|
||||
//! Direct port of `signals/interaction/disengagement.py`.
|
||||
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use regex::Regex;
|
||||
use serde_json::json;
|
||||
|
||||
use super::constants::{starts_with_prefix, POSITIVE_PREFIXES};
|
||||
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
|
||||
use crate::signals::text_processing::{normalize_patterns, NormalizedMessage, NormalizedPattern};
|
||||
|
||||
/// Phrases requesting hand-off to a human or support channel. Each target
/// noun is crossed with the same seven verb templates (speak to / talk to /
/// connect me to / connect me with / transfer me to / get me / chat with).
const ESCALATION_PATTERN_TEXTS: &[&str] = &[
    // Human requests
    "speak to a human", "talk to a human", "connect me to a human",
    "connect me with a human", "transfer me to a human", "get me a human",
    "chat with a human",
    // Person requests
    "speak to a person", "talk to a person", "connect me to a person",
    "connect me with a person", "transfer me to a person", "get me a person",
    "chat with a person",
    // Real person requests
    "speak to a real person", "talk to a real person",
    "connect me to a real person", "connect me with a real person",
    "transfer me to a real person", "get me a real person",
    "chat with a real person",
    // Actual person requests
    "speak to an actual person", "talk to an actual person",
    "connect me to an actual person", "connect me with an actual person",
    "transfer me to an actual person", "get me an actual person",
    "chat with an actual person",
    // Supervisor requests
    "speak to a supervisor", "talk to a supervisor",
    "connect me to a supervisor", "connect me with a supervisor",
    "transfer me to a supervisor", "get me a supervisor",
    "chat with a supervisor",
    // Manager requests
    "speak to a manager", "talk to a manager", "connect me to a manager",
    "connect me with a manager", "transfer me to a manager",
    "get me a manager", "chat with a manager",
    // Customer service requests
    "speak to customer service", "talk to customer service",
    "connect me to customer service", "connect me with customer service",
    "transfer me to customer service", "get me customer service",
    "chat with customer service",
    // Customer support requests
    "speak to customer support", "talk to customer support",
    "connect me to customer support", "connect me with customer support",
    "transfer me to customer support", "get me customer support",
    "chat with customer support",
    // Support requests
    "speak to support", "talk to support", "connect me to support",
    "connect me with support", "transfer me to support", "get me support",
    "chat with support",
    // Tech support requests
    "speak to tech support", "talk to tech support",
    "connect me to tech support", "connect me with tech support",
    "transfer me to tech support", "get me tech support",
    "chat with tech support",
    // Help desk requests
    "speak to help desk", "talk to help desk", "connect me to help desk",
    "connect me with help desk", "transfer me to help desk",
    "get me help desk", "chat with help desk",
    // Explicit escalation
    "escalate this",
];
|
||||
|
||||
/// Phrases signalling the user is abandoning the task entirely
/// (apostrophe-less spellings included for fuzzy matching).
const QUIT_PATTERN_TEXTS: &[&str] = &[
    "i give up", "i'm giving up", "im giving up", "i'm going to quit",
    "i quit", "forget it", "forget this", "screw it", "screw this",
    "don't bother trying", "don't bother with this", "don't bother with it",
    "don't even bother", "why bother", "not worth it", "this is hopeless",
    "going elsewhere", "try somewhere else", "look elsewhere",
];
|
||||
|
||||
/// Generic complaint phrases: frustration with the assistant or with lack
/// of progress, without profanity (see the profanity list for those).
const NEGATIVE_STANCE_PATTERN_TEXTS: &[&str] = &[
    "this is useless", "not helpful", "doesn't help", "not helping",
    "you're not helping", "youre not helping", "this doesn't work",
    "this doesnt work", "this isn't working", "this isnt working",
    "still doesn't work", "still doesnt work", "still not working",
    "still isn't working", "still isnt working", "waste of time",
    "wasting my time", "this is ridiculous", "this is absurd",
    "this is insane", "this is stupid", "this is dumb", "this sucks",
    "this is frustrating", "not good enough", "why can't you",
    "why cant you", "same issue", "did that already", "done that already",
    "tried that already", "already tried that", "i've done that",
    "ive done that", "i've tried that", "ive tried that", "i'm disappointed",
    "im disappointed", "disappointed with you", "disappointed in you",
    "useless bot", "dumb bot", "stupid bot",
];
|
||||
|
||||
/// Profanity directed at the assistant, including censored/starred variants.
/// Treated as the most specific negative-stance indicator and checked
/// before the generic complaint list.
const AGENT_DIRECTED_PROFANITY_PATTERN_TEXTS: &[&str] = &[
    "this is bullshit", "what bullshit", "such bullshit", "total bullshit",
    "complete bullshit", "this is crap", "what crap", "this is shit",
    "what the hell is wrong with you", "what the fuck is wrong with you",
    "you're fucking useless", "youre fucking useless",
    "you are fucking useless", "fucking useless", "this bot is shit",
    "this bot is crap", "damn bot", "fucking bot", "stupid fucking",
    "are you fucking kidding", "wtf is wrong with you", "wtf is this",
    "ffs just", "for fucks sake", "for fuck's sake", "what the f**k",
    "what the f*ck", "what the f***", "that's bullsh*t", "thats bullsh*t",
    "that's bull***t", "thats bull***t", "that's bs", "thats bs",
    "this is bullsh*t", "this is bull***t", "this is bs",
];
|
||||
|
||||
fn escalation_patterns() -> &'static Vec<NormalizedPattern> {
|
||||
static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
|
||||
PATS.get_or_init(|| normalize_patterns(ESCALATION_PATTERN_TEXTS))
|
||||
}
|
||||
|
||||
fn quit_patterns() -> &'static Vec<NormalizedPattern> {
|
||||
static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
|
||||
PATS.get_or_init(|| normalize_patterns(QUIT_PATTERN_TEXTS))
|
||||
}
|
||||
|
||||
fn negative_stance_patterns() -> &'static Vec<NormalizedPattern> {
|
||||
static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
|
||||
PATS.get_or_init(|| normalize_patterns(NEGATIVE_STANCE_PATTERN_TEXTS))
|
||||
}
|
||||
|
||||
fn profanity_patterns() -> &'static Vec<NormalizedPattern> {
|
||||
static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
|
||||
PATS.get_or_init(|| normalize_patterns(AGENT_DIRECTED_PROFANITY_PATTERN_TEXTS))
|
||||
}
|
||||
|
||||
fn re_consecutive_q() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| Regex::new(r"\?{2,}").unwrap())
|
||||
}
|
||||
fn re_consecutive_e() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| Regex::new(r"!{2,}").unwrap())
|
||||
}
|
||||
fn re_mixed_punct() -> &'static Regex {
|
||||
static R: OnceLock<Regex> = OnceLock::new();
|
||||
R.get_or_init(|| Regex::new(r"[?!]{3,}").unwrap())
|
||||
}
|
||||
|
||||
/// Scan user ("human") messages for disengagement signals.
///
/// Detector precedence per message is deliberate and must be preserved:
/// 1. all-caps shouting; 2. excessive punctuation (suppressed when the
/// message opens positively); 3. escalation patterns; 4. quit patterns
/// (independent of escalation); 5. agent-directed profanity; 6. generic
/// complaints — only when neither escalation nor profanity fired.
/// Thresholds are forwarded to `matches_normalized_pattern` unchanged.
pub fn analyze_disengagement(
    normalized_messages: &[(usize, &str, NormalizedMessage)],
    char_ngram_threshold: f32,
    token_cosine_threshold: f32,
) -> SignalGroup {
    let mut group = SignalGroup::new("disengagement");

    for (idx, role, norm_msg) in normalized_messages {
        // Only end-user turns are analyzed.
        if *role != "human" {
            continue;
        }

        let text = &norm_msg.raw;

        // All-caps shouting check.
        let alpha_chars: String = text.chars().filter(|c| c.is_alphabetic()).collect();
        // Require at least 10 letters so acronym-only messages don't trip it.
        if alpha_chars.chars().count() >= 10 {
            let upper_count = alpha_chars.chars().filter(|c| c.is_uppercase()).count();
            let upper_ratio = upper_count as f32 / alpha_chars.chars().count() as f32;
            if upper_ratio >= 0.8 {
                // Evidence snippet is capped at 50 chars.
                let snippet: String = text.chars().take(50).collect();
                group.add_signal(
                    SignalInstance::new(SignalType::DisengagementNegativeStance, *idx, snippet)
                        .with_metadata(json!({
                            "indicator_type": "all_caps",
                            "upper_ratio": upper_ratio,
                        })),
                );
            }
        }

        // Excessive consecutive punctuation. A positive opener ("Yes!!")
        // suppresses this indicator.
        let starts_with_positive = starts_with_prefix(text, POSITIVE_PREFIXES);
        let cq = re_consecutive_q().find_iter(text).count();
        let ce = re_consecutive_e().find_iter(text).count();
        let mixed = re_mixed_punct().find_iter(text).count();
        if !starts_with_positive && (cq >= 1 || ce >= 1 || mixed >= 1) {
            let snippet: String = text.chars().take(50).collect();
            group.add_signal(
                SignalInstance::new(SignalType::DisengagementNegativeStance, *idx, snippet)
                    .with_metadata(json!({
                        "indicator_type": "excessive_punctuation",
                        "consecutive_questions": cq,
                        "consecutive_exclamations": ce,
                        "mixed_punctuation": mixed,
                    })),
            );
        }

        // Escalation patterns. First match wins per message.
        let mut found_escalation = false;
        for pattern in escalation_patterns() {
            if norm_msg.matches_normalized_pattern(
                pattern,
                char_ngram_threshold,
                token_cosine_threshold,
            ) {
                group.add_signal(
                    SignalInstance::new(
                        SignalType::DisengagementEscalation,
                        *idx,
                        pattern.raw.clone(),
                    )
                    .with_metadata(json!({"pattern_type": "escalation"})),
                );
                found_escalation = true;
                break;
            }
        }

        // Quit patterns (independent of escalation).
        for pattern in quit_patterns() {
            if norm_msg.matches_normalized_pattern(
                pattern,
                char_ngram_threshold,
                token_cosine_threshold,
            ) {
                group.add_signal(
                    SignalInstance::new(SignalType::DisengagementQuit, *idx, pattern.raw.clone())
                        .with_metadata(json!({"pattern_type": "quit"})),
                );
                break;
            }
        }

        // Profanity (more specific) before generic negative stance.
        let mut found_profanity = false;
        for pattern in profanity_patterns() {
            if norm_msg.matches_normalized_pattern(
                pattern,
                char_ngram_threshold,
                token_cosine_threshold,
            ) {
                group.add_signal(
                    SignalInstance::new(
                        SignalType::DisengagementNegativeStance,
                        *idx,
                        pattern.raw.clone(),
                    )
                    .with_metadata(json!({
                        "indicator_type": "profanity",
                        "pattern": pattern.raw,
                    })),
                );
                found_profanity = true;
                break;
            }
        }

        // Generic complaints only when no more specific hit fired.
        if !found_escalation && !found_profanity {
            for pattern in negative_stance_patterns() {
                if norm_msg.matches_normalized_pattern(
                    pattern,
                    char_ngram_threshold,
                    token_cosine_threshold,
                ) {
                    group.add_signal(
                        SignalInstance::new(
                            SignalType::DisengagementNegativeStance,
                            *idx,
                            pattern.raw.clone(),
                        )
                        .with_metadata(json!({
                            "indicator_type": "complaint",
                            "pattern": pattern.raw,
                        })),
                    );
                    break;
                }
            }
        }
    }

    group
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Normalize a single message with the default 2000-char cap.
    fn msg(text: &str) -> NormalizedMessage {
        NormalizedMessage::from_text(text, 2000)
    }

    /// Run the analyzer over one human message with the standard thresholds.
    fn run(text: &str) -> SignalGroup {
        let turns = vec![(0usize, "human", msg(text))];
        analyze_disengagement(&turns, 0.65, 0.6)
    }

    #[test]
    fn detects_human_escalation_request() {
        let group = run("This is taking forever, get me a human");
        let found = group
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::DisengagementEscalation));
        assert!(found);
    }

    #[test]
    fn detects_quit_intent() {
        let group = run("Forget it, I give up");
        let found = group
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::DisengagementQuit));
        assert!(found);
    }

    #[test]
    fn detects_negative_stance_complaint() {
        let group = run("This is useless");
        let found = group
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::DisengagementNegativeStance));
        assert!(found);
    }

    #[test]
    fn detects_excessive_punctuation_as_negative_stance() {
        let group = run("WHY isn't this working???");
        let found = group
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::DisengagementNegativeStance));
        assert!(found);
    }

    #[test]
    fn positive_excitement_is_not_disengagement() {
        let group = run("Yes!! That's perfect!!!");
        let none_negative = group
            .signals
            .iter()
            .all(|s| !matches!(s.signal_type, SignalType::DisengagementNegativeStance));
        assert!(none_negative);
    }
}
|
||||
338
crates/brightstaff/src/signals/interaction/misalignment.rs
Normal file
338
crates/brightstaff/src/signals/interaction/misalignment.rs
Normal file
|
|
@ -0,0 +1,338 @@
|
|||
//! Misalignment signals: corrections, rephrases, clarifications.
|
||||
//!
|
||||
//! Direct port of `signals/interaction/misalignment.py`.
|
||||
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use serde_json::json;
|
||||
|
||||
use super::constants::{stopwords, CONFIRMATION_PREFIXES};
|
||||
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
|
||||
use crate::signals::text_processing::{normalize_patterns, NormalizedMessage, NormalizedPattern};
|
||||
|
||||
/// Phrases where the user explicitly corrects the assistant's understanding
/// of a previous turn (apostrophe-less variants included).
const CORRECTION_PATTERN_TEXTS: &[&str] = &[
    "no, i meant", "no i meant", "no, i said", "no i said", "no, i asked",
    "no i asked", "nah, i meant", "nope, i meant", "not what i said",
    "not what i asked", "that's not what i said", "that's not what i asked",
    "that's not what i meant", "thats not what i said",
    "thats not what i asked", "thats not what i meant",
    "that's not what you", "no that's not what i", "no, that's not what i",
    "you're not quite right", "youre not quite right",
    "you're not exactly right", "youre not exactly right",
    "you're wrong about", "youre wrong about", "i just said",
    "i already said", "i already told you",
];
|
||||
|
||||
/// Explicit markers that the user is restating a prior request.
const REPHRASE_PATTERN_TEXTS: &[&str] = &[
    "let me rephrase",
    "let me explain again",
    "what i'm trying to say",
    "what i'm saying is",
    "in other words",
];
|
||||
|
||||
/// Phrases where the user signals they did not understand the assistant.
const CLARIFICATION_PATTERN_TEXTS: &[&str] = &[
    "i don't understand", "don't understand", "not understanding",
    "can't understand", "don't get it", "don't follow", "i'm confused",
    "so confused", "makes no sense", "doesn't make sense",
    "not making sense", "what do you mean", "what does that mean",
    "what are you saying", "i'm lost", "totally lost", "lost me",
    "no clue what you", "no idea what you", "no clue what that",
    "no idea what that", "come again", "say that again", "repeat that",
    "trouble following", "hard to follow", "can't follow",
];
|
||||
|
||||
fn correction_patterns() -> &'static Vec<NormalizedPattern> {
|
||||
static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
|
||||
PATS.get_or_init(|| normalize_patterns(CORRECTION_PATTERN_TEXTS))
|
||||
}
|
||||
|
||||
fn rephrase_patterns() -> &'static Vec<NormalizedPattern> {
|
||||
static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
|
||||
PATS.get_or_init(|| normalize_patterns(REPHRASE_PATTERN_TEXTS))
|
||||
}
|
||||
|
||||
fn clarification_patterns() -> &'static Vec<NormalizedPattern> {
|
||||
static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
|
||||
PATS.get_or_init(|| normalize_patterns(CLARIFICATION_PATTERN_TEXTS))
|
||||
}
|
||||
|
||||
fn is_confirmation_message(text: &str) -> bool {
|
||||
let lowered = text.to_lowercase();
|
||||
let trimmed = lowered.trim();
|
||||
CONFIRMATION_PREFIXES.iter().any(|p| trimmed.starts_with(p))
|
||||
}
|
||||
|
||||
/// Detect whether two user messages appear to be rephrases of each other.
|
||||
pub fn is_similar_rephrase(
|
||||
norm_msg1: &NormalizedMessage,
|
||||
norm_msg2: &NormalizedMessage,
|
||||
overlap_threshold: f32,
|
||||
min_meaningful_tokens: usize,
|
||||
max_new_content_ratio: f32,
|
||||
) -> bool {
|
||||
if norm_msg1.tokens.len() < 3 || norm_msg2.tokens.len() < 3 {
|
||||
return false;
|
||||
}
|
||||
if is_confirmation_message(&norm_msg1.raw) {
|
||||
return false;
|
||||
}
|
||||
|
||||
let stops = stopwords();
|
||||
let tokens1: std::collections::HashSet<&str> = norm_msg1
|
||||
.tokens
|
||||
.iter()
|
||||
.filter(|t| !stops.contains(t.as_str()))
|
||||
.map(|s| s.as_str())
|
||||
.collect();
|
||||
let tokens2: std::collections::HashSet<&str> = norm_msg2
|
||||
.tokens
|
||||
.iter()
|
||||
.filter(|t| !stops.contains(t.as_str()))
|
||||
.map(|s| s.as_str())
|
||||
.collect();
|
||||
|
||||
if tokens1.len() < min_meaningful_tokens || tokens2.len() < min_meaningful_tokens {
|
||||
return false;
|
||||
}
|
||||
|
||||
let new_tokens: std::collections::HashSet<&&str> = tokens1.difference(&tokens2).collect();
|
||||
let new_content_ratio = if tokens1.is_empty() {
|
||||
0.0
|
||||
} else {
|
||||
new_tokens.len() as f32 / tokens1.len() as f32
|
||||
};
|
||||
if new_content_ratio > max_new_content_ratio {
|
||||
return false;
|
||||
}
|
||||
|
||||
let intersection = tokens1.intersection(&tokens2).count();
|
||||
let min_size = tokens1.len().min(tokens2.len());
|
||||
if min_size == 0 {
|
||||
return false;
|
||||
}
|
||||
let overlap_ratio = intersection as f32 / min_size as f32;
|
||||
overlap_ratio >= overlap_threshold
|
||||
}
|
||||
|
||||
/// Analyze user messages for misalignment signals.
///
/// Per user turn the detectors run in strict priority order with
/// short-circuiting: correction > rephrase marker > clarification >
/// semantic rephrase against the previous user turn (only when within 3
/// message indices). At most one signal is emitted per turn.
pub fn analyze_misalignment(
    normalized_messages: &[(usize, &str, NormalizedMessage)],
    char_ngram_threshold: f32,
    token_cosine_threshold: f32,
) -> SignalGroup {
    let mut group = SignalGroup::new("misalignment");

    // Most recent user turn, used for the semantic-rephrase comparison.
    let mut prev_user_idx: Option<usize> = None;
    let mut prev_user_msg: Option<&NormalizedMessage> = None;

    for (idx, role, norm_msg) in normalized_messages {
        if *role != "human" {
            continue;
        }

        let mut found_in_turn = false;

        // Explicit corrections ("no, i meant ...").
        for pattern in correction_patterns() {
            if norm_msg.matches_normalized_pattern(
                pattern,
                char_ngram_threshold,
                token_cosine_threshold,
            ) {
                group.add_signal(
                    SignalInstance::new(
                        SignalType::MisalignmentCorrection,
                        *idx,
                        pattern.raw.clone(),
                    )
                    .with_metadata(json!({"pattern_type": "correction"})),
                );
                found_in_turn = true;
                break;
            }
        }

        if found_in_turn {
            prev_user_idx = Some(*idx);
            prev_user_msg = Some(norm_msg);
            continue;
        }

        // Explicit rephrase markers ("let me rephrase ...").
        for pattern in rephrase_patterns() {
            if norm_msg.matches_normalized_pattern(
                pattern,
                char_ngram_threshold,
                token_cosine_threshold,
            ) {
                group.add_signal(
                    SignalInstance::new(
                        SignalType::MisalignmentRephrase,
                        *idx,
                        pattern.raw.clone(),
                    )
                    .with_metadata(json!({"pattern_type": "rephrase"})),
                );
                found_in_turn = true;
                break;
            }
        }

        if found_in_turn {
            prev_user_idx = Some(*idx);
            prev_user_msg = Some(norm_msg);
            continue;
        }

        // Clarification requests ("i don't understand ...").
        for pattern in clarification_patterns() {
            if norm_msg.matches_normalized_pattern(
                pattern,
                char_ngram_threshold,
                token_cosine_threshold,
            ) {
                group.add_signal(
                    SignalInstance::new(
                        SignalType::MisalignmentClarification,
                        *idx,
                        pattern.raw.clone(),
                    )
                    .with_metadata(json!({"pattern_type": "clarification"})),
                );
                found_in_turn = true;
                break;
            }
        }

        if found_in_turn {
            prev_user_idx = Some(*idx);
            prev_user_msg = Some(norm_msg);
            continue;
        }

        // Semantic rephrase vs the previous user message (recent only).
        if let (Some(prev_idx), Some(prev_msg)) = (prev_user_idx, prev_user_msg) {
            let turns_between = idx.saturating_sub(prev_idx);
            if turns_between <= 3 && is_similar_rephrase(norm_msg, prev_msg, 0.75, 4, 0.5) {
                group.add_signal(
                    SignalInstance::new(
                        SignalType::MisalignmentRephrase,
                        *idx,
                        "[similar rephrase detected]",
                    )
                    .with_confidence(0.8)
                    .with_metadata(json!({
                        "pattern_type": "semantic_rephrase",
                        "compared_to": prev_idx,
                    })),
                );
            }
        }

        prev_user_idx = Some(*idx);
        prev_user_msg = Some(norm_msg);
    }

    group
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Normalize a single message with the default 2000-char cap.
    fn msg(text: &str) -> NormalizedMessage {
        NormalizedMessage::from_text(text, 2000)
    }

    /// Build an indexed (idx, role, normalized) conversation from turns.
    fn conversation(
        turns: &[(&'static str, &str)],
    ) -> Vec<(usize, &'static str, NormalizedMessage)> {
        turns
            .iter()
            .enumerate()
            .map(|(i, (role, text))| (i, *role, msg(text)))
            .collect()
    }

    #[test]
    fn detects_explicit_correction() {
        let turns = conversation(&[
            ("human", "Show me my orders"),
            ("gpt", "Sure, here are your invoices"),
            ("human", "No, I meant my recent orders"),
        ]);
        let group = analyze_misalignment(&turns, 0.65, 0.6);
        let found = group
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::MisalignmentCorrection));
        assert!(found);
    }

    #[test]
    fn detects_rephrase_marker() {
        let turns = conversation(&[
            ("human", "Show me X"),
            ("gpt", "Sure"),
            ("human", "Let me rephrase: I want X grouped by date"),
        ]);
        let group = analyze_misalignment(&turns, 0.65, 0.6);
        let found = group
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::MisalignmentRephrase));
        assert!(found);
    }

    #[test]
    fn detects_clarification_request() {
        let turns = conversation(&[
            ("human", "Run the report"),
            ("gpt", "Foobar quux baz."),
            ("human", "I don't understand what you mean"),
        ]);
        let group = analyze_misalignment(&turns, 0.65, 0.6);
        let found = group
            .signals
            .iter()
            .any(|s| matches!(s.signal_type, SignalType::MisalignmentClarification));
        assert!(found);
    }

    #[test]
    fn confirmation_is_not_a_rephrase() {
        let later = msg("Yes, that's correct, please proceed with the order");
        let earlier = msg("please proceed with the order for the same product");
        assert!(!is_similar_rephrase(&later, &earlier, 0.75, 4, 0.5));
    }
}
|
||||
10
crates/brightstaff/src/signals/interaction/mod.rs
Normal file
10
crates/brightstaff/src/signals/interaction/mod.rs
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
//! Interaction signals: misalignment, stagnation, disengagement, satisfaction.
//!
//! These signals capture how the dialogue itself unfolds (semantic alignment,
//! progress, engagement, closure) independent of tool execution outcomes.

pub mod constants; // shared prefix lists and prefix-matching helpers
pub mod disengagement;
pub mod misalignment;
pub mod satisfaction;
pub mod stagnation;
|
||||
177
crates/brightstaff/src/signals/interaction/satisfaction.rs
Normal file
177
crates/brightstaff/src/signals/interaction/satisfaction.rs
Normal file
|
|
@ -0,0 +1,177 @@
|
|||
//! Satisfaction signals: gratitude, confirmation, success.
//!
//! Direct port of `signals/interaction/satisfaction.py`.

use std::sync::OnceLock;

use serde_json::json;

use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
use crate::signals::text_processing::{normalize_patterns, NormalizedMessage, NormalizedPattern};

// Phrases signaling explicit gratitude toward the assistant.
const GRATITUDE_PATTERN_TEXTS: &[&str] = &[
    "that's helpful",
    "that helps",
    "this helps",
    "appreciate it",
    "appreciate that",
    "that's perfect",
    "exactly what i needed",
    "just what i needed",
    "you're the best",
    "you rock",
    "you're awesome",
    "you're amazing",
    "you're great",
];

// Phrases confirming the assistant's answer was acceptable.
const CONFIRMATION_PATTERN_TEXTS: &[&str] = &[
    "that works",
    "this works",
    "that's great",
    "that's amazing",
    "this is great",
    "that's awesome",
    "love it",
    "love this",
    "love that",
];

// Phrases reporting that a suggested action actually worked.
const SUCCESS_PATTERN_TEXTS: &[&str] = &[
    "it worked",
    "that worked",
    "this worked",
    "it's working",
    "that's working",
    "this is working",
];

/// Gratitude patterns, normalized once per process and cached.
fn gratitude_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(GRATITUDE_PATTERN_TEXTS))
}

/// Confirmation patterns, normalized once per process and cached.
fn confirmation_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(CONFIRMATION_PATTERN_TEXTS))
}

/// Success patterns, normalized once per process and cached.
fn success_patterns() -> &'static Vec<NormalizedPattern> {
    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
    PATS.get_or_init(|| normalize_patterns(SUCCESS_PATTERN_TEXTS))
}
|
||||
|
||||
pub fn analyze_satisfaction(
|
||||
normalized_messages: &[(usize, &str, NormalizedMessage)],
|
||||
char_ngram_threshold: f32,
|
||||
token_cosine_threshold: f32,
|
||||
) -> SignalGroup {
|
||||
let mut group = SignalGroup::new("satisfaction");
|
||||
|
||||
for (idx, role, norm_msg) in normalized_messages {
|
||||
if *role != "human" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut found = false;
|
||||
|
||||
for pattern in gratitude_patterns() {
|
||||
if norm_msg.matches_normalized_pattern(
|
||||
pattern,
|
||||
char_ngram_threshold,
|
||||
token_cosine_threshold,
|
||||
) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::SatisfactionGratitude,
|
||||
*idx,
|
||||
pattern.raw.clone(),
|
||||
)
|
||||
.with_metadata(json!({"pattern_type": "gratitude"})),
|
||||
);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if found {
|
||||
continue;
|
||||
}
|
||||
|
||||
for pattern in confirmation_patterns() {
|
||||
if norm_msg.matches_normalized_pattern(
|
||||
pattern,
|
||||
char_ngram_threshold,
|
||||
token_cosine_threshold,
|
||||
) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::SatisfactionConfirmation,
|
||||
*idx,
|
||||
pattern.raw.clone(),
|
||||
)
|
||||
.with_metadata(json!({"pattern_type": "confirmation"})),
|
||||
);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if found {
|
||||
continue;
|
||||
}
|
||||
|
||||
for pattern in success_patterns() {
|
||||
if norm_msg.matches_normalized_pattern(
|
||||
pattern,
|
||||
char_ngram_threshold,
|
||||
token_cosine_threshold,
|
||||
) {
|
||||
group.add_signal(
|
||||
SignalInstance::new(SignalType::SatisfactionSuccess, *idx, pattern.raw.clone())
|
||||
.with_metadata(json!({"pattern_type": "success"})),
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
group
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    fn nm(s: &str) -> NormalizedMessage {
        NormalizedMessage::from_text(s, 2000)
    }

    /// Run the analyzer on a one-message human transcript and report whether
    /// `wanted` fired.
    fn fires(text: &str, wanted: SignalType) -> bool {
        let transcript = vec![(0usize, "human", nm(text))];
        analyze_satisfaction(&transcript, 0.65, 0.6)
            .signals
            .iter()
            .any(|s| s.signal_type == wanted)
    }

    #[test]
    fn detects_gratitude() {
        assert!(fires(
            "That's perfect, appreciate it!",
            SignalType::SatisfactionGratitude
        ));
    }

    #[test]
    fn detects_confirmation() {
        assert!(fires(
            "That works for me, thanks",
            SignalType::SatisfactionConfirmation
        ));
    }

    #[test]
    fn detects_success() {
        assert!(fires("Great, it worked!", SignalType::SatisfactionSuccess));
    }
}
|
||||
241
crates/brightstaff/src/signals/interaction/stagnation.rs
Normal file
241
crates/brightstaff/src/signals/interaction/stagnation.rs
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
//! Stagnation signals: dragging (turn-count efficiency) and repetition.
|
||||
//!
|
||||
//! Direct port of `signals/interaction/stagnation.py`.
|
||||
|
||||
use serde_json::json;
|
||||
|
||||
use super::constants::{starts_with_prefix, POSITIVE_PREFIXES};
|
||||
use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType, TurnMetrics};
|
||||
use crate::signals::text_processing::NormalizedMessage;
|
||||
|
||||
/// Adapter row used by stagnation::dragging detector. Mirrors the ShareGPT
/// `{"from": role, "value": text}` shape used in the Python reference.
pub struct ShareGptMsg<'a> {
    /// Speaker role tag; `"human"` and `"gpt"` are counted, anything else
    /// is ignored by the turn counters.
    pub from: &'a str,
}
|
||||
|
||||
pub fn analyze_dragging(
|
||||
messages: &[ShareGptMsg<'_>],
|
||||
baseline_turns: usize,
|
||||
efficiency_threshold: f32,
|
||||
) -> (SignalGroup, TurnMetrics) {
|
||||
let mut group = SignalGroup::new("stagnation");
|
||||
|
||||
let mut user_turns: usize = 0;
|
||||
let mut assistant_turns: usize = 0;
|
||||
for m in messages {
|
||||
match m.from {
|
||||
"human" => user_turns += 1,
|
||||
"gpt" => assistant_turns += 1,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
let total_turns = user_turns;
|
||||
let efficiency_score: f32 = if total_turns == 0 || total_turns <= baseline_turns {
|
||||
1.0
|
||||
} else {
|
||||
let excess = (total_turns - baseline_turns) as f32;
|
||||
1.0 / (1.0 + excess * 0.25)
|
||||
};
|
||||
|
||||
let is_dragging = efficiency_score < efficiency_threshold;
|
||||
let metrics = TurnMetrics {
|
||||
total_turns,
|
||||
user_turns,
|
||||
assistant_turns,
|
||||
is_dragging,
|
||||
efficiency_score,
|
||||
};
|
||||
|
||||
if is_dragging {
|
||||
let last_idx = messages.len().saturating_sub(1);
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::StagnationDragging,
|
||||
last_idx,
|
||||
format!(
|
||||
"Conversation dragging: {} turns (efficiency: {:.2})",
|
||||
total_turns, efficiency_score
|
||||
),
|
||||
)
|
||||
.with_confidence(1.0 - efficiency_score)
|
||||
.with_metadata(json!({
|
||||
"total_turns": total_turns,
|
||||
"efficiency_score": efficiency_score,
|
||||
"baseline_turns": baseline_turns,
|
||||
})),
|
||||
);
|
||||
}
|
||||
|
||||
(group, metrics)
|
||||
}
|
||||
|
||||
pub fn analyze_repetition(
|
||||
normalized_messages: &[(usize, &str, NormalizedMessage)],
|
||||
lookback: usize,
|
||||
exact_threshold: f32,
|
||||
near_duplicate_threshold: f32,
|
||||
) -> SignalGroup {
|
||||
let mut group = SignalGroup::new("stagnation");
|
||||
|
||||
// We keep references into `normalized_messages`. Since `normalized_messages`
|
||||
// is borrowed for the whole function, this avoids cloning.
|
||||
let mut prev_human: Vec<(usize, &NormalizedMessage)> = Vec::new();
|
||||
let mut prev_gpt: Vec<(usize, &NormalizedMessage)> = Vec::new();
|
||||
|
||||
for (idx, role, norm_msg) in normalized_messages {
|
||||
if *role != "human" && *role != "gpt" {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip human positive-prefix messages; they're naturally repetitive.
|
||||
if *role == "human" && starts_with_prefix(&norm_msg.raw, POSITIVE_PREFIXES) {
|
||||
prev_human.push((*idx, norm_msg));
|
||||
continue;
|
||||
}
|
||||
|
||||
if norm_msg.tokens.len() < 5 {
|
||||
if *role == "human" {
|
||||
prev_human.push((*idx, norm_msg));
|
||||
} else {
|
||||
prev_gpt.push((*idx, norm_msg));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
let prev = if *role == "human" {
|
||||
&prev_human
|
||||
} else {
|
||||
&prev_gpt
|
||||
};
|
||||
let start = prev.len().saturating_sub(lookback);
|
||||
let mut matched = false;
|
||||
for (prev_idx, prev_msg) in &prev[start..] {
|
||||
if prev_msg.tokens.len() < 5 {
|
||||
continue;
|
||||
}
|
||||
let similarity = norm_msg.ngram_similarity_with_message(prev_msg);
|
||||
if similarity >= exact_threshold {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::StagnationRepetition,
|
||||
*idx,
|
||||
format!("Exact repetition with message {}", prev_idx),
|
||||
)
|
||||
.with_confidence(similarity)
|
||||
.with_metadata(json!({
|
||||
"repetition_type": "exact",
|
||||
"compared_to": prev_idx,
|
||||
"similarity": similarity,
|
||||
"role": role,
|
||||
})),
|
||||
);
|
||||
matched = true;
|
||||
break;
|
||||
} else if similarity >= near_duplicate_threshold {
|
||||
group.add_signal(
|
||||
SignalInstance::new(
|
||||
SignalType::StagnationRepetition,
|
||||
*idx,
|
||||
format!("Near-duplicate with message {}", prev_idx),
|
||||
)
|
||||
.with_confidence(similarity)
|
||||
.with_metadata(json!({
|
||||
"repetition_type": "near_duplicate",
|
||||
"compared_to": prev_idx,
|
||||
"similarity": similarity,
|
||||
"role": role,
|
||||
})),
|
||||
);
|
||||
matched = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
let _ = matched;
|
||||
|
||||
if *role == "human" {
|
||||
prev_human.push((*idx, norm_msg));
|
||||
} else {
|
||||
prev_gpt.push((*idx, norm_msg));
|
||||
}
|
||||
}
|
||||
|
||||
group
|
||||
}
|
||||
|
||||
/// Combined stagnation analyzer: dragging + repetition.
|
||||
pub fn analyze_stagnation(
|
||||
messages: &[ShareGptMsg<'_>],
|
||||
normalized_messages: &[(usize, &str, NormalizedMessage)],
|
||||
baseline_turns: usize,
|
||||
) -> (SignalGroup, TurnMetrics) {
|
||||
let (dragging_group, metrics) = analyze_dragging(messages, baseline_turns, 0.5);
|
||||
let repetition_group = analyze_repetition(normalized_messages, 2, 0.95, 0.85);
|
||||
|
||||
let mut combined = SignalGroup::new("stagnation");
|
||||
for s in dragging_group.signals.iter().cloned() {
|
||||
combined.add_signal(s);
|
||||
}
|
||||
for s in repetition_group.signals.iter().cloned() {
|
||||
combined.add_signal(s);
|
||||
}
|
||||
(combined, metrics)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    fn nm(s: &str) -> NormalizedMessage {
        NormalizedMessage::from_text(s, 2000)
    }

    /// Build a transcript of `pairs` alternating human/gpt exchanges.
    fn alternating(pairs: usize) -> Vec<ShareGptMsg<'static>> {
        let mut out = Vec::with_capacity(pairs * 2);
        for _ in 0..pairs {
            out.push(ShareGptMsg { from: "human" });
            out.push(ShareGptMsg { from: "gpt" });
        }
        out
    }

    #[test]
    fn dragging_after_many_user_turns() {
        let (group, metrics) = analyze_dragging(&alternating(15), 5, 0.5);
        assert!(metrics.is_dragging);
        assert!(group
            .signals
            .iter()
            .any(|s| s.signal_type == SignalType::StagnationDragging));
    }

    #[test]
    fn no_dragging_below_baseline() {
        let (group, metrics) = analyze_dragging(&alternating(2), 5, 0.5);
        assert!(!metrics.is_dragging);
        assert!(group.signals.is_empty());
    }

    #[test]
    fn detects_exact_repetition_in_user_messages() {
        let repeated = "This widget is broken and needs repair right now";
        let transcript = vec![
            (0usize, "human", nm(repeated)),
            (1, "gpt", nm("Sorry to hear that. Let me look into it.")),
            (2, "human", nm(repeated)),
        ];
        let group = analyze_repetition(&transcript, 2, 0.95, 0.85);
        assert!(group
            .signals
            .iter()
            .any(|s| s.signal_type == SignalType::StagnationRepetition));
    }
}
|
||||
|
|
@ -1,3 +1,26 @@
|
|||
mod analyzer;
|
||||
//! Plano signals: behavioral quality indicators for agent interactions.
|
||||
//!
|
||||
//! This is a Rust port of the paper-aligned Python reference implementation at
|
||||
//! `https://github.com/katanemo/signals` (or `/Users/shashmi/repos/signals`).
|
||||
//!
|
||||
//! Three layers of signals are detected from a conversation transcript:
|
||||
//!
|
||||
//! - **Interaction**: misalignment, stagnation, disengagement, satisfaction
|
||||
//! - **Execution**: failure, loops
|
||||
//! - **Environment**: exhaustion
|
||||
//!
|
||||
//! See `SignalType` for the full hierarchy.
|
||||
|
||||
pub use analyzer::*;
|
||||
pub mod analyzer;
|
||||
pub mod environment;
|
||||
pub mod execution;
|
||||
pub mod interaction;
|
||||
pub mod otel;
|
||||
pub mod schemas;
|
||||
pub mod text_processing;
|
||||
|
||||
pub use analyzer::{SignalAnalyzer, FLAG_MARKER};
|
||||
pub use schemas::{
|
||||
EnvironmentSignals, ExecutionSignals, InteractionQuality, InteractionSignals, SignalGroup,
|
||||
SignalInstance, SignalLayer, SignalReport, SignalType, TurnMetrics,
|
||||
};
|
||||
|
|
|
|||
241
crates/brightstaff/src/signals/otel.rs
Normal file
241
crates/brightstaff/src/signals/otel.rs
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
//! Helpers for emitting `SignalReport` data to OpenTelemetry spans.
|
||||
//!
|
||||
//! Two sets of attributes are emitted:
|
||||
//!
|
||||
//! - **Legacy** keys under `signals.*` (e.g. `signals.frustration.count`),
|
||||
//! computed from the new layered counts. Preserved for one release for
|
||||
//! backward compatibility with existing dashboards.
|
||||
//! - **New** layered keys (e.g. `signals.interaction.misalignment.count`),
|
||||
//! one set of `count`/`severity` attributes per category, plus per-instance
|
||||
//! span events named `signal.<dotted_signal_type>`.
|
||||
|
||||
use opentelemetry::trace::SpanRef;
|
||||
use opentelemetry::KeyValue;
|
||||
|
||||
use crate::signals::schemas::{SignalGroup, SignalReport, SignalType};
|
||||
|
||||
/// Emit both legacy and layered OTel attributes/events for a `SignalReport`.
///
/// Returns `true` if any "concerning" signal was found, mirroring the previous
/// behavior used to flag the span operation name.
pub fn emit_signals_to_span(span: &SpanRef<'_>, report: &SignalReport) -> bool {
    emit_overall(span, report);
    emit_layered_attributes(span, report);
    // Legacy keys are derived from the layered counts; kept for one release
    // so existing dashboards keep working.
    emit_legacy_attributes(span, report);
    emit_signal_events(span, report);

    is_concerning(report)
}
|
||||
|
||||
fn emit_overall(span: &SpanRef<'_>, report: &SignalReport) {
|
||||
span.set_attribute(KeyValue::new(
|
||||
"signals.quality",
|
||||
report.overall_quality.as_str().to_string(),
|
||||
));
|
||||
span.set_attribute(KeyValue::new(
|
||||
"signals.quality_score",
|
||||
report.quality_score as f64,
|
||||
));
|
||||
span.set_attribute(KeyValue::new(
|
||||
"signals.turn_count",
|
||||
report.turn_metrics.total_turns as i64,
|
||||
));
|
||||
span.set_attribute(KeyValue::new(
|
||||
"signals.efficiency_score",
|
||||
report.turn_metrics.efficiency_score as f64,
|
||||
));
|
||||
}
|
||||
|
||||
fn emit_group(span: &SpanRef<'_>, prefix: &str, group: &SignalGroup) {
|
||||
if group.count == 0 {
|
||||
return;
|
||||
}
|
||||
span.set_attribute(KeyValue::new(
|
||||
format!("{}.count", prefix),
|
||||
group.count as i64,
|
||||
));
|
||||
span.set_attribute(KeyValue::new(
|
||||
format!("{}.severity", prefix),
|
||||
group.severity as i64,
|
||||
));
|
||||
}
|
||||
|
||||
fn emit_layered_attributes(span: &SpanRef<'_>, report: &SignalReport) {
|
||||
emit_group(
|
||||
span,
|
||||
"signals.interaction.misalignment",
|
||||
&report.interaction.misalignment,
|
||||
);
|
||||
emit_group(
|
||||
span,
|
||||
"signals.interaction.stagnation",
|
||||
&report.interaction.stagnation,
|
||||
);
|
||||
emit_group(
|
||||
span,
|
||||
"signals.interaction.disengagement",
|
||||
&report.interaction.disengagement,
|
||||
);
|
||||
emit_group(
|
||||
span,
|
||||
"signals.interaction.satisfaction",
|
||||
&report.interaction.satisfaction,
|
||||
);
|
||||
emit_group(span, "signals.execution.failure", &report.execution.failure);
|
||||
emit_group(span, "signals.execution.loops", &report.execution.loops);
|
||||
emit_group(
|
||||
span,
|
||||
"signals.environment.exhaustion",
|
||||
&report.environment.exhaustion,
|
||||
);
|
||||
}
|
||||
|
||||
/// Number of detected instances of exactly `t` across the whole report.
fn count_of(report: &SignalReport, t: SignalType) -> usize {
    report
        .iter_signals()
        .fold(0, |n, s| n + usize::from(s.signal_type == t))
}
|
||||
|
||||
/// Emit the legacy attribute keys consumed by existing dashboards. These are
|
||||
/// derived from the new `SignalReport` so no detector contract is broken.
|
||||
fn emit_legacy_attributes(span: &SpanRef<'_>, report: &SignalReport) {
|
||||
use crate::tracing::signals as legacy;
|
||||
|
||||
// signals.follow_up.repair.{count,ratio} - misalignment proxies repairs.
|
||||
let repair_count = report.interaction.misalignment.count;
|
||||
let user_turns = report.turn_metrics.user_turns.max(1) as f32;
|
||||
if repair_count > 0 {
|
||||
span.set_attribute(KeyValue::new(legacy::REPAIR_COUNT, repair_count as i64));
|
||||
let ratio = repair_count as f32 / user_turns;
|
||||
span.set_attribute(KeyValue::new(legacy::REPAIR_RATIO, format!("{:.3}", ratio)));
|
||||
}
|
||||
|
||||
// signals.frustration.{count,severity} - disengagement.negative_stance is
|
||||
// the closest legacy analog of "frustration".
|
||||
let frustration_count = count_of(report, SignalType::DisengagementNegativeStance);
|
||||
if frustration_count > 0 {
|
||||
span.set_attribute(KeyValue::new(
|
||||
legacy::FRUSTRATION_COUNT,
|
||||
frustration_count as i64,
|
||||
));
|
||||
let severity = match frustration_count {
|
||||
0 => 0,
|
||||
1..=2 => 1,
|
||||
3..=4 => 2,
|
||||
_ => 3,
|
||||
};
|
||||
span.set_attribute(KeyValue::new(legacy::FRUSTRATION_SEVERITY, severity as i64));
|
||||
}
|
||||
|
||||
// signals.repetition.count - stagnation (repetition + dragging).
|
||||
if report.interaction.stagnation.count > 0 {
|
||||
span.set_attribute(KeyValue::new(
|
||||
legacy::REPETITION_COUNT,
|
||||
report.interaction.stagnation.count as i64,
|
||||
));
|
||||
}
|
||||
|
||||
// signals.escalation.requested - any escalation/quit signal.
|
||||
let escalated = report.interaction.disengagement.signals.iter().any(|s| {
|
||||
matches!(
|
||||
s.signal_type,
|
||||
SignalType::DisengagementEscalation | SignalType::DisengagementQuit
|
||||
)
|
||||
});
|
||||
if escalated {
|
||||
span.set_attribute(KeyValue::new(legacy::ESCALATION_REQUESTED, true));
|
||||
}
|
||||
|
||||
// signals.positive_feedback.count - satisfaction signals.
|
||||
if report.interaction.satisfaction.count > 0 {
|
||||
span.set_attribute(KeyValue::new(
|
||||
legacy::POSITIVE_FEEDBACK_COUNT,
|
||||
report.interaction.satisfaction.count as i64,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
fn emit_signal_events(span: &SpanRef<'_>, report: &SignalReport) {
|
||||
for sig in report.iter_signals() {
|
||||
let event_name = format!("signal.{}", sig.signal_type.as_str());
|
||||
let mut attrs: Vec<KeyValue> = vec![
|
||||
KeyValue::new("signal.type", sig.signal_type.as_str().to_string()),
|
||||
KeyValue::new("signal.message_index", sig.message_index as i64),
|
||||
KeyValue::new("signal.confidence", sig.confidence as f64),
|
||||
];
|
||||
if !sig.snippet.is_empty() {
|
||||
attrs.push(KeyValue::new("signal.snippet", sig.snippet.clone()));
|
||||
}
|
||||
if !sig.metadata.is_null() {
|
||||
attrs.push(KeyValue::new("signal.metadata", sig.metadata.to_string()));
|
||||
}
|
||||
span.add_event(event_name, attrs);
|
||||
}
|
||||
}
|
||||
|
||||
fn is_concerning(report: &SignalReport) -> bool {
|
||||
use crate::signals::schemas::InteractionQuality;
|
||||
if matches!(
|
||||
report.overall_quality,
|
||||
InteractionQuality::Poor | InteractionQuality::Severe
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
if report.interaction.disengagement.count > 0 {
|
||||
return true;
|
||||
}
|
||||
if report.interaction.stagnation.count > 2 {
|
||||
return true;
|
||||
}
|
||||
if report.execution.failure.count > 0 || report.execution.loops.count > 0 {
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::signals::schemas::{
        EnvironmentSignals, ExecutionSignals, InteractionQuality, InteractionSignals, SignalGroup,
        SignalInstance, SignalReport, SignalType, TurnMetrics,
    };

    /// Fixture: a severe report whose only signal is a single
    /// disengagement-escalation at message index 3.
    fn report_with_escalation() -> SignalReport {
        let mut diseng = SignalGroup::new("disengagement");
        diseng.add_signal(SignalInstance::new(
            SignalType::DisengagementEscalation,
            3,
            "get me a human",
        ));
        SignalReport {
            interaction: InteractionSignals {
                disengagement: diseng,
                ..InteractionSignals::default()
            },
            execution: ExecutionSignals::default(),
            environment: EnvironmentSignals::default(),
            overall_quality: InteractionQuality::Severe,
            quality_score: 0.0,
            turn_metrics: TurnMetrics {
                total_turns: 3,
                user_turns: 2,
                assistant_turns: 1,
                is_dragging: false,
                efficiency_score: 1.0,
            },
            summary: String::new(),
        }
    }

    #[test]
    fn is_concerning_flags_disengagement() {
        let r = report_with_escalation();
        assert!(is_concerning(&r));
    }

    #[test]
    fn count_of_returns_per_type_count() {
        let r = report_with_escalation();
        assert_eq!(count_of(&r, SignalType::DisengagementEscalation), 1);
        assert_eq!(count_of(&r, SignalType::DisengagementNegativeStance), 0);
    }
}
|
||||
431
crates/brightstaff/src/signals/schemas.rs
Normal file
431
crates/brightstaff/src/signals/schemas.rs
Normal file
|
|
@ -0,0 +1,431 @@
|
|||
//! Data shapes for the signal analyzer.
|
||||
//!
|
||||
//! Mirrors `signals/schemas.py` from the reference implementation. Where the
|
||||
//! Python library exposes a `Dict[str, SignalGroup]` partitioned by category,
|
||||
//! the Rust port uses strongly-typed sub-structs (`InteractionSignals`,
|
||||
//! `ExecutionSignals`, `EnvironmentSignals`) for the same partitioning.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Hierarchical signal type. The 20 leaf variants mirror the paper taxonomy
/// and the Python reference's `SignalType` string enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SignalType {
    // Interaction > Misalignment
    MisalignmentCorrection,
    MisalignmentRephrase,
    MisalignmentClarification,

    // Interaction > Stagnation
    StagnationDragging,
    StagnationRepetition,

    // Interaction > Disengagement
    DisengagementEscalation,
    DisengagementQuit,
    DisengagementNegativeStance,

    // Interaction > Satisfaction
    SatisfactionGratitude,
    SatisfactionConfirmation,
    SatisfactionSuccess,

    // Execution > Failure
    ExecutionFailureInvalidArgs,
    ExecutionFailureBadQuery,
    ExecutionFailureToolNotFound,
    ExecutionFailureAuthMisuse,
    ExecutionFailureStateError,

    // Execution > Loops
    ExecutionLoopsRetry,
    ExecutionLoopsParameterDrift,
    ExecutionLoopsOscillation,

    // Environment > Exhaustion
    EnvironmentExhaustionApiError,
    EnvironmentExhaustionTimeout,
    EnvironmentExhaustionRateLimit,
    EnvironmentExhaustionNetwork,
    EnvironmentExhaustionMalformed,
    EnvironmentExhaustionContextOverflow,
}

impl SignalType {
    /// Dotted hierarchical string identifier, e.g.
    /// `"interaction.misalignment.correction"`. Matches the Python reference's
    /// `SignalType` enum *value* strings byte-for-byte.
    pub fn as_str(&self) -> &'static str {
        match self {
            SignalType::MisalignmentCorrection => "interaction.misalignment.correction",
            SignalType::MisalignmentRephrase => "interaction.misalignment.rephrase",
            SignalType::MisalignmentClarification => "interaction.misalignment.clarification",
            SignalType::StagnationDragging => "interaction.stagnation.dragging",
            SignalType::StagnationRepetition => "interaction.stagnation.repetition",
            SignalType::DisengagementEscalation => "interaction.disengagement.escalation",
            SignalType::DisengagementQuit => "interaction.disengagement.quit",
            SignalType::DisengagementNegativeStance => "interaction.disengagement.negative_stance",
            SignalType::SatisfactionGratitude => "interaction.satisfaction.gratitude",
            SignalType::SatisfactionConfirmation => "interaction.satisfaction.confirmation",
            SignalType::SatisfactionSuccess => "interaction.satisfaction.success",
            SignalType::ExecutionFailureInvalidArgs => "execution.failure.invalid_args",
            SignalType::ExecutionFailureBadQuery => "execution.failure.bad_query",
            SignalType::ExecutionFailureToolNotFound => "execution.failure.tool_not_found",
            SignalType::ExecutionFailureAuthMisuse => "execution.failure.auth_misuse",
            SignalType::ExecutionFailureStateError => "execution.failure.state_error",
            SignalType::ExecutionLoopsRetry => "execution.loops.retry",
            SignalType::ExecutionLoopsParameterDrift => "execution.loops.parameter_drift",
            SignalType::ExecutionLoopsOscillation => "execution.loops.oscillation",
            SignalType::EnvironmentExhaustionApiError => "environment.exhaustion.api_error",
            SignalType::EnvironmentExhaustionTimeout => "environment.exhaustion.timeout",
            SignalType::EnvironmentExhaustionRateLimit => "environment.exhaustion.rate_limit",
            SignalType::EnvironmentExhaustionNetwork => "environment.exhaustion.network",
            SignalType::EnvironmentExhaustionMalformed => {
                "environment.exhaustion.malformed_response"
            }
            SignalType::EnvironmentExhaustionContextOverflow => {
                "environment.exhaustion.context_overflow"
            }
        }
    }

    /// Top-level layer (interaction / execution / environment) this signal
    /// belongs to.
    pub fn layer(&self) -> SignalLayer {
        match self {
            SignalType::MisalignmentCorrection
            | SignalType::MisalignmentRephrase
            | SignalType::MisalignmentClarification
            | SignalType::StagnationDragging
            | SignalType::StagnationRepetition
            | SignalType::DisengagementEscalation
            | SignalType::DisengagementQuit
            | SignalType::DisengagementNegativeStance
            | SignalType::SatisfactionGratitude
            | SignalType::SatisfactionConfirmation
            | SignalType::SatisfactionSuccess => SignalLayer::Interaction,
            SignalType::ExecutionFailureInvalidArgs
            | SignalType::ExecutionFailureBadQuery
            | SignalType::ExecutionFailureToolNotFound
            | SignalType::ExecutionFailureAuthMisuse
            | SignalType::ExecutionFailureStateError
            | SignalType::ExecutionLoopsRetry
            | SignalType::ExecutionLoopsParameterDrift
            | SignalType::ExecutionLoopsOscillation => SignalLayer::Execution,
            SignalType::EnvironmentExhaustionApiError
            | SignalType::EnvironmentExhaustionTimeout
            | SignalType::EnvironmentExhaustionRateLimit
            | SignalType::EnvironmentExhaustionNetwork
            | SignalType::EnvironmentExhaustionMalformed
            | SignalType::EnvironmentExhaustionContextOverflow => SignalLayer::Environment,
        }
    }

    /// Category name within the layer (e.g. `"misalignment"`, `"failure"`),
    /// i.e. the middle segment of the dotted identifier.
    pub fn category(&self) -> &'static str {
        // Strip the layer prefix and take everything before the next dot.
        let s = self.as_str();
        let after_layer = s.split_once('.').map(|(_, rest)| rest).unwrap_or(s);
        after_layer
            .split_once('.')
            .map(|(c, _)| c)
            .unwrap_or(after_layer)
    }
}

/// Top level of the signal taxonomy: dialogue behavior, tool execution, or
/// environment health.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SignalLayer {
    Interaction,
    Execution,
    Environment,
}

impl SignalLayer {
    /// Lowercase layer name, matching the first segment of
    /// `SignalType::as_str`.
    pub fn as_str(&self) -> &'static str {
        match self {
            SignalLayer::Interaction => "interaction",
            SignalLayer::Execution => "execution",
            SignalLayer::Environment => "environment",
        }
    }
}
|
||||
|
||||
/// Overall quality assessment for an agent interaction session.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum InteractionQuality {
    Excellent,
    Good,
    Neutral,
    Poor,
    Severe,
}

impl InteractionQuality {
    /// Lowercase label, as emitted in the `signals.quality` span attribute.
    pub fn as_str(&self) -> &'static str {
        match self {
            InteractionQuality::Excellent => "excellent",
            InteractionQuality::Good => "good",
            InteractionQuality::Neutral => "neutral",
            InteractionQuality::Poor => "poor",
            InteractionQuality::Severe => "severe",
        }
    }
}

/// A single detected signal instance.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SignalInstance {
    pub signal_type: SignalType,
    /// Absolute index into the original conversation `Vec<Message>`.
    pub message_index: usize,
    /// The matched pattern text or a human-readable description of the hit.
    pub snippet: String,
    /// Detector confidence in [0, 1]; defaults to 1.0.
    pub confidence: f32,
    /// Free-form metadata payload mirroring the Python `Dict[str, Any]`.
    /// Stored as a JSON object so we can faithfully reproduce the reference's
    /// flexible per-detector metadata.
    #[serde(default)]
    pub metadata: serde_json::Value,
}

impl SignalInstance {
    /// Create an instance with full confidence and empty metadata; refine via
    /// the `with_*` builders.
    pub fn new(signal_type: SignalType, message_index: usize, snippet: impl Into<String>) -> Self {
        Self {
            signal_type,
            message_index,
            snippet: snippet.into(),
            confidence: 1.0,
            metadata: serde_json::Value::Object(serde_json::Map::new()),
        }
    }

    /// Builder: override the default confidence of 1.0.
    pub fn with_confidence(mut self, c: f32) -> Self {
        self.confidence = c;
        self
    }

    /// Builder: replace the metadata payload wholesale.
    pub fn with_metadata(mut self, m: serde_json::Value) -> Self {
        self.metadata = m;
        self
    }
}

/// Aggregated signals for a specific category.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SignalGroup {
    pub category: String,
    /// Number of signals in `signals` (kept in sync by `add_signal`).
    pub count: usize,
    pub signals: Vec<SignalInstance>,
    /// Severity level (0-3: none, mild, moderate, severe).
    pub severity: u8,
}
|
||||
|
||||
impl SignalGroup {
|
||||
pub fn new(category: impl Into<String>) -> Self {
|
||||
Self {
|
||||
category: category.into(),
|
||||
count: 0,
|
||||
signals: Vec::new(),
|
||||
severity: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_signal(&mut self, signal: SignalInstance) {
|
||||
self.signals.push(signal);
|
||||
self.count = self.signals.len();
|
||||
self.update_severity();
|
||||
}
|
||||
|
||||
fn update_severity(&mut self) {
|
||||
self.severity = match self.count {
|
||||
0 => 0,
|
||||
1..=2 => 1,
|
||||
3..=4 => 2,
|
||||
_ => 3,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// Turn count and efficiency metrics, used by stagnation.dragging.
// NOTE(review): field semantics below are inferred from the names — confirm
// against the analyzer that populates this struct.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TurnMetrics {
    // Total number of messages in the conversation (all roles, presumably).
    pub total_turns: usize,
    // Messages authored by the user.
    pub user_turns: usize,
    // Messages authored by the assistant.
    pub assistant_turns: usize,
    // Whether the conversation was flagged as dragging on.
    pub is_dragging: bool,
    // Efficiency estimate; scale/range set by the producing analyzer.
    pub efficiency_score: f32,
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct InteractionSignals {
|
||||
pub misalignment: SignalGroup,
|
||||
pub stagnation: SignalGroup,
|
||||
pub disengagement: SignalGroup,
|
||||
pub satisfaction: SignalGroup,
|
||||
}
|
||||
|
||||
impl Default for InteractionSignals {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
misalignment: SignalGroup::new("misalignment"),
|
||||
stagnation: SignalGroup::new("stagnation"),
|
||||
disengagement: SignalGroup::new("disengagement"),
|
||||
satisfaction: SignalGroup::new("satisfaction"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl InteractionSignals {
|
||||
/// Ratio of misalignment instances to user turns. Used as a quality
|
||||
/// scoring input and as a threshold for the "high misalignment rate"
|
||||
/// summary callout. Mirrors `misalignment.count / max(user_turns, 1)`
|
||||
/// from the Python reference's `_assess_quality` and `_generate_summary`.
|
||||
pub fn misalignment_ratio(&self, user_turns: usize) -> f32 {
|
||||
let denom = user_turns.max(1) as f32;
|
||||
self.misalignment.count as f32 / denom
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ExecutionSignals {
|
||||
pub failure: SignalGroup,
|
||||
pub loops: SignalGroup,
|
||||
}
|
||||
|
||||
impl Default for ExecutionSignals {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
failure: SignalGroup::new("failure"),
|
||||
loops: SignalGroup::new("loops"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct EnvironmentSignals {
|
||||
pub exhaustion: SignalGroup,
|
||||
}
|
||||
|
||||
impl Default for EnvironmentSignals {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
exhaustion: SignalGroup::new("exhaustion"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Complete signal analysis report for a conversation.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SignalReport {
|
||||
pub interaction: InteractionSignals,
|
||||
pub execution: ExecutionSignals,
|
||||
pub environment: EnvironmentSignals,
|
||||
pub overall_quality: InteractionQuality,
|
||||
pub quality_score: f32,
|
||||
pub turn_metrics: TurnMetrics,
|
||||
pub summary: String,
|
||||
}
|
||||
|
||||
impl Default for SignalReport {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
interaction: InteractionSignals::default(),
|
||||
execution: ExecutionSignals::default(),
|
||||
environment: EnvironmentSignals::default(),
|
||||
overall_quality: InteractionQuality::Neutral,
|
||||
quality_score: 50.0,
|
||||
turn_metrics: TurnMetrics::default(),
|
||||
summary: String::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SignalReport {
|
||||
/// Iterate over every `SignalInstance` across all layers and groups.
|
||||
pub fn iter_signals(&self) -> impl Iterator<Item = &SignalInstance> {
|
||||
self.interaction
|
||||
.misalignment
|
||||
.signals
|
||||
.iter()
|
||||
.chain(self.interaction.stagnation.signals.iter())
|
||||
.chain(self.interaction.disengagement.signals.iter())
|
||||
.chain(self.interaction.satisfaction.signals.iter())
|
||||
.chain(self.execution.failure.signals.iter())
|
||||
.chain(self.execution.loops.signals.iter())
|
||||
.chain(self.environment.exhaustion.signals.iter())
|
||||
}
|
||||
|
||||
pub fn has_signal_type(&self, t: SignalType) -> bool {
|
||||
self.iter_signals().any(|s| s.signal_type == t)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // `as_str` must render the dotted `<layer>.<category>.<kind>` names
    // from the taxonomy exactly.
    #[test]
    fn signal_type_strings_match_paper_taxonomy() {
        assert_eq!(
            SignalType::MisalignmentCorrection.as_str(),
            "interaction.misalignment.correction"
        );
        assert_eq!(
            SignalType::ExecutionFailureInvalidArgs.as_str(),
            "execution.failure.invalid_args"
        );
        assert_eq!(
            SignalType::EnvironmentExhaustionMalformed.as_str(),
            "environment.exhaustion.malformed_response"
        );
    }

    // `layer()` and `category()` must agree with the segments of the
    // dotted name for one representative type per layer.
    #[test]
    fn signal_type_layer_and_category() {
        assert_eq!(
            SignalType::MisalignmentRephrase.layer(),
            SignalLayer::Interaction
        );
        assert_eq!(SignalType::MisalignmentRephrase.category(), "misalignment");
        assert_eq!(
            SignalType::ExecutionLoopsRetry.layer(),
            SignalLayer::Execution
        );
        assert_eq!(SignalType::ExecutionLoopsRetry.category(), "loops");
        assert_eq!(
            SignalType::EnvironmentExhaustionTimeout.layer(),
            SignalLayer::Environment
        );
        assert_eq!(
            SignalType::EnvironmentExhaustionTimeout.category(),
            "exhaustion"
        );
    }

    // Severity buckets mirror the Python reference:
    // 0 signals -> 0, 1-2 -> 1, 3-4 -> 2, 5+ -> 3.
    #[test]
    fn signal_group_severity_buckets_match_python() {
        let mut g = SignalGroup::new("misalignment");
        assert_eq!(g.severity, 0);
        for n in 1..=2 {
            g.add_signal(SignalInstance::new(
                SignalType::MisalignmentCorrection,
                n,
                "x",
            ));
        }
        assert_eq!(g.severity, 1);
        for n in 3..=4 {
            g.add_signal(SignalInstance::new(
                SignalType::MisalignmentCorrection,
                n,
                "x",
            ));
        }
        assert_eq!(g.severity, 2);
        for n in 5..=6 {
            g.add_signal(SignalInstance::new(
                SignalType::MisalignmentCorrection,
                n,
                "x",
            ));
        }
        assert_eq!(g.severity, 3);
    }
}
|
||||
401
crates/brightstaff/src/signals/text_processing.rs
Normal file
401
crates/brightstaff/src/signals/text_processing.rs
Normal file
|
|
@ -0,0 +1,401 @@
|
|||
//! Text normalization and similarity primitives.
|
||||
//!
|
||||
//! Direct Rust port of `signals/text_processing.py` from the reference. The
|
||||
//! shapes (`NormalizedMessage`, `NormalizedPattern`) and similarity formulas
|
||||
//! match the Python implementation exactly so that pattern matching produces
|
||||
//! the same results on the same inputs.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
/// Size of character n-grams used for fuzzy similarity (3 = trigrams).
pub const NGRAM_SIZE: usize = 3;

// The 32 ASCII punctuation characters (same set as Python's
// `string.punctuation`). Used with `trim_matches`, so only leading/trailing
// punctuation is removed from tokens; internal punctuation (e.g. the
// apostrophe in "don't") survives tokenization.
const PUNCT_TRIM: &[char] = &[
    '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=',
    '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~',
];
|
||||
|
||||
/// Pre-processed message with normalized text and tokens for efficient matching.
#[derive(Debug, Clone, Default)]
pub struct NormalizedMessage {
    // Possibly-truncated original text (head + tail join when over max_length).
    pub raw: String,
    // Lowercased, boundary-punctuation-trimmed word tokens, in order.
    pub tokens: Vec<String>,
    // Unique tokens, for O(1) membership checks.
    pub token_set: HashSet<String>,
    // Space-joined adjacent token pairs.
    pub bigram_set: HashSet<String>,
    // Character n-grams (NGRAM_SIZE) over the space-joined tokens.
    pub char_ngram_set: HashSet<String>,
    // Token -> occurrence count, for cosine similarity.
    pub token_frequency: HashMap<String, usize>,
}
|
||||
|
||||
impl NormalizedMessage {
    /// Create a normalized message from raw text. Mirrors
    /// `NormalizedMessage.from_text` in the reference, including the
    /// head-20%/tail-80% truncation strategy when text exceeds `max_length`.
    pub fn from_text(text: &str, max_length: usize) -> Self {
        let char_count = text.chars().count();

        // Oversized input keeps the opening (20%) and the closing (rest),
        // joined by a single space, so both ends survive for matching.
        let raw: String = if char_count <= max_length {
            text.to_string()
        } else {
            let head_len = max_length / 5;
            // Reserve one char for the joining space.
            let tail_len = max_length.saturating_sub(head_len + 1);
            let head: String = text.chars().take(head_len).collect();
            let tail: String = text
                .chars()
                .skip(char_count.saturating_sub(tail_len))
                .collect();
            format!("{} {}", head, tail)
        };

        // Normalize unicode punctuation to ASCII equivalents.
        let normalized_unicode = raw
            .replace(['\u{2019}', '\u{2018}'], "'")
            .replace(['\u{201c}', '\u{201d}'], "\"")
            .replace(['\u{2013}', '\u{2014}'], "-");

        // Lowercase + collapse whitespace (matches Python's `" ".join(s.split())`).
        let normalized: String = normalized_unicode
            .to_lowercase()
            .split_whitespace()
            .collect::<Vec<_>>()
            .join(" ");

        // Tokenize: trim boundary punctuation only; internal punctuation stays.
        let mut tokens: Vec<String> = Vec::new();
        for word in normalized.split_whitespace() {
            let stripped: String = word.trim_matches(PUNCT_TRIM).to_string();
            if !stripped.is_empty() {
                tokens.push(stripped);
            }
        }

        let token_set: HashSet<String> = tokens.iter().cloned().collect();

        // Adjacent-token bigrams, space-joined.
        let mut bigram_set: HashSet<String> = HashSet::new();
        for i in 0..tokens.len().saturating_sub(1) {
            bigram_set.insert(format!("{} {}", tokens[i], tokens[i + 1]));
        }

        // Character n-grams are built over the re-joined tokens, so trimmed
        // punctuation cannot influence fuzzy similarity.
        let tokens_text = tokens.join(" ");
        let char_ngram_set = char_ngrams(&tokens_text, NGRAM_SIZE);

        let mut token_frequency: HashMap<String, usize> = HashMap::new();
        for t in &tokens {
            *token_frequency.entry(t.clone()).or_insert(0) += 1;
        }

        Self {
            raw,
            tokens,
            token_set,
            bigram_set,
            char_ngram_set,
            token_frequency,
        }
    }

    /// O(1) membership test for a single normalized token.
    pub fn contains_token(&self, token: &str) -> bool {
        self.token_set.contains(token)
    }

    /// True when `phrase`'s whitespace-split tokens occur consecutively in
    /// this message's token stream. An empty phrase never matches.
    pub fn contains_phrase(&self, phrase: &str) -> bool {
        let phrase_tokens: Vec<&str> = phrase.split_whitespace().collect();
        if phrase_tokens.is_empty() {
            return false;
        }
        if phrase_tokens.len() == 1 {
            return self.contains_token(phrase_tokens[0]);
        }
        if phrase_tokens.len() > self.tokens.len() {
            return false;
        }
        // Sliding-window scan for an exact consecutive match.
        let n = phrase_tokens.len();
        for i in 0..=self.tokens.len() - n {
            if self.tokens[i..i + n]
                .iter()
                .zip(phrase_tokens.iter())
                .all(|(a, b)| a == b)
            {
                return true;
            }
        }
        false
    }

    /// Character n-gram (Jaccard) similarity vs another normalized message.
    pub fn ngram_similarity_with_message(&self, other: &NormalizedMessage) -> f32 {
        jaccard(&self.char_ngram_set, &other.char_ngram_set)
    }

    /// Character n-gram (Jaccard) similarity vs a raw pattern string.
    pub fn ngram_similarity_with_pattern(&self, pattern: &str) -> f32 {
        let normalized = strip_non_word_chars(&pattern.to_lowercase());
        let pattern_ngrams = char_ngrams(&normalized, NGRAM_SIZE);
        jaccard(&self.char_ngram_set, &pattern_ngrams)
    }

    /// Fraction of pattern's ngrams contained in this message's ngram set.
    /// Returns 0.0 when the pattern produces no ngrams (shorter than
    /// `NGRAM_SIZE` after punctuation stripping).
    pub fn char_ngram_containment(&self, pattern: &str) -> f32 {
        let normalized = strip_non_word_chars(&pattern.to_lowercase());
        let pattern_ngrams = char_ngrams(&normalized, NGRAM_SIZE);
        if pattern_ngrams.is_empty() {
            return 0.0;
        }
        let contained = pattern_ngrams
            .iter()
            .filter(|ng| self.char_ngram_set.contains(*ng))
            .count();
        contained as f32 / pattern_ngrams.len() as f32
    }

    /// Token-frequency cosine similarity vs a raw pattern string.
    pub fn token_cosine_similarity(&self, pattern: &str) -> f32 {
        // The pattern is tokenized the same way as the message: lowercase,
        // trim boundary punctuation, drop tokens that become empty.
        let mut pattern_freq: HashMap<String, usize> = HashMap::new();
        for word in pattern.to_lowercase().split_whitespace() {
            let stripped = word.trim_matches(PUNCT_TRIM);
            if !stripped.is_empty() {
                *pattern_freq.entry(stripped.to_string()).or_insert(0) += 1;
            }
        }
        cosine_freq(&self.token_frequency, &pattern_freq)
    }

    /// Layered match against a pre-normalized pattern. Mirrors
    /// `matches_normalized_pattern` from the reference: exact phrase ->
    /// char-ngram Jaccard -> token cosine. Returns on the first layer that
    /// clears its threshold.
    pub fn matches_normalized_pattern(
        &self,
        pattern: &NormalizedPattern,
        char_ngram_threshold: f32,
        token_cosine_threshold: f32,
    ) -> bool {
        // Layer 0: exact phrase match using pre-tokenized message.
        let plen = pattern.tokens.len();
        let slen = self.tokens.len();
        if plen > 0 && plen <= slen {
            for i in 0..=slen - plen {
                if self.tokens[i..i + plen] == pattern.tokens[..] {
                    return true;
                }
            }
        }

        // Layer 1: character n-gram Jaccard similarity.
        if !self.char_ngram_set.is_empty() && !pattern.char_ngram_set.is_empty() {
            let inter = self
                .char_ngram_set
                .intersection(&pattern.char_ngram_set)
                .count();
            let union = self.char_ngram_set.union(&pattern.char_ngram_set).count();
            if union > 0 {
                let sim = inter as f32 / union as f32;
                if sim >= char_ngram_threshold {
                    return true;
                }
            }
        }

        // Layer 2: token frequency cosine similarity.
        if !self.token_frequency.is_empty() && !pattern.token_frequency.is_empty() {
            let sim = cosine_freq(&self.token_frequency, &pattern.token_frequency);
            if sim >= token_cosine_threshold {
                return true;
            }
        }

        false
    }
}
|
||||
|
||||
/// Pre-processed pattern with normalized text and pre-computed n-grams/tokens.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct NormalizedPattern {
|
||||
pub raw: String,
|
||||
pub tokens: Vec<String>,
|
||||
pub char_ngram_set: HashSet<String>,
|
||||
pub token_frequency: HashMap<String, usize>,
|
||||
}
|
||||
|
||||
impl NormalizedPattern {
|
||||
pub fn from_text(pattern: &str) -> Self {
|
||||
let normalized = pattern
|
||||
.to_lowercase()
|
||||
.replace(['\u{2019}', '\u{2018}'], "'")
|
||||
.replace(['\u{201c}', '\u{201d}'], "\"")
|
||||
.replace(['\u{2013}', '\u{2014}'], "-");
|
||||
let normalized: String = normalized.split_whitespace().collect::<Vec<_>>().join(" ");
|
||||
|
||||
// Tokenize the same way as NormalizedMessage (trim boundary punctuation,
|
||||
// keep internal punctuation).
|
||||
let mut tokens: Vec<String> = Vec::new();
|
||||
for word in normalized.split_whitespace() {
|
||||
let stripped = word.trim_matches(PUNCT_TRIM);
|
||||
if !stripped.is_empty() {
|
||||
tokens.push(stripped.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
// For ngrams + cosine, strip ALL punctuation (matches Python's
|
||||
// `re.sub(r"[^\w\s]", "", normalized)`).
|
||||
let normalized_for_ngrams = strip_non_word_chars(&normalized);
|
||||
let char_ngram_set = char_ngrams(&normalized_for_ngrams, NGRAM_SIZE);
|
||||
|
||||
let tokens_no_punct: Vec<&str> = normalized_for_ngrams.split_whitespace().collect();
|
||||
let mut token_frequency: HashMap<String, usize> = HashMap::new();
|
||||
for t in &tokens_no_punct {
|
||||
*token_frequency.entry((*t).to_string()).or_insert(0) += 1;
|
||||
}
|
||||
|
||||
Self {
|
||||
raw: pattern.to_string(),
|
||||
tokens,
|
||||
char_ngram_set,
|
||||
token_frequency,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Convenience: normalize a list of raw pattern strings into `NormalizedPattern`s.
|
||||
pub fn normalize_patterns(patterns: &[&str]) -> Vec<NormalizedPattern> {
|
||||
patterns
|
||||
.iter()
|
||||
.map(|p| NormalizedPattern::from_text(p))
|
||||
.collect()
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Similarity primitives
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Set of all length-`n` character windows of `s`. Iterates by character
/// index, not byte (mirroring Python string slicing); a string shorter than
/// `n` yields the empty set.
fn char_ngrams(s: &str, n: usize) -> HashSet<String> {
    let chars: Vec<char> = s.chars().collect();
    match chars.len().checked_sub(n) {
        // Fewer than n characters: no windows exist.
        None => HashSet::new(),
        Some(last_start) => (0..=last_start)
            .map(|start| chars[start..start + n].iter().collect())
            .collect(),
    }
}
|
||||
|
||||
/// Jaccard similarity (|A ∩ B| / |A ∪ B|) of two string sets.
/// Two empty sets are defined as identical (1.0); exactly one empty set
/// yields 0.0, matching the Python reference.
fn jaccard(a: &HashSet<String>, b: &HashSet<String>) -> f32 {
    if a.is_empty() && b.is_empty() {
        return 1.0;
    }
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }
    // Both sets are non-empty past the guards, so the union is at least 1;
    // the previous `union == 0` branch was unreachable and has been removed.
    let inter = a.intersection(b).count();
    let union = a.union(b).count();
    inter as f32 / union as f32
}
|
||||
|
||||
/// Cosine similarity of two token-frequency tables. Two empty tables are
/// defined as identical (1.0); exactly one empty table yields 0.0.
/// Accumulates in f64 so integer counts stay exact, then narrows to f32.
fn cosine_freq(a: &HashMap<String, usize>, b: &HashMap<String, usize>) -> f32 {
    if a.is_empty() && b.is_empty() {
        return 1.0;
    }
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }
    // Dot product over b's keys (missing keys in a count as frequency 0).
    let dot: f64 = b
        .iter()
        .map(|(token, &freq_b)| (a.get(token).copied().unwrap_or(0) * freq_b) as f64)
        .sum();
    let norm_a = a.values().map(|&f| (f * f) as f64).sum::<f64>().sqrt();
    let norm_b = b.values().map(|&f| (f * f) as f64).sum::<f64>().sqrt();
    if norm_a == 0.0 || norm_b == 0.0 {
        0.0
    } else {
        (dot / (norm_a * norm_b)) as f32
    }
}
|
||||
|
||||
/// Python equivalent: `re.sub(r"[^\w\s]", "", text)` followed by whitespace
/// collapse. Python's `\w` is `[A-Za-z0-9_]` plus unicode word characters; we
/// use Rust's `char::is_alphanumeric()` plus `_` for an equivalent definition.
fn strip_non_word_chars(text: &str) -> String {
    let kept: String = text
        .chars()
        .filter(|&c| c.is_alphanumeric() || c == '_' || c.is_whitespace())
        .collect();
    // Collapse any whitespace runs into single spaces and trim the ends.
    kept.split_whitespace().collect::<Vec<_>>().join(" ")
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn normalize_lowercases_and_strips_punctuation() {
        let m = NormalizedMessage::from_text("Hello, World!", 2000);
        assert_eq!(m.tokens, vec!["hello".to_string(), "world".to_string()]);
    }

    // Smart quotes fold to ASCII; internal apostrophes survive tokenization
    // because only boundary punctuation is trimmed.
    #[test]
    fn normalizes_smart_quotes() {
        let m = NormalizedMessage::from_text("don\u{2019}t", 2000);
        assert!(m.tokens.contains(&"don't".to_string()));
    }

    #[test]
    fn truncates_long_text_with_head_tail() {
        let long = "a".repeat(3000);
        let m = NormalizedMessage::from_text(&long, 2000);
        // raw should be ~ 2000 chars (head + space + tail)
        assert!(m.raw.chars().count() <= 2001);
        assert!(m.raw.starts_with("aa"));
        assert!(m.raw.ends_with("aa"));
    }

    #[test]
    fn contains_phrase_matches_consecutive_tokens() {
        let m = NormalizedMessage::from_text("I think this is great work", 2000);
        assert!(m.contains_phrase("this is great"));
        assert!(!m.contains_phrase("great this"));
    }

    #[test]
    fn matches_pattern_via_exact_phrase() {
        let m = NormalizedMessage::from_text("No, I meant the second one", 2000);
        let p = NormalizedPattern::from_text("no i meant");
        assert!(m.matches_normalized_pattern(&p, 0.65, 0.6));
    }

    #[test]
    fn matches_pattern_via_char_ngram_fuzziness() {
        // Typo in "meant" -> "ment" so layer 0 (exact phrase) cannot match,
        // forcing the matcher to fall back to layer 1 (char n-gram Jaccard).
        let m = NormalizedMessage::from_text("No I ment", 2000);
        let p = NormalizedPattern::from_text("no i meant");
        assert!(m.matches_normalized_pattern(&p, 0.4, 0.6));
    }

    #[test]
    fn jaccard_identical_sets_is_one() {
        let a: HashSet<String> = ["abc", "bcd"].iter().map(|s| s.to_string()).collect();
        assert!((jaccard(&a, &a) - 1.0).abs() < 1e-6);
    }

    // Disjoint vocabularies have zero cosine similarity.
    #[test]
    fn cosine_freq_orthogonal_is_zero() {
        let mut a: HashMap<String, usize> = HashMap::new();
        a.insert("hello".to_string(), 1);
        let mut b: HashMap<String, usize> = HashMap::new();
        b.insert("world".to_string(), 1);
        assert_eq!(cosine_freq(&a, &b), 0.0);
    }
}
|
||||
|
|
@ -16,10 +16,134 @@ use tracing_opentelemetry::OpenTelemetrySpanExt;
|
|||
use crate::handlers::agents::pipeline::{PipelineError, PipelineProcessor};
|
||||
|
||||
const STREAM_BUFFER_SIZE: usize = 16;
|
||||
use crate::signals::{InteractionQuality, SignalAnalyzer, TextBasedSignalAnalyzer, FLAG_MARKER};
|
||||
use crate::tracing::{llm, set_service_name, signals as signal_constants};
|
||||
/// Cap on accumulated response bytes kept for usage extraction.
|
||||
/// Most chat responses are well under this; pathological ones are dropped without
|
||||
/// affecting pass-through streaming to the client.
|
||||
const USAGE_BUFFER_MAX: usize = 2 * 1024 * 1024;
|
||||
use crate::metrics as bs_metrics;
|
||||
use crate::metrics::labels as metric_labels;
|
||||
use crate::signals::otel::emit_signals_to_span;
|
||||
use crate::signals::{SignalAnalyzer, FLAG_MARKER};
|
||||
use crate::tracing::{llm, set_service_name};
|
||||
use hermesllm::apis::openai::Message;
|
||||
|
||||
/// Parsed usage + resolved-model details from a provider response.
#[derive(Debug, Default, Clone)]
struct ExtractedUsage {
    // Input-side token count (OpenAI `prompt_tokens` / Anthropic `input_tokens`).
    prompt_tokens: Option<i64>,
    // Output-side token count (OpenAI `completion_tokens` / Anthropic `output_tokens`).
    completion_tokens: Option<i64>,
    // Provider-reported total, or prompt + completion when derivable.
    total_tokens: Option<i64>,
    // Tokens served from a prompt cache, when the provider reports them.
    cached_input_tokens: Option<i64>,
    // Tokens spent creating a cache entry (Anthropic `cache_creation_input_tokens`).
    cache_creation_tokens: Option<i64>,
    // Hidden reasoning/thinking tokens, when reported.
    reasoning_tokens: Option<i64>,
    /// The model the upstream actually used. For router aliases (e.g.
    /// `router:software-engineering`), this differs from the request model.
    resolved_model: Option<String>,
}
|
||||
|
||||
impl ExtractedUsage {
    /// True when nothing useful was extracted — no token counts and no
    /// resolved model. Callers use this to skip emitting usage attributes.
    /// Cache/reasoning counts alone do not count as "non-empty".
    fn is_empty(&self) -> bool {
        self.prompt_tokens.is_none()
            && self.completion_tokens.is_none()
            && self.total_tokens.is_none()
            && self.resolved_model.is_none()
    }

    /// Best-effort extraction of usage fields from a provider response JSON
    /// value. Reads OpenAI-shaped keys first, then falls back per-field to
    /// alternative provider spellings (Anthropic `input_tokens` /
    /// `output_tokens` / `cache_read_input_tokens`; plus
    /// `cached_content_token_count` / `thoughts_token_count` —
    /// NOTE(review): assumed to appear under `usage` here; confirm against
    /// the actual provider payloads). Missing fields stay `None`.
    fn from_json(value: &serde_json::Value) -> Self {
        let mut out = Self::default();
        // Top-level `model` is the model the upstream actually ran.
        if let Some(model) = value.get("model").and_then(|v| v.as_str()) {
            if !model.is_empty() {
                out.resolved_model = Some(model.to_string());
            }
        }
        if let Some(u) = value.get("usage") {
            // OpenAI-shape usage
            out.prompt_tokens = u.get("prompt_tokens").and_then(|v| v.as_i64());
            out.completion_tokens = u.get("completion_tokens").and_then(|v| v.as_i64());
            out.total_tokens = u.get("total_tokens").and_then(|v| v.as_i64());
            out.cached_input_tokens = u
                .get("prompt_tokens_details")
                .and_then(|d| d.get("cached_tokens"))
                .and_then(|v| v.as_i64());
            out.reasoning_tokens = u
                .get("completion_tokens_details")
                .and_then(|d| d.get("reasoning_tokens"))
                .and_then(|v| v.as_i64());

            // Anthropic-shape fallbacks
            if out.prompt_tokens.is_none() {
                out.prompt_tokens = u.get("input_tokens").and_then(|v| v.as_i64());
            }
            if out.completion_tokens.is_none() {
                out.completion_tokens = u.get("output_tokens").and_then(|v| v.as_i64());
            }
            // Derive a total when the provider reports only the two parts.
            if out.total_tokens.is_none() {
                if let (Some(p), Some(c)) = (out.prompt_tokens, out.completion_tokens) {
                    out.total_tokens = Some(p + c);
                }
            }
            if out.cached_input_tokens.is_none() {
                out.cached_input_tokens = u.get("cache_read_input_tokens").and_then(|v| v.as_i64());
            }
            if out.cached_input_tokens.is_none() {
                out.cached_input_tokens =
                    u.get("cached_content_token_count").and_then(|v| v.as_i64());
            }
            out.cache_creation_tokens = u
                .get("cache_creation_input_tokens")
                .and_then(|v| v.as_i64());
            if out.reasoning_tokens.is_none() {
                out.reasoning_tokens = u.get("thoughts_token_count").and_then(|v| v.as_i64());
            }
        }
        out
    }
}
|
||||
|
||||
/// Try to pull usage out of an accumulated response body.
/// Handles both a single JSON object (non-streaming) and SSE streams where the
/// final `data: {...}` event carries the `usage` field.
/// Returns a default (empty) `ExtractedUsage` when nothing can be parsed.
fn extract_usage_from_bytes(buf: &[u8]) -> ExtractedUsage {
    if buf.is_empty() {
        return ExtractedUsage::default();
    }

    // Fast path: full-body JSON (non-streaming).
    if let Ok(value) = serde_json::from_slice::<serde_json::Value>(buf) {
        let u = ExtractedUsage::from_json(&value);
        if !u.is_empty() {
            return u;
        }
    }

    // SSE path: scan from the end for a `data:` line containing a usage object.
    // Non-UTF-8 bodies cannot be SSE text, so give up quietly.
    let text = match std::str::from_utf8(buf) {
        Ok(t) => t,
        Err(_) => return ExtractedUsage::default(),
    };
    // Reverse iteration: the usage-bearing event is typically the last one.
    for line in text.lines().rev() {
        let trimmed = line.trim_start();
        let payload = match trimmed.strip_prefix("data:") {
            Some(p) => p.trim_start(),
            None => continue,
        };
        if payload == "[DONE]" || payload.is_empty() {
            continue;
        }
        // Cheap substring pre-filter before paying for a JSON parse.
        if !payload.contains("\"usage\"") {
            continue;
        }
        if let Ok(value) = serde_json::from_str::<serde_json::Value>(payload) {
            let u = ExtractedUsage::from_json(&value);
            if !u.is_empty() {
                return u;
            }
        }
    }

    ExtractedUsage::default()
}
|
||||
|
||||
/// Trait for processing streaming chunks
|
||||
/// Implementors can inject custom logic during streaming (e.g., hallucination detection, logging)
|
||||
pub trait StreamProcessor: Send + 'static {
|
||||
|
|
@ -51,6 +175,18 @@ impl StreamProcessor for Box<dyn StreamProcessor> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Optional Prometheus-metric context for an LLM upstream call. When present,
/// [`ObservableStreamProcessor`] emits `brightstaff_llm_*` metrics at
/// first-byte / complete / error callbacks.
#[derive(Debug, Clone)]
pub struct LlmMetricsCtx {
    // Upstream provider label used on the emitted metrics.
    pub provider: String,
    // Model label used on the emitted metrics (request-side model name;
    // presumably the alias, not the resolved model — confirm at call sites).
    pub model: String,
    /// HTTP status of the upstream response. Used to pick `status_class` and
    /// `error_class` on `on_complete`.
    pub upstream_status: u16,
}
|
||||
|
||||
/// A processor that tracks streaming metrics
|
||||
pub struct ObservableStreamProcessor {
|
||||
service_name: String,
|
||||
|
|
@ -60,6 +196,12 @@ pub struct ObservableStreamProcessor {
|
|||
start_time: Instant,
|
||||
time_to_first_token: Option<u128>,
|
||||
messages: Option<Vec<Message>>,
|
||||
/// Accumulated response bytes used only for best-effort usage extraction
|
||||
/// on `on_complete`. Capped at `USAGE_BUFFER_MAX`; excess chunks are dropped
|
||||
/// from the buffer (they still pass through to the client).
|
||||
response_buffer: Vec<u8>,
|
||||
llm_metrics: Option<LlmMetricsCtx>,
|
||||
metrics_recorded: bool,
|
||||
}
|
||||
|
||||
impl ObservableStreamProcessor {
|
||||
|
|
@ -93,21 +235,42 @@ impl ObservableStreamProcessor {
|
|||
start_time,
|
||||
time_to_first_token: None,
|
||||
messages,
|
||||
response_buffer: Vec::new(),
|
||||
llm_metrics: None,
|
||||
metrics_recorded: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Attach LLM upstream metric context so the processor emits
|
||||
/// `brightstaff_llm_*` metrics on first-byte / complete / error.
|
||||
pub fn with_llm_metrics(mut self, ctx: LlmMetricsCtx) -> Self {
|
||||
self.llm_metrics = Some(ctx);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl StreamProcessor for ObservableStreamProcessor {
|
||||
fn process_chunk(&mut self, chunk: Bytes) -> Result<Option<Bytes>, String> {
|
||||
self.total_bytes += chunk.len();
|
||||
self.chunk_count += 1;
|
||||
// Accumulate for best-effort usage extraction; drop further chunks once
|
||||
// the cap is reached so we don't retain huge response bodies in memory.
|
||||
if self.response_buffer.len() < USAGE_BUFFER_MAX {
|
||||
let remaining = USAGE_BUFFER_MAX - self.response_buffer.len();
|
||||
let take = chunk.len().min(remaining);
|
||||
self.response_buffer.extend_from_slice(&chunk[..take]);
|
||||
}
|
||||
Ok(Some(chunk))
|
||||
}
|
||||
|
||||
fn on_first_bytes(&mut self) {
|
||||
// Record time to first token (only for streaming)
|
||||
if self.time_to_first_token.is_none() {
|
||||
self.time_to_first_token = Some(self.start_time.elapsed().as_millis());
|
||||
let elapsed = self.start_time.elapsed();
|
||||
self.time_to_first_token = Some(elapsed.as_millis());
|
||||
if let Some(ref ctx) = self.llm_metrics {
|
||||
bs_metrics::record_llm_ttft(&ctx.provider, &ctx.model, elapsed);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -124,77 +287,98 @@ impl StreamProcessor for ObservableStreamProcessor {
|
|||
);
|
||||
}
|
||||
|
||||
// Analyze signals if messages are available and record as span attributes
|
||||
if let Some(ref messages) = self.messages {
|
||||
let analyzer: Box<dyn SignalAnalyzer> = Box::new(TextBasedSignalAnalyzer::new());
|
||||
let report = analyzer.analyze(messages);
|
||||
// Record total duration on the span for the observability console.
|
||||
let duration_ms = self.start_time.elapsed().as_millis() as i64;
|
||||
{
|
||||
let span = tracing::Span::current();
|
||||
let otel_context = span.context();
|
||||
let otel_span = otel_context.span();
|
||||
otel_span.set_attribute(KeyValue::new(llm::DURATION_MS, duration_ms));
|
||||
otel_span.set_attribute(KeyValue::new(llm::RESPONSE_BYTES, self.total_bytes as i64));
|
||||
}
|
||||
|
||||
// Best-effort usage extraction + emission (works for both streaming
|
||||
// SSE and non-streaming JSON responses that include a `usage` object).
|
||||
let usage = extract_usage_from_bytes(&self.response_buffer);
|
||||
if !usage.is_empty() {
|
||||
let span = tracing::Span::current();
|
||||
let otel_context = span.context();
|
||||
let otel_span = otel_context.span();
|
||||
if let Some(v) = usage.prompt_tokens {
|
||||
otel_span.set_attribute(KeyValue::new(llm::PROMPT_TOKENS, v));
|
||||
}
|
||||
if let Some(v) = usage.completion_tokens {
|
||||
otel_span.set_attribute(KeyValue::new(llm::COMPLETION_TOKENS, v));
|
||||
}
|
||||
if let Some(v) = usage.total_tokens {
|
||||
otel_span.set_attribute(KeyValue::new(llm::TOTAL_TOKENS, v));
|
||||
}
|
||||
if let Some(v) = usage.cached_input_tokens {
|
||||
otel_span.set_attribute(KeyValue::new(llm::CACHED_INPUT_TOKENS, v));
|
||||
}
|
||||
if let Some(v) = usage.cache_creation_tokens {
|
||||
otel_span.set_attribute(KeyValue::new(llm::CACHE_CREATION_TOKENS, v));
|
||||
}
|
||||
if let Some(v) = usage.reasoning_tokens {
|
||||
otel_span.set_attribute(KeyValue::new(llm::REASONING_TOKENS, v));
|
||||
}
|
||||
// Override `llm.model` with the model the upstream actually ran
|
||||
// (e.g. `openai-gpt-5.4` resolved from `router:software-engineering`).
|
||||
// Cost lookup keys off the real model, not the alias.
|
||||
if let Some(resolved) = usage.resolved_model.clone() {
|
||||
otel_span.set_attribute(KeyValue::new(llm::MODEL_NAME, resolved));
|
||||
}
|
||||
}
|
||||
|
||||
// Emit LLM upstream prometheus metrics (duration + tokens) if wired.
|
||||
// The upstream responded (we have a status), so status_class alone
|
||||
// carries the non-2xx signal — error_class stays "none".
|
||||
if let Some(ref ctx) = self.llm_metrics {
|
||||
bs_metrics::record_llm_upstream(
|
||||
&ctx.provider,
|
||||
&ctx.model,
|
||||
ctx.upstream_status,
|
||||
metric_labels::LLM_ERR_NONE,
|
||||
self.start_time.elapsed(),
|
||||
);
|
||||
if let Some(v) = usage.prompt_tokens {
|
||||
bs_metrics::record_llm_tokens(
|
||||
&ctx.provider,
|
||||
&ctx.model,
|
||||
metric_labels::TOKEN_KIND_PROMPT,
|
||||
v.max(0) as u64,
|
||||
);
|
||||
}
|
||||
if let Some(v) = usage.completion_tokens {
|
||||
bs_metrics::record_llm_tokens(
|
||||
&ctx.provider,
|
||||
&ctx.model,
|
||||
metric_labels::TOKEN_KIND_COMPLETION,
|
||||
v.max(0) as u64,
|
||||
);
|
||||
}
|
||||
if usage.prompt_tokens.is_none() && usage.completion_tokens.is_none() {
|
||||
bs_metrics::record_llm_tokens_usage_missing(&ctx.provider, &ctx.model);
|
||||
}
|
||||
self.metrics_recorded = true;
|
||||
}
|
||||
// Release the buffered bytes early; nothing downstream needs them.
|
||||
self.response_buffer.clear();
|
||||
self.response_buffer.shrink_to_fit();
|
||||
|
||||
// Analyze signals if messages are available and record as span
|
||||
// attributes + per-signal events. We dual-emit legacy aggregate keys
|
||||
// and the new layered taxonomy so existing dashboards keep working
|
||||
// while new consumers can opt into the richer hierarchy.
|
||||
if let Some(ref messages) = self.messages {
|
||||
let analyzer = SignalAnalyzer::default();
|
||||
let report = analyzer.analyze_openai(messages);
|
||||
|
||||
// Get the current OTel span to set signal attributes
|
||||
let span = tracing::Span::current();
|
||||
let otel_context = span.context();
|
||||
let otel_span = otel_context.span();
|
||||
|
||||
// Add overall quality
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::QUALITY,
|
||||
format!("{:?}", report.overall_quality),
|
||||
));
|
||||
|
||||
// Add repair/follow-up metrics if concerning
|
||||
if report.follow_up.is_concerning || report.follow_up.repair_count > 0 {
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::REPAIR_COUNT,
|
||||
report.follow_up.repair_count as i64,
|
||||
));
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::REPAIR_RATIO,
|
||||
format!("{:.3}", report.follow_up.repair_ratio),
|
||||
));
|
||||
}
|
||||
|
||||
// Add frustration metrics
|
||||
if report.frustration.has_frustration {
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::FRUSTRATION_COUNT,
|
||||
report.frustration.frustration_count as i64,
|
||||
));
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::FRUSTRATION_SEVERITY,
|
||||
report.frustration.severity as i64,
|
||||
));
|
||||
}
|
||||
|
||||
// Add repetition metrics
|
||||
if report.repetition.has_looping {
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::REPETITION_COUNT,
|
||||
report.repetition.repetition_count as i64,
|
||||
));
|
||||
}
|
||||
|
||||
// Add escalation metrics
|
||||
if report.escalation.escalation_requested {
|
||||
otel_span
|
||||
.set_attribute(KeyValue::new(signal_constants::ESCALATION_REQUESTED, true));
|
||||
}
|
||||
|
||||
// Add positive feedback metrics
|
||||
if report.positive_feedback.has_positive_feedback {
|
||||
otel_span.set_attribute(KeyValue::new(
|
||||
signal_constants::POSITIVE_FEEDBACK_COUNT,
|
||||
report.positive_feedback.positive_count as i64,
|
||||
));
|
||||
}
|
||||
|
||||
// Flag the span name if any concerning signal is detected
|
||||
let should_flag = report.frustration.has_frustration
|
||||
|| report.repetition.has_looping
|
||||
|| report.escalation.escalation_requested
|
||||
|| matches!(
|
||||
report.overall_quality,
|
||||
InteractionQuality::Poor | InteractionQuality::Severe
|
||||
);
|
||||
|
||||
let should_flag = emit_signals_to_span(&otel_span, &report);
|
||||
if should_flag {
|
||||
otel_span.update_name(format!("{} {}", self.operation_name, FLAG_MARKER));
|
||||
}
|
||||
|
|
@ -217,6 +401,18 @@ impl StreamProcessor for ObservableStreamProcessor {
|
|||
duration_ms = self.start_time.elapsed().as_millis(),
|
||||
"stream error"
|
||||
);
|
||||
if let Some(ref ctx) = self.llm_metrics {
|
||||
if !self.metrics_recorded {
|
||||
bs_metrics::record_llm_upstream(
|
||||
&ctx.provider,
|
||||
&ctx.model,
|
||||
ctx.upstream_status,
|
||||
metric_labels::LLM_ERR_STREAM,
|
||||
self.start_time.elapsed(),
|
||||
);
|
||||
self.metrics_recorded = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -404,3 +600,55 @@ pub fn truncate_message(message: &str, max_length: usize) -> String {
|
|||
message.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod usage_extraction_tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn non_streaming_openai_with_cached() {
|
||||
let body = br#"{"id":"x","model":"gpt-4o","choices":[],"usage":{"prompt_tokens":12,"completion_tokens":34,"total_tokens":46,"prompt_tokens_details":{"cached_tokens":5}}}"#;
|
||||
let u = extract_usage_from_bytes(body);
|
||||
assert_eq!(u.prompt_tokens, Some(12));
|
||||
assert_eq!(u.completion_tokens, Some(34));
|
||||
assert_eq!(u.total_tokens, Some(46));
|
||||
assert_eq!(u.cached_input_tokens, Some(5));
|
||||
assert_eq!(u.reasoning_tokens, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn non_streaming_anthropic_with_cache_creation() {
|
||||
let body = br#"{"id":"x","model":"claude","usage":{"input_tokens":100,"output_tokens":50,"cache_creation_input_tokens":20,"cache_read_input_tokens":30}}"#;
|
||||
let u = extract_usage_from_bytes(body);
|
||||
assert_eq!(u.prompt_tokens, Some(100));
|
||||
assert_eq!(u.completion_tokens, Some(50));
|
||||
assert_eq!(u.total_tokens, Some(150));
|
||||
assert_eq!(u.cached_input_tokens, Some(30));
|
||||
assert_eq!(u.cache_creation_tokens, Some(20));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn streaming_openai_final_chunk_has_usage() {
|
||||
let sse = b"data: {\"choices\":[{\"delta\":{\"content\":\"hi\"}}]}
|
||||
|
||||
data: {\"choices\":[{\"delta\":{}, \"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":7,\"completion_tokens\":3,\"total_tokens\":10}}
|
||||
|
||||
data: [DONE]
|
||||
|
||||
";
|
||||
let u = extract_usage_from_bytes(sse);
|
||||
assert_eq!(u.prompt_tokens, Some(7));
|
||||
assert_eq!(u.completion_tokens, Some(3));
|
||||
assert_eq!(u.total_tokens, Some(10));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_returns_default() {
|
||||
assert!(extract_usage_from_bytes(b"").is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_usage_in_body_returns_default() {
|
||||
assert!(extract_usage_from_bytes(br#"{"ok":true}"#).is_empty());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -80,6 +80,18 @@ pub mod llm {
|
|||
/// Total tokens used (prompt + completion)
|
||||
pub const TOTAL_TOKENS: &str = "llm.usage.total_tokens";
|
||||
|
||||
/// Tokens served from a prompt cache read
|
||||
/// (OpenAI `prompt_tokens_details.cached_tokens`, Anthropic `cache_read_input_tokens`,
|
||||
/// Google `cached_content_token_count`)
|
||||
pub const CACHED_INPUT_TOKENS: &str = "llm.usage.cached_input_tokens";
|
||||
|
||||
/// Tokens used to write a prompt cache entry (Anthropic `cache_creation_input_tokens`)
|
||||
pub const CACHE_CREATION_TOKENS: &str = "llm.usage.cache_creation_tokens";
|
||||
|
||||
/// Reasoning tokens for reasoning models
|
||||
/// (OpenAI `completion_tokens_details.reasoning_tokens`, Google `thoughts_token_count`)
|
||||
pub const REASONING_TOKENS: &str = "llm.usage.reasoning_tokens";
|
||||
|
||||
/// Temperature parameter used
|
||||
pub const TEMPERATURE: &str = "llm.temperature";
|
||||
|
||||
|
|
@ -119,6 +131,22 @@ pub mod routing {
|
|||
pub const SELECTION_REASON: &str = "routing.selection_reason";
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Span Attributes - Plano-specific
|
||||
// =============================================================================
|
||||
|
||||
/// Attributes specific to Plano (session affinity, routing decisions).
|
||||
pub mod plano {
|
||||
/// Session identifier propagated via the `x-model-affinity` header.
|
||||
/// Absent when the client did not send the header.
|
||||
pub const SESSION_ID: &str = "plano.session_id";
|
||||
|
||||
/// Matched route name from routing (e.g. "code", "summarization",
|
||||
/// "software-engineering"). Absent when the client routed directly
|
||||
/// to a concrete model.
|
||||
pub const ROUTE_NAME: &str = "plano.route.name";
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Span Attributes - Error Handling
|
||||
// =============================================================================
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ mod init;
|
|||
mod service_name_exporter;
|
||||
|
||||
pub use constants::{
|
||||
error, http, llm, operation_component, routing, signals, OperationNameBuilder,
|
||||
error, http, llm, operation_component, plano, routing, signals, OperationNameBuilder,
|
||||
};
|
||||
pub use custom_attributes::collect_custom_trace_attributes;
|
||||
pub use init::init_tracer;
|
||||
|
|
|
|||
|
|
@ -234,6 +234,7 @@ pub struct Overrides {
|
|||
pub llm_routing_model: Option<String>,
|
||||
pub agent_orchestration_model: Option<String>,
|
||||
pub orchestrator_model_context_length: Option<usize>,
|
||||
pub disable_signals: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
|
|
@ -395,6 +396,8 @@ pub enum LlmProviderType {
|
|||
Vercel,
|
||||
#[serde(rename = "openrouter")]
|
||||
OpenRouter,
|
||||
#[serde(rename = "digitalocean")]
|
||||
DigitalOcean,
|
||||
}
|
||||
|
||||
impl Display for LlmProviderType {
|
||||
|
|
@ -418,6 +421,7 @@ impl Display for LlmProviderType {
|
|||
LlmProviderType::Plano => write!(f, "plano"),
|
||||
LlmProviderType::Vercel => write!(f, "vercel"),
|
||||
LlmProviderType::OpenRouter => write!(f, "openrouter"),
|
||||
LlmProviderType::DigitalOcean => write!(f, "digitalocean"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -753,4 +757,29 @@ mod test {
|
|||
assert!(model_ids.contains(&"openai-gpt4".to_string()));
|
||||
assert!(!model_ids.contains(&"plano-orchestrator".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_overrides_disable_signals_default_none() {
|
||||
let overrides = super::Overrides::default();
|
||||
assert_eq!(overrides.disable_signals, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_overrides_disable_signals_deserialize() {
|
||||
let yaml = r#"
|
||||
disable_signals: true
|
||||
"#;
|
||||
let overrides: super::Overrides = serde_yaml::from_str(yaml).unwrap();
|
||||
assert_eq!(overrides.disable_signals, Some(true));
|
||||
|
||||
let yaml_false = r#"
|
||||
disable_signals: false
|
||||
"#;
|
||||
let overrides: super::Overrides = serde_yaml::from_str(yaml_false).unwrap();
|
||||
assert_eq!(overrides.disable_signals, Some(false));
|
||||
|
||||
let yaml_missing = "{}";
|
||||
let overrides: super::Overrides = serde_yaml::from_str(yaml_missing).unwrap();
|
||||
assert_eq!(overrides.disable_signals, None);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -435,6 +435,12 @@ impl TokenUsage for MessagesResponse {
|
|||
fn total_tokens(&self) -> usize {
|
||||
(self.usage.input_tokens + self.usage.output_tokens) as usize
|
||||
}
|
||||
fn cached_input_tokens(&self) -> Option<usize> {
|
||||
self.usage.cache_read_input_tokens.map(|t| t as usize)
|
||||
}
|
||||
fn cache_creation_tokens(&self) -> Option<usize> {
|
||||
self.usage.cache_creation_input_tokens.map(|t| t as usize)
|
||||
}
|
||||
}
|
||||
|
||||
impl ProviderResponse for MessagesResponse {
|
||||
|
|
|
|||
|
|
@ -596,6 +596,18 @@ impl TokenUsage for Usage {
|
|||
fn total_tokens(&self) -> usize {
|
||||
self.total_tokens as usize
|
||||
}
|
||||
|
||||
fn cached_input_tokens(&self) -> Option<usize> {
|
||||
self.prompt_tokens_details
|
||||
.as_ref()
|
||||
.and_then(|d| d.cached_tokens.map(|t| t as usize))
|
||||
}
|
||||
|
||||
fn reasoning_tokens(&self) -> Option<usize> {
|
||||
self.completion_tokens_details
|
||||
.as_ref()
|
||||
.and_then(|d| d.reasoning_tokens.map(|t| t as usize))
|
||||
}
|
||||
}
|
||||
|
||||
/// Implementation of ProviderRequest for ChatCompletionsRequest
|
||||
|
|
|
|||
|
|
@ -710,6 +710,18 @@ impl crate::providers::response::TokenUsage for ResponseUsage {
|
|||
fn total_tokens(&self) -> usize {
|
||||
self.total_tokens as usize
|
||||
}
|
||||
|
||||
fn cached_input_tokens(&self) -> Option<usize> {
|
||||
self.input_tokens_details
|
||||
.as_ref()
|
||||
.map(|d| d.cached_tokens.max(0) as usize)
|
||||
}
|
||||
|
||||
fn reasoning_tokens(&self) -> Option<usize> {
|
||||
self.output_tokens_details
|
||||
.as_ref()
|
||||
.map(|d| d.reasoning_tokens.max(0) as usize)
|
||||
}
|
||||
}
|
||||
|
||||
/// Token details
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
use crate::apis::anthropic::MessagesStreamEvent;
|
||||
use crate::apis::anthropic::{
|
||||
MessagesMessageDelta, MessagesStopReason, MessagesStreamEvent, MessagesUsage,
|
||||
};
|
||||
use crate::apis::streaming_shapes::sse::{SseEvent, SseStreamBufferTrait};
|
||||
use crate::providers::streaming_response::ProviderStreamResponseType;
|
||||
use log::warn;
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// SSE Stream Buffer for Anthropic Messages API streaming.
|
||||
|
|
@ -11,13 +14,24 @@ use std::collections::HashSet;
|
|||
///
|
||||
/// When converting from OpenAI to Anthropic format, this buffer injects the required
|
||||
/// ContentBlockStart and ContentBlockStop events to maintain proper Anthropic protocol.
|
||||
///
|
||||
/// Guarantees (Anthropic Messages API contract):
|
||||
/// 1. `message_stop` is never emitted unless a matching `message_start` was emitted first.
|
||||
/// 2. `message_stop` is emitted at most once per stream (no double-close).
|
||||
/// 3. If upstream terminates with no content (empty/filtered/errored response), a
|
||||
/// minimal but well-formed envelope is synthesized so the client's state machine
|
||||
/// stays consistent.
|
||||
pub struct AnthropicMessagesStreamBuffer {
|
||||
/// Buffered SSE events ready to be written to wire
|
||||
buffered_events: Vec<SseEvent>,
|
||||
|
||||
/// Track if we've seen a message_start event
|
||||
/// Track if we've emitted a message_start event
|
||||
message_started: bool,
|
||||
|
||||
/// Track if we've emitted a terminal message_stop event (for idempotency /
|
||||
/// double-close protection).
|
||||
message_stopped: bool,
|
||||
|
||||
/// Track content block indices that have received ContentBlockStart events
|
||||
content_block_start_indices: HashSet<i32>,
|
||||
|
||||
|
|
@ -42,6 +56,7 @@ impl AnthropicMessagesStreamBuffer {
|
|||
Self {
|
||||
buffered_events: Vec::new(),
|
||||
message_started: false,
|
||||
message_stopped: false,
|
||||
content_block_start_indices: HashSet::new(),
|
||||
needs_content_block_stop: false,
|
||||
seen_message_delta: false,
|
||||
|
|
@ -49,6 +64,66 @@ impl AnthropicMessagesStreamBuffer {
|
|||
}
|
||||
}
|
||||
|
||||
/// Inject a `message_start` event into the buffer if one hasn't been emitted yet.
|
||||
/// This is the single source of truth for opening a message — every handler
|
||||
/// that can legitimately be the first event on the wire must call this before
|
||||
/// pushing its own event.
|
||||
fn ensure_message_started(&mut self) {
|
||||
if self.message_started {
|
||||
return;
|
||||
}
|
||||
let model = self.model.as_deref().unwrap_or("unknown");
|
||||
let message_start = AnthropicMessagesStreamBuffer::create_message_start_event(model);
|
||||
self.buffered_events.push(message_start);
|
||||
self.message_started = true;
|
||||
}
|
||||
|
||||
/// Inject a synthetic `message_delta` with `end_turn` / zero usage.
|
||||
/// Used when we must close a message but upstream never produced a terminal
|
||||
/// event (e.g. `[DONE]` arrives with no prior `finish_reason`).
|
||||
fn push_synthetic_message_delta(&mut self) {
|
||||
let event = MessagesStreamEvent::MessageDelta {
|
||||
delta: MessagesMessageDelta {
|
||||
stop_reason: MessagesStopReason::EndTurn,
|
||||
stop_sequence: None,
|
||||
},
|
||||
usage: MessagesUsage {
|
||||
input_tokens: 0,
|
||||
output_tokens: 0,
|
||||
cache_creation_input_tokens: None,
|
||||
cache_read_input_tokens: None,
|
||||
},
|
||||
};
|
||||
let sse_string: String = event.clone().into();
|
||||
self.buffered_events.push(SseEvent {
|
||||
data: None,
|
||||
event: Some("message_delta".to_string()),
|
||||
raw_line: sse_string.clone(),
|
||||
sse_transformed_lines: sse_string,
|
||||
provider_stream_response: Some(ProviderStreamResponseType::MessagesStreamEvent(event)),
|
||||
});
|
||||
self.seen_message_delta = true;
|
||||
}
|
||||
|
||||
/// Inject a `message_stop` event into the buffer, marking the stream as closed.
|
||||
/// Idempotent — subsequent calls are no-ops.
|
||||
fn push_message_stop(&mut self) {
|
||||
if self.message_stopped {
|
||||
return;
|
||||
}
|
||||
let message_stop = MessagesStreamEvent::MessageStop;
|
||||
let sse_string: String = message_stop.into();
|
||||
self.buffered_events.push(SseEvent {
|
||||
data: None,
|
||||
event: Some("message_stop".to_string()),
|
||||
raw_line: sse_string.clone(),
|
||||
sse_transformed_lines: sse_string,
|
||||
provider_stream_response: None,
|
||||
});
|
||||
self.message_stopped = true;
|
||||
self.seen_message_delta = false;
|
||||
}
|
||||
|
||||
/// Check if a content_block_start event has been sent for the given index
|
||||
fn has_content_block_start_been_sent(&self, index: i32) -> bool {
|
||||
self.content_block_start_indices.contains(&index)
|
||||
|
|
@ -149,6 +224,27 @@ impl SseStreamBufferTrait for AnthropicMessagesStreamBuffer {
|
|||
// We match on a reference first to determine the type, then move the event
|
||||
match &event.provider_stream_response {
|
||||
Some(ProviderStreamResponseType::MessagesStreamEvent(evt)) => {
|
||||
// If the message has already been closed, drop any trailing events
|
||||
// to avoid emitting data after `message_stop` (protocol violation).
|
||||
// This typically indicates a duplicate `[DONE]` from upstream or a
|
||||
// replay of previously-buffered bytes — worth surfacing so we can
|
||||
// spot misbehaving providers.
|
||||
if self.message_stopped {
|
||||
warn!(
|
||||
"anthropic stream buffer: dropping event after message_stop (variant={})",
|
||||
match evt {
|
||||
MessagesStreamEvent::MessageStart { .. } => "message_start",
|
||||
MessagesStreamEvent::ContentBlockStart { .. } => "content_block_start",
|
||||
MessagesStreamEvent::ContentBlockDelta { .. } => "content_block_delta",
|
||||
MessagesStreamEvent::ContentBlockStop { .. } => "content_block_stop",
|
||||
MessagesStreamEvent::MessageDelta { .. } => "message_delta",
|
||||
MessagesStreamEvent::MessageStop => "message_stop",
|
||||
MessagesStreamEvent::Ping => "ping",
|
||||
}
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
match evt {
|
||||
MessagesStreamEvent::MessageStart { .. } => {
|
||||
// Add the message_start event
|
||||
|
|
@ -157,14 +253,7 @@ impl SseStreamBufferTrait for AnthropicMessagesStreamBuffer {
|
|||
}
|
||||
MessagesStreamEvent::ContentBlockStart { index, .. } => {
|
||||
let index = *index as i32;
|
||||
// Inject message_start if needed
|
||||
if !self.message_started {
|
||||
let model = self.model.as_deref().unwrap_or("unknown");
|
||||
let message_start =
|
||||
AnthropicMessagesStreamBuffer::create_message_start_event(model);
|
||||
self.buffered_events.push(message_start);
|
||||
self.message_started = true;
|
||||
}
|
||||
self.ensure_message_started();
|
||||
|
||||
// Add the content_block_start event (from tool calls or other sources)
|
||||
self.buffered_events.push(event);
|
||||
|
|
@ -173,14 +262,7 @@ impl SseStreamBufferTrait for AnthropicMessagesStreamBuffer {
|
|||
}
|
||||
MessagesStreamEvent::ContentBlockDelta { index, .. } => {
|
||||
let index = *index as i32;
|
||||
// Inject message_start if needed
|
||||
if !self.message_started {
|
||||
let model = self.model.as_deref().unwrap_or("unknown");
|
||||
let message_start =
|
||||
AnthropicMessagesStreamBuffer::create_message_start_event(model);
|
||||
self.buffered_events.push(message_start);
|
||||
self.message_started = true;
|
||||
}
|
||||
self.ensure_message_started();
|
||||
|
||||
// Check if ContentBlockStart was sent for this index
|
||||
if !self.has_content_block_start_been_sent(index) {
|
||||
|
|
@ -196,6 +278,11 @@ impl SseStreamBufferTrait for AnthropicMessagesStreamBuffer {
|
|||
self.buffered_events.push(event);
|
||||
}
|
||||
MessagesStreamEvent::MessageDelta { usage, .. } => {
|
||||
// `message_delta` is only meaningful inside an open message.
|
||||
// Upstream can send it with no prior content (empty completion,
|
||||
// content filter, etc.), so we must open a message first.
|
||||
self.ensure_message_started();
|
||||
|
||||
// Inject ContentBlockStop before message_delta
|
||||
if self.needs_content_block_stop {
|
||||
let content_block_stop =
|
||||
|
|
@ -230,15 +317,52 @@ impl SseStreamBufferTrait for AnthropicMessagesStreamBuffer {
|
|||
}
|
||||
MessagesStreamEvent::ContentBlockStop { .. } => {
|
||||
// ContentBlockStop received from upstream (e.g., Bedrock)
|
||||
self.ensure_message_started();
|
||||
// Clear the flag so we don't inject another one
|
||||
self.needs_content_block_stop = false;
|
||||
self.buffered_events.push(event);
|
||||
}
|
||||
MessagesStreamEvent::MessageStop => {
|
||||
// MessageStop received from upstream (e.g., OpenAI via [DONE])
|
||||
// Clear the flag so we don't inject another one
|
||||
self.seen_message_delta = false;
|
||||
// MessageStop received from upstream (e.g., OpenAI via [DONE]).
|
||||
//
|
||||
// The Anthropic protocol requires the full envelope
|
||||
// message_start → [content blocks] → message_delta → message_stop
|
||||
// so we must not emit a bare `message_stop`. Synthesize whatever
|
||||
// is missing to keep the client's state machine consistent.
|
||||
self.ensure_message_started();
|
||||
|
||||
if self.needs_content_block_stop {
|
||||
let content_block_stop =
|
||||
AnthropicMessagesStreamBuffer::create_content_block_stop_event();
|
||||
self.buffered_events.push(content_block_stop);
|
||||
self.needs_content_block_stop = false;
|
||||
}
|
||||
|
||||
// If no message_delta has been emitted yet (empty/filtered upstream
|
||||
// response), synthesize a minimal one carrying `end_turn`.
|
||||
if !self.seen_message_delta {
|
||||
// If we also never opened a content block, open and close one
|
||||
// so clients that expect at least one block are happy.
|
||||
if self.content_block_start_indices.is_empty() {
|
||||
let content_block_start =
|
||||
AnthropicMessagesStreamBuffer::create_content_block_start_event(
|
||||
);
|
||||
self.buffered_events.push(content_block_start);
|
||||
self.set_content_block_start_sent(0);
|
||||
let content_block_stop =
|
||||
AnthropicMessagesStreamBuffer::create_content_block_stop_event(
|
||||
);
|
||||
self.buffered_events.push(content_block_stop);
|
||||
}
|
||||
self.push_synthetic_message_delta();
|
||||
}
|
||||
|
||||
// Push the upstream-provided message_stop and mark closed.
|
||||
// `push_message_stop` is idempotent but we want to reuse the
|
||||
// original SseEvent so raw passthrough semantics are preserved.
|
||||
self.buffered_events.push(event);
|
||||
self.message_stopped = true;
|
||||
self.seen_message_delta = false;
|
||||
}
|
||||
_ => {
|
||||
// Other Anthropic event types (Ping, etc.), just accumulate
|
||||
|
|
@ -254,24 +378,23 @@ impl SseStreamBufferTrait for AnthropicMessagesStreamBuffer {
|
|||
}
|
||||
|
||||
fn to_bytes(&mut self) -> Vec<u8> {
|
||||
// Convert all accumulated events to bytes and clear buffer
|
||||
// Convert all accumulated events to bytes and clear buffer.
|
||||
//
|
||||
// NOTE: We do NOT inject ContentBlockStop here because it's injected when we see MessageDelta
|
||||
// or MessageStop. Injecting it here causes premature ContentBlockStop in the middle of streaming.
|
||||
|
||||
// Inject MessageStop after MessageDelta if we've seen one
|
||||
// This completes the Anthropic Messages API event sequence
|
||||
if self.seen_message_delta {
|
||||
let message_stop = MessagesStreamEvent::MessageStop;
|
||||
let sse_string: String = message_stop.into();
|
||||
let message_stop_event = SseEvent {
|
||||
data: None,
|
||||
event: Some("message_stop".to_string()),
|
||||
raw_line: sse_string.clone(),
|
||||
sse_transformed_lines: sse_string,
|
||||
provider_stream_response: None,
|
||||
};
|
||||
self.buffered_events.push(message_stop_event);
|
||||
self.seen_message_delta = false;
|
||||
//
|
||||
// Inject a synthetic `message_stop` only when:
|
||||
// 1. A `message_delta` has been seen (otherwise we'd violate the Anthropic
|
||||
// protocol by emitting `message_stop` without a preceding `message_delta`), AND
|
||||
// 2. We haven't already emitted `message_stop` (either synthetic from a
|
||||
// previous flush, or real from an upstream `[DONE]`).
|
||||
//
|
||||
// Without the `!message_stopped` guard, a stream whose `finish_reason` chunk
|
||||
// and `[DONE]` marker land in separate HTTP body chunks would receive two
|
||||
// `message_stop` events, triggering Claude Code's "Received message_stop
|
||||
// without a current message" error.
|
||||
if self.seen_message_delta && !self.message_stopped {
|
||||
self.push_message_stop();
|
||||
}
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
|
|
@ -615,4 +738,133 @@ data: [DONE]"#;
|
|||
println!("✓ Stop reason: tool_use");
|
||||
println!("✓ Proper Anthropic tool_use protocol\n");
|
||||
}
|
||||
|
||||
/// Regression test for:
|
||||
/// Claude Code CLI error: "Received message_stop without a current message"
|
||||
///
|
||||
/// Reproduces the *double-close* scenario: OpenAI's final `finish_reason`
|
||||
/// chunk and the `[DONE]` marker arrive in **separate** HTTP body chunks, so
|
||||
/// `to_bytes()` is called between them. Before the fix, this produced two
|
||||
/// `message_stop` events on the wire (one synthetic, one from `[DONE]`).
|
||||
#[test]
|
||||
fn test_openai_to_anthropic_emits_single_message_stop_across_chunk_boundary() {
|
||||
let client_api = SupportedAPIsFromClient::AnthropicMessagesAPI(AnthropicApi::Messages);
|
||||
let upstream_api = SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
|
||||
let mut buffer = AnthropicMessagesStreamBuffer::new();
|
||||
|
||||
// --- HTTP chunk 1: content + finish_reason (no [DONE] yet) -----------
|
||||
let chunk_1 = r#"data: {"id":"c1","object":"chat.completion.chunk","created":1,"model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant","content":"Hi"},"finish_reason":null}]}
|
||||
|
||||
data: {"id":"c1","object":"chat.completion.chunk","created":1,"model":"gpt-4o","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}"#;
|
||||
|
||||
for raw in SseStreamIter::try_from(chunk_1.as_bytes()).unwrap() {
|
||||
let e = SseEvent::try_from((raw, &client_api, &upstream_api)).unwrap();
|
||||
buffer.add_transformed_event(e);
|
||||
}
|
||||
let out_1 = String::from_utf8(buffer.to_bytes()).unwrap();
|
||||
|
||||
// --- HTTP chunk 2: just the [DONE] marker ----------------------------
|
||||
let chunk_2 = "data: [DONE]";
|
||||
for raw in SseStreamIter::try_from(chunk_2.as_bytes()).unwrap() {
|
||||
let e = SseEvent::try_from((raw, &client_api, &upstream_api)).unwrap();
|
||||
buffer.add_transformed_event(e);
|
||||
}
|
||||
let out_2 = String::from_utf8(buffer.to_bytes()).unwrap();
|
||||
|
||||
let combined = format!("{}{}", out_1, out_2);
|
||||
let start_count = combined.matches("event: message_start").count();
|
||||
let stop_count = combined.matches("event: message_stop").count();
|
||||
|
||||
assert_eq!(
|
||||
start_count, 1,
|
||||
"Must emit exactly one message_start across chunks, got {start_count}. Output:\n{combined}"
|
||||
);
|
||||
assert_eq!(
|
||||
stop_count, 1,
|
||||
"Must emit exactly one message_stop across chunks (no double-close), got {stop_count}. Output:\n{combined}"
|
||||
);
|
||||
// Every message_stop must be preceded by a message_start earlier in the stream.
|
||||
let start_pos = combined.find("event: message_start").unwrap();
|
||||
let stop_pos = combined.find("event: message_stop").unwrap();
|
||||
assert!(
|
||||
start_pos < stop_pos,
|
||||
"message_start must come before message_stop. Output:\n{combined}"
|
||||
);
|
||||
}
|
||||
|
||||
/// Regression test for:
|
||||
/// "Received message_stop without a current message" on empty upstream responses.
|
||||
///
|
||||
/// OpenAI returns only `[DONE]` with no content deltas and no `finish_reason`
|
||||
/// (this happens with content filters, truncated upstream streams, and some
|
||||
/// 5xx recoveries). Before the fix, the buffer emitted a bare `message_stop`
|
||||
/// with no preceding `message_start`. After the fix, it synthesizes a
|
||||
/// minimal but well-formed envelope.
|
||||
#[test]
|
||||
fn test_openai_done_only_stream_synthesizes_valid_envelope() {
|
||||
let client_api = SupportedAPIsFromClient::AnthropicMessagesAPI(AnthropicApi::Messages);
|
||||
let upstream_api = SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
|
||||
let mut buffer = AnthropicMessagesStreamBuffer::new();
|
||||
|
||||
let raw_input = "data: [DONE]";
|
||||
for raw in SseStreamIter::try_from(raw_input.as_bytes()).unwrap() {
|
||||
let e = SseEvent::try_from((raw, &client_api, &upstream_api)).unwrap();
|
||||
buffer.add_transformed_event(e);
|
||||
}
|
||||
let out = String::from_utf8(buffer.to_bytes()).unwrap();
|
||||
|
||||
assert!(
|
||||
out.contains("event: message_start"),
|
||||
"Empty upstream must still produce message_start. Output:\n{out}"
|
||||
);
|
||||
assert!(
|
||||
out.contains("event: message_delta"),
|
||||
"Empty upstream must produce a synthesized message_delta. Output:\n{out}"
|
||||
);
|
||||
assert_eq!(
|
||||
out.matches("event: message_stop").count(),
|
||||
1,
|
||||
"Empty upstream must produce exactly one message_stop. Output:\n{out}"
|
||||
);
|
||||
|
||||
// Protocol ordering: start < delta < stop.
|
||||
let p_start = out.find("event: message_start").unwrap();
|
||||
let p_delta = out.find("event: message_delta").unwrap();
|
||||
let p_stop = out.find("event: message_stop").unwrap();
|
||||
assert!(
|
||||
p_start < p_delta && p_delta < p_stop,
|
||||
"Bad ordering. Output:\n{out}"
|
||||
);
|
||||
}
|
||||
|
||||
/// Regression test: events arriving after `message_stop` (e.g. a stray `[DONE]`
|
||||
/// echo, or late-arriving deltas from a racing upstream) must be dropped
|
||||
/// rather than written after the terminal frame.
|
||||
#[test]
|
||||
fn test_events_after_message_stop_are_dropped() {
|
||||
let client_api = SupportedAPIsFromClient::AnthropicMessagesAPI(AnthropicApi::Messages);
|
||||
let upstream_api = SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
|
||||
let mut buffer = AnthropicMessagesStreamBuffer::new();
|
||||
|
||||
let first = r#"data: {"id":"c1","object":"chat.completion.chunk","created":1,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"ok"},"finish_reason":"stop"}]}
|
||||
|
||||
data: [DONE]"#;
|
||||
for raw in SseStreamIter::try_from(first.as_bytes()).unwrap() {
|
||||
let e = SseEvent::try_from((raw, &client_api, &upstream_api)).unwrap();
|
||||
buffer.add_transformed_event(e);
|
||||
}
|
||||
let _ = buffer.to_bytes();
|
||||
|
||||
// Simulate a duplicate / late `[DONE]` after the stream was already closed.
|
||||
let late = "data: [DONE]";
|
||||
for raw in SseStreamIter::try_from(late.as_bytes()).unwrap() {
|
||||
let e = SseEvent::try_from((raw, &client_api, &upstream_api)).unwrap();
|
||||
buffer.add_transformed_event(e);
|
||||
}
|
||||
let tail = String::from_utf8(buffer.to_bytes()).unwrap();
|
||||
assert!(
|
||||
tail.is_empty(),
|
||||
"No bytes should be emitted after message_stop, got: {tail:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -95,6 +95,7 @@ providers:
|
|||
anthropic:
|
||||
- anthropic/claude-sonnet-4-6
|
||||
- anthropic/claude-opus-4-6
|
||||
- anthropic/claude-opus-4-7
|
||||
- anthropic/claude-opus-4-5-20251101
|
||||
- anthropic/claude-opus-4-5
|
||||
- anthropic/claude-haiku-4-5-20251001
|
||||
|
|
@ -328,7 +329,53 @@ providers:
|
|||
- xiaomi/mimo-v2-flash
|
||||
- xiaomi/mimo-v2-omni
|
||||
- xiaomi/mimo-v2-pro
|
||||
digitalocean:
|
||||
- digitalocean/openai-gpt-4.1
|
||||
- digitalocean/openai-gpt-4o
|
||||
- digitalocean/openai-gpt-4o-mini
|
||||
- digitalocean/openai-gpt-5
|
||||
- digitalocean/openai-gpt-5-mini
|
||||
- digitalocean/openai-gpt-5-nano
|
||||
- digitalocean/openai-gpt-5.1-codex-max
|
||||
- digitalocean/openai-gpt-5.2
|
||||
- digitalocean/openai-gpt-5.2-pro
|
||||
- digitalocean/openai-gpt-5.3-codex
|
||||
- digitalocean/openai-gpt-5.4
|
||||
- digitalocean/openai-gpt-5.4-mini
|
||||
- digitalocean/openai-gpt-5.4-nano
|
||||
- digitalocean/openai-gpt-5.4-pro
|
||||
- digitalocean/openai-gpt-oss-120b
|
||||
- digitalocean/openai-gpt-oss-20b
|
||||
- digitalocean/openai-o1
|
||||
- digitalocean/openai-o3
|
||||
- digitalocean/openai-o3-mini
|
||||
- digitalocean/anthropic-claude-4.1-opus
|
||||
- digitalocean/anthropic-claude-4.5-sonnet
|
||||
- digitalocean/anthropic-claude-4.6-sonnet
|
||||
- digitalocean/anthropic-claude-haiku-4.5
|
||||
- digitalocean/anthropic-claude-opus-4
|
||||
- digitalocean/anthropic-claude-opus-4.5
|
||||
- digitalocean/anthropic-claude-opus-4.6
|
||||
- digitalocean/anthropic-claude-opus-4.7
|
||||
- digitalocean/anthropic-claude-sonnet-4
|
||||
- digitalocean/alibaba-qwen3-32b
|
||||
- digitalocean/arcee-trinity-large-thinking
|
||||
- digitalocean/deepseek-3.2
|
||||
- digitalocean/deepseek-r1-distill-llama-70b
|
||||
- digitalocean/gemma-4-31B-it
|
||||
- digitalocean/glm-5
|
||||
- digitalocean/kimi-k2.5
|
||||
- digitalocean/llama3.3-70b-instruct
|
||||
- digitalocean/minimax-m2.5
|
||||
- digitalocean/nvidia-nemotron-3-super-120b
|
||||
- digitalocean/qwen3-coder-flash
|
||||
- digitalocean/qwen3.5-397b-a17b
|
||||
- digitalocean/all-mini-lm-l6-v2
|
||||
- digitalocean/gte-large-en-v1.5
|
||||
- digitalocean/multi-qa-mpnet-base-dot-v1
|
||||
- digitalocean/qwen3-embedding-0.6b
|
||||
- digitalocean/router:software-engineering
|
||||
metadata:
|
||||
total_providers: 11
|
||||
total_models: 316
|
||||
last_updated: 2026-04-03T23:14:46.956158+00:00
|
||||
total_providers: 12
|
||||
total_models: 361
|
||||
last_updated: 2026-04-16T00:00:00.000000+00:00
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ pub enum ProviderId {
|
|||
AmazonBedrock,
|
||||
Vercel,
|
||||
OpenRouter,
|
||||
DigitalOcean,
|
||||
}
|
||||
|
||||
impl TryFrom<&str> for ProviderId {
|
||||
|
|
@ -75,6 +76,9 @@ impl TryFrom<&str> for ProviderId {
|
|||
"amazon" => Ok(ProviderId::AmazonBedrock), // alias
|
||||
"vercel" => Ok(ProviderId::Vercel),
|
||||
"openrouter" => Ok(ProviderId::OpenRouter),
|
||||
"digitalocean" => Ok(ProviderId::DigitalOcean),
|
||||
"do" => Ok(ProviderId::DigitalOcean), // alias
|
||||
"do_ai" => Ok(ProviderId::DigitalOcean), // alias
|
||||
_ => Err(format!("Unknown provider: {}", value)),
|
||||
}
|
||||
}
|
||||
|
|
@ -99,6 +103,7 @@ impl ProviderId {
|
|||
ProviderId::Moonshotai => "moonshotai",
|
||||
ProviderId::Zhipu => "z-ai",
|
||||
ProviderId::Qwen => "qwen",
|
||||
ProviderId::DigitalOcean => "digitalocean",
|
||||
// Vercel and OpenRouter are open-ended gateways; model lists are unbounded.
|
||||
// Users configure these with wildcards (e.g. vercel/*); no static expansion needed.
|
||||
ProviderId::Vercel | ProviderId::OpenRouter => return Vec::new(),
|
||||
|
|
@ -157,7 +162,8 @@ impl ProviderId {
|
|||
| ProviderId::Zhipu
|
||||
| ProviderId::Qwen
|
||||
| ProviderId::Vercel
|
||||
| ProviderId::OpenRouter,
|
||||
| ProviderId::OpenRouter
|
||||
| ProviderId::DigitalOcean,
|
||||
SupportedAPIsFromClient::AnthropicMessagesAPI(_),
|
||||
) => SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions),
|
||||
|
||||
|
|
@ -178,7 +184,8 @@ impl ProviderId {
|
|||
| ProviderId::Zhipu
|
||||
| ProviderId::Qwen
|
||||
| ProviderId::Vercel
|
||||
| ProviderId::OpenRouter,
|
||||
| ProviderId::OpenRouter
|
||||
| ProviderId::DigitalOcean,
|
||||
SupportedAPIsFromClient::OpenAIChatCompletions(_),
|
||||
) => SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions),
|
||||
|
||||
|
|
@ -247,6 +254,7 @@ impl Display for ProviderId {
|
|||
ProviderId::AmazonBedrock => write!(f, "amazon_bedrock"),
|
||||
ProviderId::Vercel => write!(f, "vercel"),
|
||||
ProviderId::OpenRouter => write!(f, "openrouter"),
|
||||
ProviderId::DigitalOcean => write!(f, "digitalocean"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,31 @@ pub trait TokenUsage {
|
|||
fn completion_tokens(&self) -> usize;
|
||||
fn prompt_tokens(&self) -> usize;
|
||||
fn total_tokens(&self) -> usize;
|
||||
/// Tokens served from a prompt cache read (OpenAI `prompt_tokens_details.cached_tokens`,
|
||||
/// Anthropic `cache_read_input_tokens`, Google `cached_content_token_count`).
|
||||
fn cached_input_tokens(&self) -> Option<usize> {
|
||||
None
|
||||
}
|
||||
/// Tokens used to write a cache entry (Anthropic `cache_creation_input_tokens`).
|
||||
fn cache_creation_tokens(&self) -> Option<usize> {
|
||||
None
|
||||
}
|
||||
/// Reasoning tokens for reasoning models (OpenAI `completion_tokens_details.reasoning_tokens`,
|
||||
/// Google `thoughts_token_count`).
|
||||
fn reasoning_tokens(&self) -> Option<usize> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Rich usage breakdown extracted from a provider response.
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
|
||||
pub struct UsageDetails {
|
||||
pub prompt_tokens: usize,
|
||||
pub completion_tokens: usize,
|
||||
pub total_tokens: usize,
|
||||
pub cached_input_tokens: Option<usize>,
|
||||
pub cache_creation_tokens: Option<usize>,
|
||||
pub reasoning_tokens: Option<usize>,
|
||||
}
|
||||
|
||||
pub trait ProviderResponse: Send + Sync {
|
||||
|
|
@ -34,6 +59,18 @@ pub trait ProviderResponse: Send + Sync {
|
|||
self.usage()
|
||||
.map(|u| (u.prompt_tokens(), u.completion_tokens(), u.total_tokens()))
|
||||
}
|
||||
|
||||
/// Extract a rich usage breakdown including cached/cache-creation/reasoning tokens.
|
||||
fn extract_usage_details(&self) -> Option<UsageDetails> {
|
||||
self.usage().map(|u| UsageDetails {
|
||||
prompt_tokens: u.prompt_tokens(),
|
||||
completion_tokens: u.completion_tokens(),
|
||||
total_tokens: u.total_tokens(),
|
||||
cached_input_tokens: u.cached_input_tokens(),
|
||||
cache_creation_tokens: u.cache_creation_tokens(),
|
||||
reasoning_tokens: u.reasoning_tokens(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl ProviderResponse for ProviderResponseType {
|
||||
|
|
|
|||
|
|
@ -346,12 +346,10 @@ impl TryFrom<(SseEvent, &SupportedAPIsFromClient, &SupportedUpstreamAPIs)> for S
|
|||
(
|
||||
SupportedAPIsFromClient::OpenAIChatCompletions(_),
|
||||
SupportedUpstreamAPIs::AnthropicMessagesAPI(_),
|
||||
) => {
|
||||
) if transformed_event.is_event_only() && transformed_event.event.is_some() => {
|
||||
// OpenAI clients don't expect separate event: lines
|
||||
// Suppress upstream Anthropic event-only lines
|
||||
if transformed_event.is_event_only() && transformed_event.event.is_some() {
|
||||
transformed_event.sse_transformed_lines = "\n".to_string();
|
||||
}
|
||||
transformed_event.sse_transformed_lines = "\n".to_string();
|
||||
}
|
||||
_ => {
|
||||
// Other cross-API combinations can be handled here as needed
|
||||
|
|
@ -371,12 +369,10 @@ impl TryFrom<(SseEvent, &SupportedAPIsFromClient, &SupportedUpstreamAPIs)> for S
|
|||
| (
|
||||
SupportedAPIsFromClient::OpenAIResponsesAPI(_),
|
||||
SupportedUpstreamAPIs::OpenAIResponsesAPI(_),
|
||||
) => {
|
||||
if transformed_event.is_event_only() && transformed_event.event.is_some() {
|
||||
// Mark as should-skip by clearing sse_transformed_lines
|
||||
// The event line is already included when the data line is transformed
|
||||
transformed_event.sse_transformed_lines = String::new();
|
||||
}
|
||||
) if transformed_event.is_event_only() && transformed_event.event.is_some() => {
|
||||
// Mark as should-skip by clearing sse_transformed_lines
|
||||
// The event line is already included when the data line is transformed
|
||||
transformed_event.sse_transformed_lines = String::new();
|
||||
}
|
||||
_ => {
|
||||
// Other passthrough combinations (OpenAI ChatCompletions, etc.) don't have this issue
|
||||
|
|
|
|||
|
|
@ -188,14 +188,13 @@ pub fn convert_openai_message_to_anthropic_content(
|
|||
|
||||
// Handle regular content
|
||||
match &message.content {
|
||||
Some(MessageContent::Text(text)) => {
|
||||
if !text.is_empty() {
|
||||
blocks.push(MessagesContentBlock::Text {
|
||||
text: text.clone(),
|
||||
cache_control: None,
|
||||
});
|
||||
}
|
||||
Some(MessageContent::Text(text)) if !text.is_empty() => {
|
||||
blocks.push(MessagesContentBlock::Text {
|
||||
text: text.clone(),
|
||||
cache_control: None,
|
||||
});
|
||||
}
|
||||
Some(MessageContent::Text(_)) => {}
|
||||
Some(MessageContent::Parts(parts)) => {
|
||||
for part in parts {
|
||||
match part {
|
||||
|
|
|
|||
|
|
@ -354,10 +354,10 @@ impl TryFrom<MessagesMessage> for BedrockMessage {
|
|||
MessagesMessageContent::Blocks(blocks) => {
|
||||
for block in blocks {
|
||||
match block {
|
||||
crate::apis::anthropic::MessagesContentBlock::Text { text, .. } => {
|
||||
if !text.is_empty() {
|
||||
content_blocks.push(ContentBlock::Text { text });
|
||||
}
|
||||
crate::apis::anthropic::MessagesContentBlock::Text { text, .. }
|
||||
if !text.is_empty() =>
|
||||
{
|
||||
content_blocks.push(ContentBlock::Text { text });
|
||||
}
|
||||
crate::apis::anthropic::MessagesContentBlock::ToolUse {
|
||||
id,
|
||||
|
|
|
|||
|
|
@ -317,11 +317,10 @@ impl TryFrom<Message> for BedrockMessage {
|
|||
Role::User => {
|
||||
// Convert user message content to content blocks
|
||||
match message.content {
|
||||
Some(MessageContent::Text(text)) => {
|
||||
if !text.is_empty() {
|
||||
content_blocks.push(ContentBlock::Text { text });
|
||||
}
|
||||
Some(MessageContent::Text(text)) if !text.is_empty() => {
|
||||
content_blocks.push(ContentBlock::Text { text });
|
||||
}
|
||||
Some(MessageContent::Text(_)) => {}
|
||||
Some(MessageContent::Parts(parts)) => {
|
||||
// Convert OpenAI content parts to Bedrock ContentBlocks
|
||||
for part in parts {
|
||||
|
|
|
|||
|
|
@ -177,24 +177,33 @@ impl StreamContext {
|
|||
}
|
||||
|
||||
fn modify_auth_headers(&mut self) -> Result<(), ServerError> {
|
||||
if self.llm_provider().passthrough_auth == Some(true) {
|
||||
// Check if client provided an Authorization header
|
||||
if self.get_http_request_header("Authorization").is_none() {
|
||||
warn!(
|
||||
"request_id={}: passthrough_auth enabled but no authorization header present in client request",
|
||||
self.request_identifier()
|
||||
);
|
||||
} else {
|
||||
debug!(
|
||||
"request_id={}: preserving client authorization header for provider '{}'",
|
||||
self.request_identifier(),
|
||||
self.llm_provider().name
|
||||
);
|
||||
// Determine the credential to forward upstream. Either the client
|
||||
// supplied one (passthrough_auth) or it's configured on the provider.
|
||||
let credential: String = if self.llm_provider().passthrough_auth == Some(true) {
|
||||
// Client auth may arrive in either Anthropic-style (`x-api-key`)
|
||||
// or OpenAI-style (`Authorization: Bearer ...`). Accept both so
|
||||
// clients using Anthropic SDKs (which default to `x-api-key`)
|
||||
// work when the upstream is OpenAI-compatible, and vice versa.
|
||||
let authorization = self.get_http_request_header("Authorization");
|
||||
let x_api_key = self.get_http_request_header("x-api-key");
|
||||
match extract_client_credential(authorization.as_deref(), x_api_key.as_deref()) {
|
||||
Some(key) => {
|
||||
debug!(
|
||||
"request_id={}: forwarding client credential to provider '{}'",
|
||||
self.request_identifier(),
|
||||
self.llm_provider().name
|
||||
);
|
||||
key
|
||||
}
|
||||
None => {
|
||||
warn!(
|
||||
"request_id={}: passthrough_auth enabled but no Authorization / x-api-key header present in client request",
|
||||
self.request_identifier()
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let llm_provider_api_key_value =
|
||||
} else {
|
||||
self.llm_provider()
|
||||
.access_key
|
||||
.as_ref()
|
||||
|
|
@ -203,15 +212,19 @@ impl StreamContext {
|
|||
"No access key configured for selected LLM Provider \"{}\"",
|
||||
self.llm_provider()
|
||||
),
|
||||
})?;
|
||||
})?
|
||||
.clone()
|
||||
};
|
||||
|
||||
// Set API-specific headers based on the resolved upstream API
|
||||
// Normalize the credential into whichever header the upstream expects.
|
||||
// This lets an Anthropic-SDK client reach an OpenAI-compatible upstream
|
||||
// (and vice versa) without the caller needing to know what format the
|
||||
// upstream uses.
|
||||
match self.resolved_api.as_ref() {
|
||||
Some(SupportedUpstreamAPIs::AnthropicMessagesAPI(_)) => {
|
||||
// Anthropic API requires x-api-key and anthropic-version headers
|
||||
// Remove any existing Authorization header since Anthropic doesn't use it
|
||||
// Anthropic expects `x-api-key` + `anthropic-version`.
|
||||
self.remove_http_request_header("Authorization");
|
||||
self.set_http_request_header("x-api-key", Some(llm_provider_api_key_value));
|
||||
self.set_http_request_header("x-api-key", Some(&credential));
|
||||
self.set_http_request_header("anthropic-version", Some("2023-06-01"));
|
||||
}
|
||||
Some(
|
||||
|
|
@ -221,10 +234,9 @@ impl StreamContext {
|
|||
| SupportedUpstreamAPIs::OpenAIResponsesAPI(_),
|
||||
)
|
||||
| None => {
|
||||
// OpenAI and default: use Authorization Bearer token
|
||||
// Remove any existing x-api-key header since OpenAI doesn't use it
|
||||
// OpenAI (and default): `Authorization: Bearer ...`.
|
||||
self.remove_http_request_header("x-api-key");
|
||||
let authorization_header_value = format!("Bearer {}", llm_provider_api_key_value);
|
||||
let authorization_header_value = format!("Bearer {}", credential);
|
||||
self.set_http_request_header("Authorization", Some(&authorization_header_value));
|
||||
}
|
||||
}
|
||||
|
|
@ -1235,3 +1247,86 @@ fn current_time_ns() -> u128 {
|
|||
}
|
||||
|
||||
impl Context for StreamContext {}
|
||||
|
||||
/// Extract the credential a client sent in either an OpenAI-style
|
||||
/// `Authorization` header or an Anthropic-style `x-api-key` header.
|
||||
///
|
||||
/// Returns `None` when neither header is present or both are empty/whitespace.
|
||||
/// The `Bearer ` prefix on the `Authorization` value is stripped if present;
|
||||
/// otherwise the value is taken verbatim (some clients send a raw token).
|
||||
fn extract_client_credential(
|
||||
authorization: Option<&str>,
|
||||
x_api_key: Option<&str>,
|
||||
) -> Option<String> {
|
||||
// Strip the optional "Bearer " / "Bearer" prefix (case-sensitive, matches
|
||||
// OpenAI SDK behavior) and trim surrounding whitespace before validating
|
||||
// non-empty.
|
||||
let from_authorization = authorization
|
||||
.map(|v| {
|
||||
v.strip_prefix("Bearer ")
|
||||
.or_else(|| v.strip_prefix("Bearer"))
|
||||
.unwrap_or(v)
|
||||
.trim()
|
||||
.to_string()
|
||||
})
|
||||
.filter(|s| !s.is_empty());
|
||||
if from_authorization.is_some() {
|
||||
return from_authorization;
|
||||
}
|
||||
x_api_key
|
||||
.map(str::trim)
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(|s| s.to_string())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::extract_client_credential;
|
||||
|
||||
#[test]
|
||||
fn authorization_bearer_strips_prefix() {
|
||||
assert_eq!(
|
||||
extract_client_credential(Some("Bearer sk-abc"), None),
|
||||
Some("sk-abc".to_string())
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn authorization_raw_token_preserved() {
|
||||
// Some clients send the raw token without "Bearer " — accept it.
|
||||
assert_eq!(
|
||||
extract_client_credential(Some("sk-abc"), None),
|
||||
Some("sk-abc".to_string())
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn x_api_key_used_when_authorization_absent() {
|
||||
assert_eq!(
|
||||
extract_client_credential(None, Some("sk-ant-api-key")),
|
||||
Some("sk-ant-api-key".to_string())
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn authorization_wins_when_both_present() {
|
||||
// If a client is particularly exotic and sends both, prefer the
|
||||
// OpenAI-style Authorization header.
|
||||
assert_eq!(
|
||||
extract_client_credential(Some("Bearer openai-key"), Some("anthropic-key")),
|
||||
Some("openai-key".to_string())
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn returns_none_when_neither_present() {
|
||||
assert!(extract_client_credential(None, None).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_and_whitespace_headers_are_ignored() {
|
||||
assert!(extract_client_credential(Some(""), None).is_none());
|
||||
assert!(extract_client_credential(Some("Bearer "), None).is_none());
|
||||
assert!(extract_client_credential(Some(" "), Some(" ")).is_none());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ This demo shows how you can use user preferences to route user prompts to approp
|
|||
|
||||
## How to start the demo
|
||||
|
||||
Make sure you have Plano CLI installed (`pip install planoai==0.4.19` or `uv tool install planoai==0.4.19`).
|
||||
Make sure you have Plano CLI installed (`pip install planoai==0.4.20` or `uv tool install planoai==0.4.20`).
|
||||
|
||||
```bash
|
||||
cd demos/llm_routing/preference_based_routing
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ from sphinxawesome_theme.postprocess import Icons
|
|||
project = "Plano Docs"
|
||||
copyright = "2026, Katanemo Labs, a DigitalOcean Company"
|
||||
author = "Katanemo Labs, Inc"
|
||||
release = " v0.4.19"
|
||||
release = " v0.4.20"
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ Plano's CLI allows you to manage and interact with the Plano efficiently. To ins
|
|||
|
||||
.. code-block:: console
|
||||
|
||||
$ uv tool install planoai==0.4.19
|
||||
$ uv tool install planoai==0.4.20
|
||||
|
||||
**Option 2: Install with pip (Traditional)**
|
||||
|
||||
|
|
@ -51,7 +51,7 @@ Plano's CLI allows you to manage and interact with the Plano efficiently. To ins
|
|||
|
||||
$ python -m venv venv
|
||||
$ source venv/bin/activate # On Windows, use: venv\Scripts\activate
|
||||
$ pip install planoai==0.4.19
|
||||
$ pip install planoai==0.4.20
|
||||
|
||||
|
||||
.. _llm_routing_quickstart:
|
||||
|
|
@ -340,6 +340,67 @@ And to get the list of supported currencies:
|
|||
"Here is a list of the currencies that are supported for conversion from USD, along with their symbols:\n\n1. AUD - Australian Dollar\n2. BGN - Bulgarian Lev\n3. BRL - Brazilian Real\n4. CAD - Canadian Dollar\n5. CHF - Swiss Franc\n6. CNY - Chinese Renminbi Yuan\n7. CZK - Czech Koruna\n8. DKK - Danish Krone\n9. EUR - Euro\n10. GBP - British Pound\n11. HKD - Hong Kong Dollar\n12. HUF - Hungarian Forint\n13. IDR - Indonesian Rupiah\n14. ILS - Israeli New Sheqel\n15. INR - Indian Rupee\n16. ISK - Icelandic Króna\n17. JPY - Japanese Yen\n18. KRW - South Korean Won\n19. MXN - Mexican Peso\n20. MYR - Malaysian Ringgit\n21. NOK - Norwegian Krone\n22. NZD - New Zealand Dollar\n23. PHP - Philippine Peso\n24. PLN - Polish Złoty\n25. RON - Romanian Leu\n26. SEK - Swedish Krona\n27. SGD - Singapore Dollar\n28. THB - Thai Baht\n29. TRY - Turkish Lira\n30. USD - United States Dollar\n31. ZAR - South African Rand\n\nIf you want to convert USD to any of these currencies, you can select the one you are interested in."
|
||||
|
||||
|
||||
Observability
|
||||
-------------
|
||||
|
||||
Plano ships two CLI tools for visibility into LLM traffic. Both consume the same OTLP/gRPC span stream from brightstaff; they just slice it differently — use whichever (or both) fits the question you're answering.
|
||||
|
||||
===================== ============================================ =============================================================
|
||||
Command When to use Shows
|
||||
===================== ============================================ =============================================================
|
||||
``planoai obs`` Live view while you drive traffic Per-request rows + aggregates: tokens (prompt / completion / cached / cache-creation / reasoning), TTFT, latency, cost, session id, route name, totals by model
|
||||
``planoai trace`` Deep-dive into a single request after the fact Full span tree for a trace id: brightstaff → routing → upstream LLM, attributes on every span, status codes, errors
|
||||
===================== ============================================ =============================================================
|
||||
|
||||
Both require brightstaff to be exporting spans. If you're running the zero-config path (``planoai up`` with no config file), tracing is auto-wired to ``http://localhost:4317``. If you have your own ``plano_config.yaml``, add:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
tracing:
|
||||
random_sampling: 100
|
||||
opentracing_grpc_endpoint: http://localhost:4317
|
||||
|
||||
Live console — ``planoai obs``
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ planoai obs
|
||||
# In another terminal:
|
||||
$ planoai up
|
||||
|
||||
Cost is populated automatically from DigitalOcean's public pricing catalog — no signup or token required.
|
||||
|
||||
With no API keys set, every provider runs in pass-through mode — supply the ``Authorization`` header yourself on each request:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ curl localhost:12000/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer $DO_API_KEY" \
|
||||
-d '{"model":"digitalocean/router:software-engineering",
|
||||
"messages":[{"role":"user","content":"write code to print prime numbers in python"}],
|
||||
"stream":false}'
|
||||
|
||||
When you export ``OPENAI_API_KEY`` / ``ANTHROPIC_API_KEY`` / ``DO_API_KEY`` / etc. before ``planoai up``, Plano picks them up and clients no longer need to send ``Authorization``.
|
||||
|
||||
Press ``Ctrl-C`` in the obs terminal to exit. Data lives in memory only — nothing is persisted to disk.
|
||||
|
||||
Single-request traces — ``planoai trace``
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
When you need to understand what happened on one specific request (which model was picked, how long each hop took, what an upstream returned), use ``trace``:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ planoai trace listen # start the OTLP listener (daemon)
|
||||
# drive some traffic through localhost:12000 ...
|
||||
$ planoai trace # show the most recent trace
|
||||
$ planoai trace <trace-id> # show a specific trace by id
|
||||
$ planoai trace --list # list the last 50 trace ids
|
||||
|
||||
Use ``obs`` to spot that p95 latency spiked for ``openai-gpt-5.4``; switch to ``trace`` on one of those slow request ids to see which hop burned the time.
|
||||
|
||||
Next Steps
|
||||
==========
|
||||
|
||||
|
|
|
|||
|
|
@ -75,3 +75,54 @@ are some sample configuration files for both, respectively.
|
|||
isDefault: true
|
||||
access: proxy
|
||||
editable: true
|
||||
|
||||
Brightstaff metrics
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
In addition to Envoy's stats on ``:9901``, the brightstaff dataplane
|
||||
process exposes its own Prometheus endpoint on ``0.0.0.0:9092`` (override
|
||||
with ``METRICS_BIND_ADDRESS``). It publishes:
|
||||
|
||||
* HTTP RED — ``brightstaff_http_requests_total``,
|
||||
``brightstaff_http_request_duration_seconds``,
|
||||
``brightstaff_http_in_flight_requests`` (labels: ``handler``, ``method``,
|
||||
``status_class``).
|
||||
* LLM upstream — ``brightstaff_llm_upstream_requests_total``,
|
||||
``brightstaff_llm_upstream_duration_seconds``,
|
||||
``brightstaff_llm_time_to_first_token_seconds``,
|
||||
``brightstaff_llm_tokens_total`` (labels: ``provider``, ``model``,
|
||||
``error_class``, ``kind``).
|
||||
* Routing — ``brightstaff_router_decisions_total``,
|
||||
``brightstaff_router_decision_duration_seconds``,
|
||||
``brightstaff_routing_service_requests_total``,
|
||||
``brightstaff_session_cache_events_total``.
|
||||
* Process & build — ``process_resident_memory_bytes``,
|
||||
``process_cpu_seconds_total``, ``brightstaff_build_info``.
|
||||
|
||||
A self-contained Prometheus + Grafana stack is shipped under
|
||||
``config/grafana/``. With Plano already running on the host, bring it up
|
||||
with one command:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
cd config/grafana
|
||||
docker compose up -d
|
||||
open http://localhost:3000 # admin / admin (anonymous viewer also enabled)
|
||||
|
||||
Grafana auto-loads the Prometheus datasource and the brightstaff
|
||||
dashboard (look under the *Plano* folder). Prometheus scrapes the host's
|
||||
``:9092`` and ``:9901`` via ``host.docker.internal``.
|
||||
|
||||
Files:
|
||||
|
||||
* ``config/grafana/docker-compose.yaml`` — one-command Prom + Grafana
|
||||
stack with provisioning.
|
||||
* ``config/grafana/prometheus_scrape.yaml`` — complete Prometheus config
|
||||
with ``envoy`` and ``brightstaff`` scrape jobs (mounted by the
|
||||
compose).
|
||||
* ``config/grafana/brightstaff_dashboard.json`` — 19-panel dashboard
|
||||
across HTTP RED, LLM upstream, Routing service, and Process & Envoy
|
||||
link rows. Auto-provisioned by the compose; can also be imported by
|
||||
hand via *Dashboards → New → Import*.
|
||||
* ``config/grafana/provisioning/`` — Grafana provisioning files for the
|
||||
datasource and dashboard provider.
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ Create a ``docker-compose.yml`` file with the following configuration:
|
|||
# docker-compose.yml
|
||||
services:
|
||||
plano:
|
||||
image: katanemo/plano:0.4.19
|
||||
image: katanemo/plano:0.4.20
|
||||
container_name: plano
|
||||
ports:
|
||||
- "10000:10000" # ingress (client -> plano)
|
||||
|
|
@ -153,7 +153,7 @@ Create a ``plano-deployment.yaml``:
|
|||
spec:
|
||||
containers:
|
||||
- name: plano
|
||||
image: katanemo/plano:0.4.19
|
||||
image: katanemo/plano:0.4.20
|
||||
ports:
|
||||
- containerPort: 12000 # LLM gateway (chat completions, model routing)
|
||||
name: llm-gateway
|
||||
|
|
|
|||
|
|
@ -173,6 +173,9 @@ overrides:
|
|||
llm_routing_model: Plano-Orchestrator
|
||||
# Model used for agent orchestration (must be listed in model_providers)
|
||||
agent_orchestration_model: Plano-Orchestrator
|
||||
# Disable agentic signal analysis (frustration, repetition, escalation, etc.)
|
||||
# on LLM responses to save CPU. Default: false.
|
||||
disable_signals: false
|
||||
|
||||
# Model affinity — pin routing decisions for agentic loops
|
||||
routing:
|
||||
|
|
|
|||
|
|
@ -170,6 +170,7 @@ model_providers:
|
|||
provider_interface: plano
|
||||
overrides:
|
||||
agent_orchestration_model: Plano-Orchestrator
|
||||
disable_signals: false
|
||||
llm_routing_model: Plano-Orchestrator
|
||||
optimize_context_window: true
|
||||
prompt_target_intent_matching_threshold: 0.7
|
||||
|
|
|
|||
2109
skills/AGENTS.md
Normal file
2109
skills/AGENTS.md
Normal file
File diff suppressed because it is too large
Load diff
243
skills/README.md
Normal file
243
skills/README.md
Normal file
|
|
@ -0,0 +1,243 @@
|
|||
# Plano Agent Skills
|
||||
|
||||
A structured repository of best practices for building agents and agentic applications with [Plano](https://github.com/katanemo/archgw) — the AI-native proxy and dataplane. Optimized for coding agents and LLMs.
|
||||
|
||||
## What Are Skills?
|
||||
|
||||
Skills are principle-based guides that help coding agents (Claude Code, Cursor, Copilot, etc.) make better decisions when working with Plano. They cover configuration patterns, routing strategies, agent orchestration, observability, and CLI workflows — acting as operating principles, not documentation replacements.
|
||||
|
||||
## Installing
|
||||
|
||||
```bash
|
||||
# Install via npx skills
|
||||
npx skills add katanemo/plano
|
||||
```
|
||||
|
||||
This skills collection is published from the `skills/` directory in the `katanemo/plano` monorepo.
|
||||
|
||||
Install a specific skill:
|
||||
|
||||
```bash
|
||||
npx skills add katanemo/plano --skill plano-routing-model-selection
|
||||
```
|
||||
|
||||
List available skills before install:
|
||||
|
||||
```bash
|
||||
npx skills add katanemo/plano --list
|
||||
```
|
||||
|
||||
## Using Skills in Agents
|
||||
|
||||
After installation, these skills are available to your coding agent and can be invoked with normal language. You do not need special syntax unless your tooling requires it.
|
||||
|
||||
### Natural Language Invocation Examples
|
||||
|
||||
- "Use the Plano skills to validate this `config.yaml` and fix issues."
|
||||
- "Apply Plano routing best practices to improve model/provider selection."
|
||||
- "Review this agent listener config with the orchestration rules."
|
||||
- "Refactor this filter chain to follow guardrail ordering best practices."
|
||||
- "Audit this setup against Plano deployment and security recommendations."
|
||||
|
||||
### Prompting Tips for Better Results
|
||||
|
||||
- Name your goal and file: "Harden `config.yaml` for production."
|
||||
- Ask for an action: "Generate a patch," "fix directly," or "explain the changes."
|
||||
- Include runtime context when relevant: trace output, logs, listener errors.
|
||||
- Ask for verification: "Run a final validation check after edits."
|
||||
|
||||
### Invoke by Skill Area (Optional)
|
||||
|
||||
- **Configuration:** "Use Plano configuration fundamentals on this config."
|
||||
- **Routing:** "Use routing/model-selection skills to tune defaults and aliases."
|
||||
- **Agent orchestration:** "Use agent orchestration skills to improve routing accuracy."
|
||||
- **Filters/guardrails:** "Use filter-chain skills to harden input/output safety."
|
||||
- **Observability:** "Use observability skills to add traceability and debug routing."
|
||||
- **CLI/deployment:** "Use CLI and deployment skills to produce a startup checklist."
|
||||
|
||||
## Available Skills
|
||||
|
||||
- `plano-agent-skills` - Umbrella skill covering all Plano areas
|
||||
- `plano-config-fundamentals` - Config versioning, listeners, providers, secrets
|
||||
- `plano-routing-model-selection` - Defaults, aliases, passthrough auth, preferences
|
||||
- `plano-agent-orchestration` - Agent registration and routing descriptions
|
||||
- `plano-filter-guardrails` - MCP filters, guardrail messaging, filter ordering
|
||||
- `plano-observability-debugging` - Tracing setup, span attributes, trace analysis
|
||||
- `plano-cli-operations` - `planoai up`, `cli_agent`, init, prompt target generation
|
||||
- `plano-deployment-security` - Docker networking, health checks, state storage
|
||||
- `plano-advanced-patterns` - Multi-listener architecture and prompt target schema design
|
||||
|
||||
## Local Testing
|
||||
|
||||
```bash
|
||||
# From repo root
|
||||
npx skills add ./skills --list
|
||||
npx skills add ./skills --skill plano-agent-skills -y
|
||||
npx skills list
|
||||
```
|
||||
|
||||
## Structure
|
||||
|
||||
```
|
||||
skills/
|
||||
├── rules/ # Individual rule files (one per rule)
|
||||
│ ├── _sections.md # Section metadata and prefix definitions
|
||||
│ ├── _template.md # Template for creating new rules
|
||||
│ ├── config-*.md # Section 1: Configuration Fundamentals
|
||||
│ ├── routing-*.md # Section 2: Routing & Model Selection
|
||||
│ ├── agent-*.md # Section 3: Agent Orchestration
|
||||
│ ├── filter-*.md # Section 4: Filter Chains & Guardrails
|
||||
│ ├── observe-*.md # Section 5: Observability & Debugging
|
||||
│ ├── cli-*.md # Section 6: CLI Operations
|
||||
│ ├── deploy-*.md # Section 7: Deployment & Security
|
||||
│ └── advanced-*.md # Section 8: Advanced Patterns
|
||||
├── src/
|
||||
│ ├── build.ts # Compiles rules/ into AGENTS.md
|
||||
│ ├── validate.ts # Validates rule files
|
||||
│ └── extract-tests.ts # Extracts test cases for LLM evaluation
|
||||
├── metadata.json # Document metadata
|
||||
├── AGENTS.md # Compiled output (generated — do not edit directly)
|
||||
├── test-cases.json # Test cases for LLM evaluation (generated)
|
||||
└── package.json
|
||||
```
|
||||
|
||||
## Sections
|
||||
|
||||
| # | Prefix | Section | Rules |
|
||||
|---|--------|---------|-------|
|
||||
| 1 | `config-` | Configuration Fundamentals | Version, listeners, providers, secrets, timeouts |
|
||||
| 2 | `routing-` | Routing & Model Selection | Preferences, aliases, defaults, passthrough |
|
||||
| 3 | `agent-` | Agent Orchestration | Descriptions, agent registration |
|
||||
| 4 | `filter-` | Filter Chains & Guardrails | Ordering, MCP integration, guardrails |
|
||||
| 5 | `observe-` | Observability & Debugging | Tracing, trace inspection, span attributes |
|
||||
| 6 | `cli-` | CLI Operations | Startup, CLI agent, init, code generation |
|
||||
| 7 | `deploy-` | Deployment & Security | Docker networking, state storage, health checks |
|
||||
| 8 | `advanced-` | Advanced Patterns | Prompt targets, rate limits, multi-listener |
|
||||
|
||||
## Getting Started
|
||||
|
||||
```bash
|
||||
# Install dependencies
|
||||
npm install
|
||||
|
||||
# Validate all rule files
|
||||
npm run validate
|
||||
|
||||
# Build AGENTS.md from rules
|
||||
npm run build
|
||||
|
||||
# Extract test cases for LLM evaluation
|
||||
npm run extract-tests
|
||||
|
||||
# Run all of the above
|
||||
npm run dev
|
||||
```
|
||||
|
||||
## Creating a New Rule
|
||||
|
||||
1. Copy `rules/_template.md` to `rules/<prefix>-<description>.md`
|
||||
|
||||
2. Choose the correct prefix for your section:
|
||||
- `config-` — Configuration Fundamentals
|
||||
- `routing-` — Routing & Model Selection
|
||||
- `agent-` — Agent Orchestration
|
||||
- `filter-` — Filter Chains & Guardrails
|
||||
- `observe-` — Observability & Debugging
|
||||
- `cli-` — CLI Operations
|
||||
- `deploy-` — Deployment & Security
|
||||
- `advanced-` — Advanced Patterns
|
||||
|
||||
3. Fill in the frontmatter:
|
||||
```yaml
|
||||
---
|
||||
title: Clear, Actionable Rule Title
|
||||
impact: HIGH
|
||||
impactDescription: One-line description of why this matters
|
||||
tags: config, routing, relevant-tags
|
||||
---
|
||||
```
|
||||
|
||||
4. Write the rule body with:
|
||||
- Brief explanation of the principle and why it matters
|
||||
- **Incorrect** example (YAML config or CLI command showing the wrong pattern)
|
||||
- **Correct** example (the right pattern with comments)
|
||||
- Optional explanatory notes
|
||||
|
||||
5. Run `npm run dev` to validate and regenerate
|
||||
|
||||
## Rule File Structure
|
||||
|
||||
```markdown
|
||||
---
|
||||
title: Rule Title Here
|
||||
impact: CRITICAL
|
||||
impactDescription: One sentence on the impact
|
||||
tags: tag1, tag2, tag3
|
||||
---
|
||||
|
||||
## Rule Title Here
|
||||
|
||||
Brief explanation of the rule and why it matters for Plano developers.
|
||||
|
||||
**Incorrect (describe what's wrong):**
|
||||
|
||||
```yaml
|
||||
# Bad example
|
||||
```
|
||||
|
||||
**Correct (describe what's right):**
|
||||
|
||||
```yaml
|
||||
# Good example with comments explaining the decisions
|
||||
```
|
||||
|
||||
Optional explanatory text, lists, or tables.
|
||||
|
||||
Reference: https://github.com/katanemo/archgw
|
||||
|
||||
|
||||
|
||||
## Impact Levels
|
||||
|
||||
| Level | Description |
|
||||
|-------|-------------|
|
||||
| `CRITICAL` | Causes startup failures or silent misbehavior — always fix |
|
||||
| `HIGH` | Significantly degrades routing accuracy, security, or reliability |
|
||||
| `MEDIUM-HIGH` | Important for production deployments |
|
||||
| `MEDIUM` | Best practice for maintainability and developer experience |
|
||||
| `LOW-MEDIUM` | Incremental improvements |
|
||||
| `LOW` | Nice to have |
|
||||
|
||||
## Key Rules at a Glance
|
||||
|
||||
- **Always set `version: v0.3.0`** — config is rejected without it
|
||||
- **Use `host.docker.internal`** for agent/filter URLs — `localhost` doesn't work inside Docker
|
||||
- **Set exactly one `default: true` provider** — unmatched requests need a fallback
|
||||
- **Write specific routing preference descriptions** — vague descriptions cause misroutes
|
||||
- **Order filter chains: guards → rewriters → context builders** — never build context before blocking bad input
|
||||
- **Use `$VAR_NAME` for all secrets** — never hardcode API keys in config.yaml
|
||||
- **Enable tracing with `--with-tracing`** — traces are the primary debugging tool
|
||||
|
||||
## Scripts
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `npm run build` | Compile `rules/` into `AGENTS.md` |
|
||||
| `npm run validate` | Validate all rule files for required fields and structure |
|
||||
| `npm run extract-tests` | Generate `test-cases.json` for LLM evaluation |
|
||||
| `npm run dev` | Validate + build + extract tests |
|
||||
|
||||
## Contributing
|
||||
|
||||
Rules are automatically sorted alphabetically by title within each section — no need to manage numbers. IDs (`1.1`, `1.2`, etc.) are assigned during build.
|
||||
|
||||
When adding rules:
|
||||
1. Use the correct filename prefix for your section
|
||||
2. Follow `_template.md` structure
|
||||
3. Include clear bad/good YAML or CLI examples
|
||||
4. Add relevant tags
|
||||
5. Run `npm run dev` to validate and regenerate
|
||||
|
||||
## License
|
||||
|
||||
Apache-2.0 — see [LICENSE](../LICENSE)
|
||||
8
skills/metadata.json
Normal file
8
skills/metadata.json
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
"version": "1.0.0",
|
||||
"organization": "Plano",
|
||||
"name": "plano-agent-skills",
|
||||
"abstract": "Best practices for building agents and agentic applications with Plano — the AI-native proxy and dataplane. Covers configuration, routing, agent orchestration, filter chains, observability, CLI operations, and deployment patterns.",
|
||||
"homepage": "https://github.com/katanemo/archgw",
|
||||
"license": "Apache-2.0"
|
||||
}
|
||||
594
skills/package-lock.json
generated
Normal file
594
skills/package-lock.json
generated
Normal file
|
|
@ -0,0 +1,594 @@
|
|||
{
|
||||
"name": "plano-agent-skills",
|
||||
"version": "1.0.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "plano-agent-skills",
|
||||
"version": "1.0.0",
|
||||
"license": "Apache-2.0",
|
||||
"devDependencies": {
|
||||
"@types/node": "^24.3.0",
|
||||
"tsx": "^4.20.5",
|
||||
"typescript": "^5.9.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/aix-ppc64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.3.tgz",
|
||||
"integrity": "sha512-9fJMTNFTWZMh5qwrBItuziu834eOCUcEqymSH7pY+zoMVEZg3gcPuBNxH1EvfVYe9h0x/Ptw8KBzv7qxb7l8dg==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"aix"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-arm": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.3.tgz",
|
||||
"integrity": "sha512-i5D1hPY7GIQmXlXhs2w8AWHhenb00+GxjxRncS2ZM7YNVGNfaMxgzSGuO8o8SJzRc/oZwU2bcScvVERk03QhzA==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-arm64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.3.tgz",
|
||||
"integrity": "sha512-YdghPYUmj/FX2SYKJ0OZxf+iaKgMsKHVPF1MAq/P8WirnSpCStzKJFjOjzsW0QQ7oIAiccHdcqjbHmJxRb/dmg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-x64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.3.tgz",
|
||||
"integrity": "sha512-IN/0BNTkHtk8lkOM8JWAYFg4ORxBkZQf9zXiEOfERX/CzxW3Vg1ewAhU7QSWQpVIzTW+b8Xy+lGzdYXV6UZObQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/darwin-arm64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.3.tgz",
|
||||
"integrity": "sha512-Re491k7ByTVRy0t3EKWajdLIr0gz2kKKfzafkth4Q8A5n1xTHrkqZgLLjFEHVD+AXdUGgQMq+Godfq45mGpCKg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/darwin-x64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.3.tgz",
|
||||
"integrity": "sha512-vHk/hA7/1AckjGzRqi6wbo+jaShzRowYip6rt6q7VYEDX4LEy1pZfDpdxCBnGtl+A5zq8iXDcyuxwtv3hNtHFg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/freebsd-arm64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.3.tgz",
|
||||
"integrity": "sha512-ipTYM2fjt3kQAYOvo6vcxJx3nBYAzPjgTCk7QEgZG8AUO3ydUhvelmhrbOheMnGOlaSFUoHXB6un+A7q4ygY9w==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"freebsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/freebsd-x64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.3.tgz",
|
||||
"integrity": "sha512-dDk0X87T7mI6U3K9VjWtHOXqwAMJBNN2r7bejDsc+j03SEjtD9HrOl8gVFByeM0aJksoUuUVU9TBaZa2rgj0oA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"freebsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-arm": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.3.tgz",
|
||||
"integrity": "sha512-s6nPv2QkSupJwLYyfS+gwdirm0ukyTFNl3KTgZEAiJDd+iHZcbTPPcWCcRYH+WlNbwChgH2QkE9NSlNrMT8Gfw==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-arm64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.3.tgz",
|
||||
"integrity": "sha512-sZOuFz/xWnZ4KH3YfFrKCf1WyPZHakVzTiqji3WDc0BCl2kBwiJLCXpzLzUBLgmp4veFZdvN5ChW4Eq/8Fc2Fg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-ia32": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.3.tgz",
|
||||
"integrity": "sha512-yGlQYjdxtLdh0a3jHjuwOrxQjOZYD/C9PfdbgJJF3TIZWnm/tMd/RcNiLngiu4iwcBAOezdnSLAwQDPqTmtTYg==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-loong64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.3.tgz",
|
||||
"integrity": "sha512-WO60Sn8ly3gtzhyjATDgieJNet/KqsDlX5nRC5Y3oTFcS1l0KWba+SEa9Ja1GfDqSF1z6hif/SkpQJbL63cgOA==",
|
||||
"cpu": [
|
||||
"loong64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-mips64el": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.3.tgz",
|
||||
"integrity": "sha512-APsymYA6sGcZ4pD6k+UxbDjOFSvPWyZhjaiPyl/f79xKxwTnrn5QUnXR5prvetuaSMsb4jgeHewIDCIWljrSxw==",
|
||||
"cpu": [
|
||||
"mips64el"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-ppc64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.3.tgz",
|
||||
"integrity": "sha512-eizBnTeBefojtDb9nSh4vvVQ3V9Qf9Df01PfawPcRzJH4gFSgrObw+LveUyDoKU3kxi5+9RJTCWlj4FjYXVPEA==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-riscv64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.3.tgz",
|
||||
"integrity": "sha512-3Emwh0r5wmfm3ssTWRQSyVhbOHvqegUDRd0WhmXKX2mkHJe1SFCMJhagUleMq+Uci34wLSipf8Lagt4LlpRFWQ==",
|
||||
"cpu": [
|
||||
"riscv64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-s390x": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.3.tgz",
|
||||
"integrity": "sha512-pBHUx9LzXWBc7MFIEEL0yD/ZVtNgLytvx60gES28GcWMqil8ElCYR4kvbV2BDqsHOvVDRrOxGySBM9Fcv744hw==",
|
||||
"cpu": [
|
||||
"s390x"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-x64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.3.tgz",
|
||||
"integrity": "sha512-Czi8yzXUWIQYAtL/2y6vogER8pvcsOsk5cpwL4Gk5nJqH5UZiVByIY8Eorm5R13gq+DQKYg0+JyQoytLQas4dA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/netbsd-arm64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.3.tgz",
|
||||
"integrity": "sha512-sDpk0RgmTCR/5HguIZa9n9u+HVKf40fbEUt+iTzSnCaGvY9kFP0YKBWZtJaraonFnqef5SlJ8/TiPAxzyS+UoA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"netbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/netbsd-x64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.3.tgz",
|
||||
"integrity": "sha512-P14lFKJl/DdaE00LItAukUdZO5iqNH7+PjoBm+fLQjtxfcfFE20Xf5CrLsmZdq5LFFZzb5JMZ9grUwvtVYzjiA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"netbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/openbsd-arm64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.3.tgz",
|
||||
"integrity": "sha512-AIcMP77AvirGbRl/UZFTq5hjXK+2wC7qFRGoHSDrZ5v5b8DK/GYpXW3CPRL53NkvDqb9D+alBiC/dV0Fb7eJcw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"openbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/openbsd-x64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.3.tgz",
|
||||
"integrity": "sha512-DnW2sRrBzA+YnE70LKqnM3P+z8vehfJWHXECbwBmH/CU51z6FiqTQTHFenPlHmo3a8UgpLyH3PT+87OViOh1AQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"openbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/openharmony-arm64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.3.tgz",
|
||||
"integrity": "sha512-NinAEgr/etERPTsZJ7aEZQvvg/A6IsZG/LgZy+81wON2huV7SrK3e63dU0XhyZP4RKGyTm7aOgmQk0bGp0fy2g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"openharmony"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/sunos-x64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.3.tgz",
|
||||
"integrity": "sha512-PanZ+nEz+eWoBJ8/f8HKxTTD172SKwdXebZ0ndd953gt1HRBbhMsaNqjTyYLGLPdoWHy4zLU7bDVJztF5f3BHA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"sunos"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-arm64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.3.tgz",
|
||||
"integrity": "sha512-B2t59lWWYrbRDw/tjiWOuzSsFh1Y/E95ofKz7rIVYSQkUYBjfSgf6oeYPNWHToFRr2zx52JKApIcAS/D5TUBnA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-ia32": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.3.tgz",
|
||||
"integrity": "sha512-QLKSFeXNS8+tHW7tZpMtjlNb7HKau0QDpwm49u0vUp9y1WOF+PEzkU84y9GqYaAVW8aH8f3GcBck26jh54cX4Q==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-x64": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.3.tgz",
|
||||
"integrity": "sha512-4uJGhsxuptu3OcpVAzli+/gWusVGwZZHTlS63hh++ehExkVT8SgiEf7/uC/PclrPPkLhZqGgCTjd0VWLo6xMqA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "24.11.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.11.0.tgz",
|
||||
"integrity": "sha512-fPxQqz4VTgPI/IQ+lj9r0h+fDR66bzoeMGHp8ASee+32OSGIkeASsoZuJixsQoVef1QJbeubcPBxKk22QVoWdw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~7.16.0"
|
||||
}
|
||||
},
|
||||
"node_modules/esbuild": {
|
||||
"version": "0.27.3",
|
||||
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.3.tgz",
|
||||
"integrity": "sha512-8VwMnyGCONIs6cWue2IdpHxHnAjzxnw2Zr7MkVxB2vjmQ2ivqGFb4LEG3SMnv0Gb2F/G/2yA8zUaiL1gywDCCg==",
|
||||
"dev": true,
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"esbuild": "bin/esbuild"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@esbuild/aix-ppc64": "0.27.3",
|
||||
"@esbuild/android-arm": "0.27.3",
|
||||
"@esbuild/android-arm64": "0.27.3",
|
||||
"@esbuild/android-x64": "0.27.3",
|
||||
"@esbuild/darwin-arm64": "0.27.3",
|
||||
"@esbuild/darwin-x64": "0.27.3",
|
||||
"@esbuild/freebsd-arm64": "0.27.3",
|
||||
"@esbuild/freebsd-x64": "0.27.3",
|
||||
"@esbuild/linux-arm": "0.27.3",
|
||||
"@esbuild/linux-arm64": "0.27.3",
|
||||
"@esbuild/linux-ia32": "0.27.3",
|
||||
"@esbuild/linux-loong64": "0.27.3",
|
||||
"@esbuild/linux-mips64el": "0.27.3",
|
||||
"@esbuild/linux-ppc64": "0.27.3",
|
||||
"@esbuild/linux-riscv64": "0.27.3",
|
||||
"@esbuild/linux-s390x": "0.27.3",
|
||||
"@esbuild/linux-x64": "0.27.3",
|
||||
"@esbuild/netbsd-arm64": "0.27.3",
|
||||
"@esbuild/netbsd-x64": "0.27.3",
|
||||
"@esbuild/openbsd-arm64": "0.27.3",
|
||||
"@esbuild/openbsd-x64": "0.27.3",
|
||||
"@esbuild/openharmony-arm64": "0.27.3",
|
||||
"@esbuild/sunos-x64": "0.27.3",
|
||||
"@esbuild/win32-arm64": "0.27.3",
|
||||
"@esbuild/win32-ia32": "0.27.3",
|
||||
"@esbuild/win32-x64": "0.27.3"
|
||||
}
|
||||
},
|
||||
"node_modules/fsevents": {
|
||||
"version": "2.3.3",
|
||||
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
|
||||
"integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
|
||||
"dev": true,
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/get-tsconfig": {
|
||||
"version": "4.13.6",
|
||||
"resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.6.tgz",
|
||||
"integrity": "sha512-shZT/QMiSHc/YBLxxOkMtgSid5HFoauqCE3/exfsEcwg1WkeqjG+V40yBbBrsD+jW2HDXcs28xOfcbm2jI8Ddw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"resolve-pkg-maps": "^1.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/resolve-pkg-maps": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
|
||||
"integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/tsx": {
|
||||
"version": "4.21.0",
|
||||
"resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz",
|
||||
"integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"esbuild": "~0.27.0",
|
||||
"get-tsconfig": "^4.7.5"
|
||||
},
|
||||
"bin": {
|
||||
"tsx": "dist/cli.mjs"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"fsevents": "~2.3.3"
|
||||
}
|
||||
},
|
||||
"node_modules/typescript": {
|
||||
"version": "5.9.3",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
|
||||
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.17"
|
||||
}
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "7.16.0",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
|
||||
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
}
|
||||
}
|
||||
}
|
||||
31
skills/package.json
Normal file
31
skills/package.json
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
{
|
||||
"name": "plano-agent-skills",
|
||||
"version": "1.0.0",
|
||||
"description": "Best practices for building agents and agentic applications with Plano — installable via npx skills add",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"typecheck": "tsc --noEmit",
|
||||
"build": "tsx src/build.ts",
|
||||
"validate": "tsx src/validate.ts",
|
||||
"extract-tests": "tsx src/extract-tests.ts",
|
||||
"dev": "npm run typecheck && npm run validate && npm run build && npm run extract-tests"
|
||||
},
|
||||
"keywords": [
|
||||
"plano",
|
||||
"archgw",
|
||||
"ai-gateway",
|
||||
"agent",
|
||||
"llm",
|
||||
"skills",
|
||||
"best-practices"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^24.3.0",
|
||||
"tsx": "^4.20.5",
|
||||
"typescript": "^5.9.2"
|
||||
}
|
||||
}
|
||||
32
skills/plano-advanced-patterns/SKILL.md
Normal file
32
skills/plano-advanced-patterns/SKILL.md
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
---
|
||||
name: plano-advanced-patterns
|
||||
description: Design advanced Plano architectures. Use for multi-listener systems, prompt target schema quality, and layered orchestration patterns.
|
||||
license: Apache-2.0
|
||||
metadata:
|
||||
author: katanemo
|
||||
version: "1.0.0"
|
||||
---
|
||||
|
||||
# Plano Advanced Patterns
|
||||
|
||||
Use this skill for higher-order architecture decisions once fundamentals are stable.
|
||||
|
||||
## When To Use
|
||||
|
||||
- "Design a multi-listener Plano architecture"
|
||||
- "Improve prompt target schema precision"
|
||||
- "Combine model, prompt, and agent listeners"
|
||||
- "Refine advanced routing/function-calling behavior"
|
||||
|
||||
## Apply These Rules
|
||||
|
||||
- `advanced-multi-listener`
|
||||
- `advanced-prompt-targets`
|
||||
|
||||
## Execution Checklist
|
||||
|
||||
1. Use multiple listeners only when interfaces are truly distinct.
|
||||
2. Keep provider/routing definitions shared and consistent.
|
||||
3. Define prompt target parameters with strict, explicit schemas.
|
||||
4. Minimize ambiguity that causes malformed tool calls.
|
||||
5. Provide migration-safe recommendations and test scenarios.
|
||||
32
skills/plano-agent-orchestration/SKILL.md
Normal file
32
skills/plano-agent-orchestration/SKILL.md
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
---
|
||||
name: plano-agent-orchestration
|
||||
description: Improve multi-agent orchestration in Plano. Use for agent registration, agent listener wiring, and capability-focused agent descriptions for accurate routing.
|
||||
license: Apache-2.0
|
||||
metadata:
|
||||
author: katanemo
|
||||
version: "1.0.0"
|
||||
---
|
||||
|
||||
# Plano Agent Orchestration
|
||||
|
||||
Use this skill for agent listener quality, sub-agent registration, and route accuracy.
|
||||
|
||||
## When To Use
|
||||
|
||||
- "Fix multi-agent routing"
|
||||
- "Validate agents vs listeners.agents config"
|
||||
- "Improve agent descriptions"
|
||||
- "Set up a reliable orchestrator"
|
||||
|
||||
## Apply These Rules
|
||||
|
||||
- `agent-orchestration`
|
||||
- `agent-descriptions`
|
||||
|
||||
## Execution Checklist
|
||||
|
||||
1. Verify each agent exists in both `agents` and `listeners[].agents`.
|
||||
2. Ensure one fallback/default agent where appropriate.
|
||||
3. Rewrite descriptions to be capability-focused and non-overlapping.
|
||||
4. Keep descriptions specific, concise, and example-driven.
|
||||
5. Provide test prompts to validate routing outcomes.
|
||||
53
skills/plano-agent-skills/SKILL.md
Normal file
53
skills/plano-agent-skills/SKILL.md
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
---
|
||||
name: plano-agent-skills
|
||||
description: Best practices for building agents and agentic applications with Plano, including configuration, routing, orchestration, guardrails, observability, and deployment.
|
||||
license: Apache-2.0
|
||||
metadata:
|
||||
author: katanemo
|
||||
version: "1.0.0"
|
||||
---
|
||||
|
||||
# Plano Agent Skills
|
||||
|
||||
Comprehensive Plano guidance for coding agents. Use this umbrella skill when a task spans multiple areas (config, routing, orchestration, filters, observability, CLI, deployment).
|
||||
|
||||
## When To Use
|
||||
|
||||
- Validating or fixing Plano `config.yaml`
|
||||
- Designing listener architecture (`model`, `prompt`, `agent`)
|
||||
- Improving model/provider routing quality and fallback behavior
|
||||
- Hardening filter chains and prompt guardrails
|
||||
- Debugging routing with traces and CLI workflows
|
||||
- Preparing deployment and production readiness checks
|
||||
|
||||
## How To Use
|
||||
|
||||
1. Classify the request by scope (single section vs. cross-cutting).
|
||||
2. For focused work, prefer a section-specific skill (for example `plano-routing-model-selection`).
|
||||
3. For broad work, apply this umbrella skill and reference section rules from `skills/AGENTS.md`.
|
||||
4. Produce concrete edits first, then concise reasoning and validation steps.
|
||||
|
||||
## Operating Workflow
|
||||
|
||||
1. Identify the task area first: config, routing, orchestration, filters, observability, CLI, or deployment.
|
||||
2. Apply the smallest correct change that satisfies the requested behavior.
|
||||
3. Preserve security and reliability defaults:
|
||||
- `version: v0.3.0`
|
||||
- exactly one `default: true` model provider
|
||||
- secrets via `$ENV_VAR` substitution only
|
||||
- `host.docker.internal` for host services from inside Docker
|
||||
- guardrails before enrichment in filter chains
|
||||
4. For debugging, prioritize traces over guesswork (`planoai up --with-tracing`, `planoai trace`).
|
||||
5. Return concrete diffs and a short validation checklist.
|
||||
|
||||
## Response Style
|
||||
|
||||
- Prefer actionable edits over generic advice.
|
||||
- Be explicit about why a config choice is correct.
|
||||
- Call out risky patterns (hardcoded secrets, missing default provider, bad filter ordering).
|
||||
- Keep examples minimal and production-viable.
|
||||
|
||||
## References
|
||||
|
||||
- Repo: https://github.com/katanemo/plano
|
||||
- Full rulebook: `skills/AGENTS.md`
|
||||
34
skills/plano-cli-operations/SKILL.md
Normal file
34
skills/plano-cli-operations/SKILL.md
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
---
|
||||
name: plano-cli-operations
|
||||
description: Apply Plano CLI best practices. Use for startup troubleshooting, cli_agent workflows, prompt target generation, and template-based project bootstrapping.
|
||||
license: Apache-2.0
|
||||
metadata:
|
||||
author: katanemo
|
||||
version: "1.0.0"
|
||||
---
|
||||
|
||||
# Plano CLI Operations
|
||||
|
||||
Use this skill when the task is primarily operational and CLI-driven.
|
||||
|
||||
## When To Use
|
||||
|
||||
- "Fix `planoai up` failures"
|
||||
- "Use `planoai cli_agent` with coding agents"
|
||||
- "Generate prompt targets from Python functions"
|
||||
- "Bootstrap a project with `planoai init` templates"
|
||||
|
||||
## Apply These Rules
|
||||
|
||||
- `cli-startup`
|
||||
- `cli-agent`
|
||||
- `cli-generate`
|
||||
- `cli-init`
|
||||
|
||||
## Execution Checklist
|
||||
|
||||
1. Follow startup validation order before deep debugging.
|
||||
2. Use `cli_agent` to route coding-agent traffic through Plano.
|
||||
3. Generate prompt target schema, then wire endpoint details explicitly.
|
||||
4. Start from templates for reliable first-time setup.
|
||||
5. Provide a compact runbook with exact CLI commands.
|
||||
34
skills/plano-config-fundamentals/SKILL.md
Normal file
34
skills/plano-config-fundamentals/SKILL.md
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
---
|
||||
name: plano-config-fundamentals
|
||||
description: Validate and fix Plano config fundamentals. Use for config versioning, listener types, provider registration, secrets handling, and startup validation failures.
|
||||
license: Apache-2.0
|
||||
metadata:
|
||||
author: katanemo
|
||||
version: "1.0.0"
|
||||
---
|
||||
|
||||
# Plano Configuration Fundamentals
|
||||
|
||||
Use this skill for foundational `config.yaml` correctness.
|
||||
|
||||
## When To Use
|
||||
|
||||
- "Validate this Plano config"
|
||||
- "Fix startup config errors"
|
||||
- "Check listeners/providers/secrets"
|
||||
- "Why does `planoai up` fail schema validation?"
|
||||
|
||||
## Apply These Rules
|
||||
|
||||
- `config-version`
|
||||
- `config-listeners`
|
||||
- `config-providers`
|
||||
- `config-secrets`
|
||||
|
||||
## Execution Checklist
|
||||
|
||||
1. Ensure `version: v0.3.0` is present.
|
||||
2. Confirm listener type matches intended architecture.
|
||||
3. Verify provider names/interfaces and exactly one default provider.
|
||||
4. Replace hardcoded secrets with `$ENV_VAR` substitution.
|
||||
5. Return minimal patch and a `planoai up` verification plan.
|
||||
33
skills/plano-deployment-security/SKILL.md
Normal file
33
skills/plano-deployment-security/SKILL.md
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
---
|
||||
name: plano-deployment-security
|
||||
description: Apply Plano deployment and production security practices. Use for Docker networking, state storage choices, readiness checks, and environment-based secret handling.
|
||||
license: Apache-2.0
|
||||
metadata:
|
||||
author: katanemo
|
||||
version: "1.0.0"
|
||||
---
|
||||
|
||||
# Plano Deployment and Security
|
||||
|
||||
Use this skill to harden production deployments and reduce runtime surprises.
|
||||
|
||||
## When To Use
|
||||
|
||||
- "Fix unreachable agents in Docker"
|
||||
- "Configure persistent conversation state"
|
||||
- "Add readiness and health checks"
|
||||
- "Prepare production deployment checklist"
|
||||
|
||||
## Apply These Rules
|
||||
|
||||
- `deploy-docker`
|
||||
- `deploy-state`
|
||||
- `deploy-health`
|
||||
|
||||
## Execution Checklist
|
||||
|
||||
1. Use `host.docker.internal` for host-side services from inside Plano container.
|
||||
2. Prefer PostgreSQL state storage for production multi-turn workloads.
|
||||
3. Verify `/healthz` before traffic or CI assertions.
|
||||
4. Ensure secrets remain environment-based, never hardcoded.
|
||||
5. Return deployment checks with failure-mode diagnostics.
|
||||
33
skills/plano-filter-guardrails/SKILL.md
Normal file
33
skills/plano-filter-guardrails/SKILL.md
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
---
|
||||
name: plano-filter-guardrails
|
||||
description: Harden Plano filter chains and guardrails. Use for MCP filter setup, prompt guard responses, and safe filter ordering.
|
||||
license: Apache-2.0
|
||||
metadata:
|
||||
author: katanemo
|
||||
version: "1.0.0"
|
||||
---
|
||||
|
||||
# Plano Filter Chains and Guardrails
|
||||
|
||||
Use this skill when safety controls or filter pipelines need correction.
|
||||
|
||||
## When To Use
|
||||
|
||||
- "Fix filter chain ordering"
|
||||
- "Set up MCP filters correctly"
|
||||
- "Improve guardrail rejection behavior"
|
||||
- "Harden request processing for safety"
|
||||
|
||||
## Apply These Rules
|
||||
|
||||
- `filter-mcp`
|
||||
- `filter-guardrails`
|
||||
- `filter-ordering`
|
||||
|
||||
## Execution Checklist
|
||||
|
||||
1. Configure filter `type`, `transport`, and `tool` explicitly for MCP.
|
||||
2. Ensure rejection messages are clear and actionable.
|
||||
3. Order chain as guards -> rewriters -> enrichment -> output checks.
|
||||
4. Prevent expensive enrichment on unsafe requests.
|
||||
5. Verify with representative blocked and allowed test prompts.
|
||||
33
skills/plano-observability-debugging/SKILL.md
Normal file
33
skills/plano-observability-debugging/SKILL.md
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
---
|
||||
name: plano-observability-debugging
|
||||
description: Improve Plano tracing and debugging workflows. Use for sampling strategy, span attributes, and trace query-based root-cause analysis.
|
||||
license: Apache-2.0
|
||||
metadata:
|
||||
author: katanemo
|
||||
version: "1.0.0"
|
||||
---
|
||||
|
||||
# Plano Observability and Debugging
|
||||
|
||||
Use this skill to make routing and latency behavior inspectable and debuggable.
|
||||
|
||||
## When To Use
|
||||
|
||||
- "Enable tracing correctly"
|
||||
- "Add useful span attributes"
|
||||
- "Debug why a request routed incorrectly"
|
||||
- "Inspect filter/model latency from traces"
|
||||
|
||||
## Apply These Rules
|
||||
|
||||
- `observe-tracing`
|
||||
- `observe-span-attributes`
|
||||
- `observe-trace-query`
|
||||
|
||||
## Execution Checklist
|
||||
|
||||
1. Enable tracing with environment-appropriate sampling.
|
||||
2. Add useful static and header-derived span attributes.
|
||||
3. Use `planoai trace` filters to isolate route and latency issues.
|
||||
4. Prefer trace evidence over assumptions in recommendations.
|
||||
5. Return exact commands to reproduce and validate findings.
|
||||
34
skills/plano-routing-model-selection/SKILL.md
Normal file
34
skills/plano-routing-model-selection/SKILL.md
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
---
|
||||
name: plano-routing-model-selection
|
||||
description: Optimize Plano model routing and selection. Use for provider defaults, model aliases, passthrough auth, and routing preference quality.
|
||||
license: Apache-2.0
|
||||
metadata:
|
||||
author: katanemo
|
||||
version: "1.0.0"
|
||||
---
|
||||
|
||||
# Plano Routing and Model Selection
|
||||
|
||||
Use this skill when requests are routed to the wrong model, costs are high, or fallback behavior is unclear.
|
||||
|
||||
## When To Use
|
||||
|
||||
- "Improve model routing"
|
||||
- "Add aliases and defaults"
|
||||
- "Fix passthrough auth with proxy providers"
|
||||
- "Tune routing preferences for better classification"
|
||||
|
||||
## Apply These Rules
|
||||
|
||||
- `routing-default`
|
||||
- `routing-aliases`
|
||||
- `routing-passthrough`
|
||||
- `routing-preferences`
|
||||
|
||||
## Execution Checklist
|
||||
|
||||
1. Ensure exactly one `default: true` provider.
|
||||
2. Add semantic aliases for stable client contracts.
|
||||
3. Configure passthrough auth only where required.
|
||||
4. Rewrite vague preference descriptions with concrete task scopes.
|
||||
5. Validate routing behavior using trace-based checks.
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue