diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 9e8d3223..0882479d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -133,13 +133,13 @@ jobs:
load: true
tags: |
${{ env.PLANO_DOCKER_IMAGE }}
- ${{ env.DOCKER_IMAGE }}:0.4.19
+ ${{ env.DOCKER_IMAGE }}:0.4.21
${{ env.DOCKER_IMAGE }}:latest
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Save image as artifact
- run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.19 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar
+ run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.21 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar
- name: Upload image artifact
uses: actions/upload-artifact@v6
diff --git a/apps/www/src/components/Hero.tsx b/apps/www/src/components/Hero.tsx
index 05e615b9..aa9a2298 100644
--- a/apps/www/src/components/Hero.tsx
+++ b/apps/www/src/components/Hero.tsx
@@ -24,7 +24,7 @@ export function Hero() {
>
- v0.4.19
+ v0.4.21
—
diff --git a/build_filter_image.sh b/build_filter_image.sh
index 73e51b61..a0dd2498 100644
--- a/build_filter_image.sh
+++ b/build_filter_image.sh
@@ -1 +1 @@
-docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.19
+docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.21
diff --git a/cli/planoai/__init__.py b/cli/planoai/__init__.py
index 2492d40c..ac0015d7 100644
--- a/cli/planoai/__init__.py
+++ b/cli/planoai/__init__.py
@@ -1,3 +1,3 @@
"""Plano CLI - Intelligent Prompt Gateway."""
-__version__ = "0.4.19"
+__version__ = "0.4.21"
diff --git a/cli/planoai/chatgpt_auth.py b/cli/planoai/chatgpt_auth.py
new file mode 100644
index 00000000..dbbde3ac
--- /dev/null
+++ b/cli/planoai/chatgpt_auth.py
@@ -0,0 +1,290 @@
+"""
+ChatGPT subscription OAuth device-flow authentication.
+
+Implements the device code flow used by OpenAI Codex CLI to authenticate
+with a ChatGPT Plus/Pro subscription. Tokens are stored locally in
+~/.plano/chatgpt/auth.json and auto-refreshed when expired.
+"""
+
+import base64
+import json
+import os
+import time
+from typing import Any, Dict, Optional, Tuple
+
+import requests
+
+from planoai.consts import PLANO_HOME
+
+# OAuth + API constants (derived from openai/codex)
+CHATGPT_AUTH_BASE = "https://auth.openai.com"
+CHATGPT_DEVICE_CODE_URL = f"{CHATGPT_AUTH_BASE}/api/accounts/deviceauth/usercode"
+CHATGPT_DEVICE_TOKEN_URL = f"{CHATGPT_AUTH_BASE}/api/accounts/deviceauth/token"
+CHATGPT_OAUTH_TOKEN_URL = f"{CHATGPT_AUTH_BASE}/oauth/token"
+CHATGPT_DEVICE_VERIFY_URL = f"{CHATGPT_AUTH_BASE}/codex/device"
+CHATGPT_API_BASE = "https://chatgpt.com/backend-api/codex"
+CHATGPT_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
+
+# Local storage
+CHATGPT_AUTH_DIR = os.path.join(PLANO_HOME, "chatgpt")
+CHATGPT_AUTH_FILE = os.path.join(CHATGPT_AUTH_DIR, "auth.json")
+
+# Timeouts
+TOKEN_EXPIRY_SKEW_SECONDS = 60
+DEVICE_CODE_TIMEOUT_SECONDS = 15 * 60
+DEVICE_CODE_POLL_SECONDS = 5
+# requests has no default timeout; bound each HTTP call so a hung
+# connection cannot block the CLI indefinitely.
+HTTP_TIMEOUT_SECONDS = 30
+
+
+def _ensure_auth_dir():
+ os.makedirs(CHATGPT_AUTH_DIR, exist_ok=True)
+
+
+def load_auth() -> Optional[Dict[str, Any]]:
+ """Load auth data from disk."""
+ try:
+ with open(CHATGPT_AUTH_FILE, "r") as f:
+ return json.load(f)
+ except (IOError, json.JSONDecodeError):
+ return None
+
+
+def save_auth(data: Dict[str, Any]):
+ """Save auth data to disk."""
+ _ensure_auth_dir()
+ fd = os.open(CHATGPT_AUTH_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+ with os.fdopen(fd, "w") as f:
+ json.dump(data, f, indent=2)
+
+
+def delete_auth():
+ """Remove stored credentials."""
+ try:
+ os.remove(CHATGPT_AUTH_FILE)
+ except FileNotFoundError:
+ pass
+
+
+def _decode_jwt_claims(token: str) -> Dict[str, Any]:
+ """Decode JWT payload without verification."""
+ try:
+ parts = token.split(".")
+ if len(parts) < 2:
+ return {}
+ payload_b64 = parts[1]
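+ # base64url strips "=" padding; restore it to a multiple of 4 before decoding.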
+ payload_b64 += "=" * (-len(payload_b64) % 4)
+ return json.loads(base64.urlsafe_b64decode(payload_b64).decode("utf-8"))
+ except Exception:
+ return {}
+
+
+def _get_expires_at(token: str) -> Optional[int]:
+ """Extract expiration time from JWT."""
+ claims = _decode_jwt_claims(token)
+ exp = claims.get("exp")
+ return int(exp) if isinstance(exp, (int, float)) else None
+
+
+def _extract_account_id(token: Optional[str]) -> Optional[str]:
+ """Extract ChatGPT account ID from JWT claims."""
+ if not token:
+ return None
+ claims = _decode_jwt_claims(token)
+ auth_claims = claims.get("https://api.openai.com/auth")
+ if isinstance(auth_claims, dict):
+ account_id = auth_claims.get("chatgpt_account_id")
+ if isinstance(account_id, str) and account_id:
+ return account_id
+ return None
+
+
+def _is_token_expired(auth_data: Dict[str, Any]) -> bool:
+ """Check if the access token is expired."""
+ expires_at = auth_data.get("expires_at")
+ if expires_at is None:
+ access_token = auth_data.get("access_token")
+ if access_token:
+ expires_at = _get_expires_at(access_token)
+ if expires_at:
+ auth_data["expires_at"] = expires_at
+ save_auth(auth_data)
+ if expires_at is None:
+ return True
+ return time.time() >= float(expires_at) - TOKEN_EXPIRY_SKEW_SECONDS
+
+
+def _refresh_tokens(refresh_token: str) -> Dict[str, str]:
+ """Refresh the access token using the refresh token."""
+ resp = requests.post(
+ CHATGPT_OAUTH_TOKEN_URL,
+ json={
+ "client_id": CHATGPT_CLIENT_ID,
+ "grant_type": "refresh_token",
+ "refresh_token": refresh_token,
+ "scope": "openid profile email",
+ },
+ timeout=HTTP_TIMEOUT_SECONDS,
+ )
+ resp.raise_for_status()
+ data = resp.json()
+
+ access_token = data.get("access_token")
+ id_token = data.get("id_token")
+ if not access_token or not id_token:
+ # Report key names only; the raw payload may contain live tokens.
+ raise RuntimeError(f"Refresh response missing fields; got keys: {sorted(data)}")
+
+ return {
+ "access_token": access_token,
+ "refresh_token": data.get("refresh_token", refresh_token),
+ "id_token": id_token,
+ }
+
+
+def _build_auth_record(tokens: Dict[str, str]) -> Dict[str, Any]:
+ """Build the auth record to persist."""
+ access_token = tokens.get("access_token")
+ id_token = tokens.get("id_token")
+ expires_at = _get_expires_at(access_token) if access_token else None
+ account_id = _extract_account_id(id_token or access_token)
+ return {
+ "access_token": access_token,
+ "refresh_token": tokens.get("refresh_token"),
+ "id_token": id_token,
+ "expires_at": expires_at,
+ "account_id": account_id,
+ }
+
+
+def request_device_code() -> Dict[str, str]:
+ """Request a device code from OpenAI's device auth endpoint."""
+ resp = requests.post(
+ CHATGPT_DEVICE_CODE_URL,
+ json={"client_id": CHATGPT_CLIENT_ID},
+ timeout=HTTP_TIMEOUT_SECONDS,
+ )
+ resp.raise_for_status()
+ data = resp.json()
+
+ device_auth_id = data.get("device_auth_id")
+ user_code = data.get("user_code") or data.get("usercode")
+ interval = data.get("interval")
+ if not device_auth_id or not user_code:
+ raise RuntimeError(f"Device code response missing fields: {data}")
+
+ return {
+ "device_auth_id": device_auth_id,
+ "user_code": user_code,
+ "interval": str(interval or "5"),
+ }
+
+
+def poll_for_authorization(device_code: Dict[str, str]) -> Dict[str, str]:
+ """Poll until the user completes authorization. Returns code_data."""
+ interval = int(device_code.get("interval", "5"))
+ start_time = time.time()
+
+ while time.time() - start_time < DEVICE_CODE_TIMEOUT_SECONDS:
+ try:
+ resp = requests.post(
+ CHATGPT_DEVICE_TOKEN_URL,
+ json={
+ "device_auth_id": device_code["device_auth_id"],
+ "user_code": device_code["user_code"],
+ },
+ timeout=HTTP_TIMEOUT_SECONDS,
+ )
+ if resp.status_code == 200:
+ data = resp.json()
+ if all(
+ key in data
+ for key in ("authorization_code", "code_challenge", "code_verifier")
+ ):
+ return data
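+ # A 403/404 while polling is treated as authorization-still-pending: back off and retry.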
+ if resp.status_code in (403, 404):
+ time.sleep(max(interval, DEVICE_CODE_POLL_SECONDS))
+ continue
+ resp.raise_for_status()
+ except requests.HTTPError as exc:
+ if exc.response is not None and exc.response.status_code in (403, 404):
+ time.sleep(max(interval, DEVICE_CODE_POLL_SECONDS))
+ continue
+ raise RuntimeError(f"Polling failed: {exc}") from exc
+
+ time.sleep(max(interval, DEVICE_CODE_POLL_SECONDS))
+
+ raise RuntimeError("Timed out waiting for device authorization")
+
+
+def exchange_code_for_tokens(code_data: Dict[str, str]) -> Dict[str, str]:
+ """Exchange the authorization code for access/refresh/id tokens."""
+ redirect_uri = f"{CHATGPT_AUTH_BASE}/deviceauth/callback"
+ # Pass a dict so requests URL-encodes each form value and sets the
+ # application/x-www-form-urlencoded Content-Type, instead of hand-building
+ # an unencoded body string.
+ resp = requests.post(
+ CHATGPT_OAUTH_TOKEN_URL,
+ data={
+ "grant_type": "authorization_code",
+ "code": code_data["authorization_code"],
+ "redirect_uri": redirect_uri,
+ "client_id": CHATGPT_CLIENT_ID,
+ "code_verifier": code_data["code_verifier"],
+ },
+ timeout=HTTP_TIMEOUT_SECONDS,
+ )
+ resp.raise_for_status()
+ data = resp.json()
+
+ if not all(key in data for key in ("access_token", "refresh_token", "id_token")):
+ # Report key names only; the raw payload may contain live tokens.
+ raise RuntimeError(
+ f"Token exchange response missing fields; got keys: {sorted(data)}"
+ )
+
+ return {
+ "access_token": data["access_token"],
+ "refresh_token": data["refresh_token"],
+ "id_token": data["id_token"],
+ }
+
+
+def login() -> Dict[str, Any]:
+ """Run the full device code login flow. Returns the auth record."""
+ device_code = request_device_code()
+ auth_record = _build_auth_record({})
+ auth_record["device_code_requested_at"] = time.time()
+ save_auth(auth_record)
+
+ print(
+ "\nSign in with your ChatGPT account:\n"
+ f" 1) Visit: {CHATGPT_DEVICE_VERIFY_URL}\n"
+ f" 2) Enter code: {device_code['user_code']}\n\n"
+ "Device codes are a common phishing target. Never share this code.\n",
+ flush=True,
+ )
+
+ code_data = poll_for_authorization(device_code)
+ tokens = exchange_code_for_tokens(code_data)
+ auth_record = _build_auth_record(tokens)
+ save_auth(auth_record)
+ return auth_record
+
+
+def get_access_token() -> Tuple[str, Optional[str]]:
+ """
+ Get a valid access token and account ID.
+ Refreshes automatically if expired. Raises if no auth data exists.
+ Returns (access_token, account_id).
+ """
+ auth_data = load_auth()
+ if not auth_data:
+ raise RuntimeError(
+ "No ChatGPT credentials found. Run 'planoai chatgpt login' first."
+ )
+
+ access_token = auth_data.get("access_token")
+ if access_token and not _is_token_expired(auth_data):
+ return access_token, auth_data.get("account_id")
+
+ # Try refresh
+ refresh_token = auth_data.get("refresh_token")
+ if refresh_token:
+ tokens = _refresh_tokens(refresh_token)
+ auth_record = _build_auth_record(tokens)
+ save_auth(auth_record)
+ return auth_record["access_token"], auth_record.get("account_id")
+
+ raise RuntimeError(
+ "ChatGPT token expired and refresh failed. Run 'planoai chatgpt login' again."
+ )
diff --git a/cli/planoai/chatgpt_cmd.py b/cli/planoai/chatgpt_cmd.py
new file mode 100644
index 00000000..b61068c4
--- /dev/null
+++ b/cli/planoai/chatgpt_cmd.py
@@ -0,0 +1,86 @@
+"""
+CLI commands for ChatGPT subscription management.
+
+Usage:
+ planoai chatgpt login - Authenticate with ChatGPT via device code flow
+ planoai chatgpt status - Check authentication status
+ planoai chatgpt logout - Remove stored credentials
+"""
+
+import datetime
+
+import click
+from rich.console import Console
+
+from planoai import chatgpt_auth
+
+console = Console()
+
+
+@click.group()
+def chatgpt():
+ """ChatGPT subscription management."""
+ pass
+
+
+@chatgpt.command()
+def login():
+ """Authenticate with your ChatGPT subscription using device code flow."""
+ try:
+ auth_record = chatgpt_auth.login()
+ # _build_auth_record stores None when extraction fails, so fall back
+ # with `or` rather than a .get() default.
+ account_id = auth_record.get("account_id") or "unknown"
+ console.print(
+ f"\n[green]Successfully authenticated with ChatGPT![/green]"
+ f"\nAccount ID: {account_id}"
+ f"\nCredentials saved to: {chatgpt_auth.CHATGPT_AUTH_FILE}"
+ )
+ except Exception as e:
+ console.print(f"\n[red]Authentication failed:[/red] {e}")
+ raise SystemExit(1)
+
+
+@chatgpt.command()
+def status():
+ """Check ChatGPT authentication status."""
+ auth_data = chatgpt_auth.load_auth()
+ if not auth_data or not auth_data.get("access_token"):
+ console.print(
+ "[yellow]Not authenticated.[/yellow] Run 'planoai chatgpt login'."
+ )
+ return
+
+ account_id = auth_data.get("account_id") or "unknown"
+ expires_at = auth_data.get("expires_at")
+
+ if expires_at:
+ expiry_time = datetime.datetime.fromtimestamp(
+ expires_at, tz=datetime.timezone.utc
+ )
+ now = datetime.datetime.now(tz=datetime.timezone.utc)
+ if expiry_time > now:
+ remaining = expiry_time - now
+ console.print(
+ f"[green]Authenticated[/green]"
+ f"\n Account ID: {account_id}"
+ f"\n Token expires: {expiry_time.strftime('%Y-%m-%d %H:%M:%S UTC')}"
+ f" ({int(remaining.total_seconds() // 60)}m remaining)"
+ )
+ else:
+ console.print(
+ f"[yellow]Token expired[/yellow]"
+ f"\n Account ID: {account_id}"
+ f"\n Expired at: {expiry_time.strftime('%Y-%m-%d %H:%M:%S UTC')}"
+ f"\n Will auto-refresh on next use, or run 'planoai chatgpt login'."
+ )
+ else:
+ console.print(
+ f"[green]Authenticated[/green] (no expiry info)"
+ f"\n Account ID: {account_id}"
+ )
+
+
+@chatgpt.command()
+def logout():
+ """Remove stored ChatGPT credentials."""
+ chatgpt_auth.delete_auth()
+ console.print("[green]ChatGPT credentials removed.[/green]")
diff --git a/cli/planoai/config_generator.py b/cli/planoai/config_generator.py
index 5a3d4f63..cb07767e 100644
--- a/cli/planoai/config_generator.py
+++ b/cli/planoai/config_generator.py
@@ -1,5 +1,6 @@
import json
import os
+import uuid
from planoai.utils import convert_legacy_listeners
from jinja2 import Environment, FileSystemLoader
import yaml
@@ -28,8 +29,16 @@ SUPPORTED_PROVIDERS_WITHOUT_BASE_URL = [
"xai",
"moonshotai",
"zhipu",
+ "chatgpt",
+ "digitalocean",
+ "vercel",
+ "openrouter",
]
+CHATGPT_API_BASE = "https://chatgpt.com/backend-api/codex"
+CHATGPT_DEFAULT_ORIGINATOR = "codex_cli_rs"
+CHATGPT_DEFAULT_USER_AGENT = "codex_cli_rs/0.0.0 (Unknown 0; unknown) unknown"
+
SUPPORTED_PROVIDERS = (
SUPPORTED_PROVIDERS_WITHOUT_BASE_URL + SUPPORTED_PROVIDERS_WITH_BASE_URL
)
@@ -49,6 +58,110 @@ def get_endpoint_and_port(endpoint, protocol):
return endpoint, port
+def migrate_inline_routing_preferences(config_yaml):
+ """Lift v0.3.0-style inline ``routing_preferences`` under each
+ ``model_providers`` entry to the v0.4.0 top-level ``routing_preferences``
+ list with ``models: [...]``.
+
+ This function is a no-op for configs whose ``version`` is already
+ ``v0.4.0`` or newer — those are assumed to be on the canonical
+ top-level shape and are passed through untouched.
+
+ For older configs, the version is bumped to ``v0.4.0`` up front so
+ brightstaff's v0.4.0 gate for top-level ``routing_preferences``
+ accepts the rendered config, then inline preferences under each
+ provider are lifted into the top-level list. Preferences with the
+ same ``name`` across multiple providers are merged into a single
+ top-level entry whose ``models`` list contains every provider's
+ full ``provider/model`` string in declaration order. The first
+ ``description`` encountered wins; conflicting descriptions emit a
+ warning rather than an error, so existing v0.3.0 configs keep
+ compiling. Any top-level preference
+ already defined by the user is preserved as-is.
+ """
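+ # Illustrative shape (hypothetical names): a v0.3.0 provider entry like
+ #
+ #   - model: openai/gpt-4o
+ #     routing_preferences:
+ #       - name: code generation
+ #         description: generating new code
+ #
+ # is lifted into the top-level list as
+ #
+ #   routing_preferences:
+ #     - name: code generation
+ #       description: generating new code
+ #       models: [openai/gpt-4o]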
+ current_version = str(config_yaml.get("version", ""))
+ if _version_tuple(current_version) >= (0, 4, 0):
+ return
+
+ config_yaml["version"] = "v0.4.0"
+
+ model_providers = config_yaml.get("model_providers") or []
+ if not model_providers:
+ return
+
+ migrated = {}
+ for model_provider in model_providers:
+ inline_prefs = model_provider.get("routing_preferences")
+ if not inline_prefs:
+ continue
+
+ full_model_name = model_provider.get("model")
+ if not full_model_name:
+ continue
+
+ if "/" in full_model_name and full_model_name.split("/")[-1].strip() == "*":
+ raise Exception(
+ f"Model {full_model_name} has routing_preferences but uses wildcard (*). Models with routing preferences cannot be wildcards."
+ )
+
+ for pref in inline_prefs:
+ name = pref.get("name")
+ description = pref.get("description", "")
+ if not name:
+ continue
+ if name in migrated:
+ entry = migrated[name]
+ if description and description != entry["description"]:
+ print(
+ f"WARNING: routing preference '{name}' has conflicting descriptions across providers; keeping the first one."
+ )
+ if full_model_name not in entry["models"]:
+ entry["models"].append(full_model_name)
+ else:
+ migrated[name] = {
+ "name": name,
+ "description": description,
+ "models": [full_model_name],
+ }
+
+ if not migrated:
+ return
+
+ for model_provider in model_providers:
+ if "routing_preferences" in model_provider:
+ del model_provider["routing_preferences"]
+
+ existing_top_level = config_yaml.get("routing_preferences") or []
+ existing_names = {entry.get("name") for entry in existing_top_level}
+ merged = list(existing_top_level)
+ for name, entry in migrated.items():
+ if name in existing_names:
+ continue
+ merged.append(entry)
+ config_yaml["routing_preferences"] = merged
+
+ print(
+ "WARNING: inline routing_preferences under model_providers is deprecated "
+ "and has been auto-migrated to top-level routing_preferences. Update your "
+ "config to v0.4.0 top-level form. See docs/routing-api.md"
+ )
+
+
+def _version_tuple(version_string):
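+ # e.g. "v0.4.0" -> (0, 4, 0); "0.3" -> (0, 3, 0); "v0.4.1-rc1" -> (0, 4, 1)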
+ stripped = version_string.strip().lstrip("vV")
+ if not stripped:
+ return (0, 0, 0)
+ parts = stripped.split("-", 1)[0].split(".")
+ out = []
+ for part in parts[:3]:
+ try:
+ out.append(int(part))
+ except ValueError:
+ out.append(0)
+ while len(out) < 3:
+ out.append(0)
+ return tuple(out)
+
+
def validate_and_render_schema():
ENVOY_CONFIG_TEMPLATE_FILE = os.getenv(
"ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml"
@@ -92,6 +205,8 @@ def validate_and_render_schema():
config_yaml["model_providers"] = config_yaml["llm_providers"]
del config_yaml["llm_providers"]
+ migrate_inline_routing_preferences(config_yaml)
+
listeners, llm_gateway, prompt_gateway = convert_legacy_listeners(
config_yaml.get("listeners"), config_yaml.get("model_providers")
)
@@ -191,7 +306,16 @@ def validate_and_render_schema():
model_provider_name_set = set()
llms_with_usage = []
model_name_keys = set()
- model_usage_name_keys = set()
+
+ top_level_preferences = config_yaml.get("routing_preferences") or []
+ seen_pref_names = set()
+ for pref in top_level_preferences:
+ pref_name = pref.get("name")
+ if pref_name in seen_pref_names:
+ raise Exception(
+ f'Duplicate routing preference name "{pref_name}", please provide unique name for each routing preference'
+ )
+ seen_pref_names.add(pref_name)
print("listeners: ", listeners)
@@ -250,10 +374,6 @@ def validate_and_render_schema():
raise Exception(
f"Model {model_name} is configured as default but uses wildcard (*). Default models cannot be wildcards."
)
- if model_provider.get("routing_preferences"):
- raise Exception(
- f"Model {model_name} has routing_preferences but uses wildcard (*). Models with routing preferences cannot be wildcards."
- )
# Validate azure_openai and ollama provider requires base_url
if (provider in SUPPORTED_PROVIDERS_WITH_BASE_URL) and model_provider.get(
@@ -302,13 +422,6 @@ def validate_and_render_schema():
)
model_name_keys.add(model_id)
- for routing_preference in model_provider.get("routing_preferences", []):
- if routing_preference.get("name") in model_usage_name_keys:
- raise Exception(
- f'Duplicate routing preference name "{routing_preference.get("name")}", please provide unique name for each routing preference'
- )
- model_usage_name_keys.add(routing_preference.get("name"))
-
# Warn if both passthrough_auth and access_key are configured
if model_provider.get("passthrough_auth") and model_provider.get(
"access_key"
@@ -331,6 +444,25 @@ def validate_and_render_schema():
provider = model_provider["provider"]
model_provider["provider_interface"] = provider
del model_provider["provider"]
+
+ # Auto-wire ChatGPT provider: inject base_url, passthrough_auth, and extra headers
+ if provider == "chatgpt":
+ if not model_provider.get("base_url"):
+ model_provider["base_url"] = CHATGPT_API_BASE
+ if not model_provider.get("access_key") and not model_provider.get(
+ "passthrough_auth"
+ ):
+ model_provider["passthrough_auth"] = True
+ headers = model_provider.get("headers", {})
+ headers.setdefault(
+ "ChatGPT-Account-Id",
+ os.environ.get("CHATGPT_ACCOUNT_ID", ""),
+ )
+ headers.setdefault("originator", CHATGPT_DEFAULT_ORIGINATOR)
+ headers.setdefault("user-agent", CHATGPT_DEFAULT_USER_AGENT)
+ headers.setdefault("session_id", str(uuid.uuid4()))
+ model_provider["headers"] = headers
+
updated_model_providers.append(model_provider)
if model_provider.get("base_url", None):
@@ -377,7 +509,7 @@ def validate_and_render_schema():
router_model_id = (
router_model.split("/", 1)[1] if "/" in router_model else router_model
)
- if len(model_usage_name_keys) > 0 and router_model_id not in model_name_set:
+ if len(seen_pref_names) > 0 and router_model_id not in model_name_set:
updated_model_providers.append(
{
"name": "plano-orchestrator",
diff --git a/cli/planoai/consts.py b/cli/planoai/consts.py
index af76d7cf..fc7b6f1a 100644
--- a/cli/planoai/consts.py
+++ b/cli/planoai/consts.py
@@ -5,7 +5,7 @@ PLANO_COLOR = "#969FF4"
SERVICE_NAME_ARCHGW = "plano"
PLANO_DOCKER_NAME = "plano"
-PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.19")
+PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.21")
DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://localhost:4317"
# Native mode constants
diff --git a/cli/planoai/defaults.py b/cli/planoai/defaults.py
new file mode 100644
index 00000000..1d9468ff
--- /dev/null
+++ b/cli/planoai/defaults.py
@@ -0,0 +1,178 @@
+"""Default config synthesizer for zero-config ``planoai up``.
+
+When the user runs ``planoai up`` in a directory with no ``config.yaml`` /
+``plano_config.yaml``, we synthesize a pass-through config that covers the
+common LLM providers and auto-wires OTel export to ``localhost:4317`` so
+``planoai obs`` works out of the box.
+
+Auth handling:
+- If the provider's env var is set, bind ``access_key: $ENV_VAR``.
+- Otherwise set ``passthrough_auth: true`` so the client's own Authorization
+ header is forwarded. No env var is required to start the proxy.
+"""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+
+DEFAULT_LLM_LISTENER_PORT = 12000
+# plano_config validation requires an http:// scheme on the OTLP endpoint.
+DEFAULT_OTLP_ENDPOINT = "http://localhost:4317"
+
+
+@dataclass(frozen=True)
+class ProviderDefault:
+ name: str
+ env_var: str
+ base_url: str
+ model_pattern: str
+ # Only set for providers whose prefix in the model pattern is NOT one of the
+ # built-in SUPPORTED_PROVIDERS in cli/planoai/config_generator.py. For
+ # built-ins, the validator infers the interface from the model prefix and
+ # rejects configs that set this field explicitly.
+ provider_interface: str | None = None
+
+
+# Keep ordering stable so synthesized configs diff cleanly across runs.
+PROVIDER_DEFAULTS: list[ProviderDefault] = [
+ ProviderDefault(
+ name="openai",
+ env_var="OPENAI_API_KEY",
+ base_url="https://api.openai.com/v1",
+ model_pattern="openai/*",
+ ),
+ ProviderDefault(
+ name="anthropic",
+ env_var="ANTHROPIC_API_KEY",
+ base_url="https://api.anthropic.com/v1",
+ model_pattern="anthropic/*",
+ ),
+ ProviderDefault(
+ name="gemini",
+ env_var="GEMINI_API_KEY",
+ base_url="https://generativelanguage.googleapis.com/v1beta",
+ model_pattern="gemini/*",
+ ),
+ ProviderDefault(
+ name="groq",
+ env_var="GROQ_API_KEY",
+ base_url="https://api.groq.com/openai/v1",
+ model_pattern="groq/*",
+ ),
+ ProviderDefault(
+ name="deepseek",
+ env_var="DEEPSEEK_API_KEY",
+ base_url="https://api.deepseek.com/v1",
+ model_pattern="deepseek/*",
+ ),
+ ProviderDefault(
+ name="mistral",
+ env_var="MISTRAL_API_KEY",
+ base_url="https://api.mistral.ai/v1",
+ model_pattern="mistral/*",
+ ),
+ # DigitalOcean Gradient is a first-class provider post-#889 — the
+ # `digitalocean/` model prefix routes to the built-in Envoy cluster, no
+ # base_url needed at runtime.
+ ProviderDefault(
+ name="digitalocean",
+ env_var="DO_API_KEY",
+ base_url="https://inference.do-ai.run/v1",
+ model_pattern="digitalocean/*",
+ ),
+ ProviderDefault(
+ name="vercel",
+ env_var="AI_GATEWAY_API_KEY",
+ base_url="https://ai-gateway.vercel.sh/v1",
+ model_pattern="vercel/*",
+ ),
+ # OpenRouter is a first-class provider — the `openrouter/` model prefix is
+ # accepted by the schema and brightstaff's ProviderId parser, so no
+ # provider_interface override is needed.
+ ProviderDefault(
+ name="openrouter",
+ env_var="OPENROUTER_API_KEY",
+ base_url="https://openrouter.ai/api/v1",
+ model_pattern="openrouter/*",
+ ),
+]
+
+
+@dataclass
+class DetectionResult:
+ with_keys: list[ProviderDefault]
+ passthrough: list[ProviderDefault]
+
+ @property
+ def summary(self) -> str:
+ parts = []
+ if self.with_keys:
+ parts.append("env-keyed: " + ", ".join(p.name for p in self.with_keys))
+ if self.passthrough:
+ parts.append("pass-through: " + ", ".join(p.name for p in self.passthrough))
+ return " | ".join(parts) if parts else "no providers"
+
+
+def detect_providers(env: dict[str, str] | None = None) -> DetectionResult:
+ env = env if env is not None else dict(os.environ)
+ with_keys: list[ProviderDefault] = []
+ passthrough: list[ProviderDefault] = []
+ for p in PROVIDER_DEFAULTS:
+ val = env.get(p.env_var)
+ if val:
+ with_keys.append(p)
+ else:
+ passthrough.append(p)
+ return DetectionResult(with_keys=with_keys, passthrough=passthrough)
+
+
+def synthesize_default_config(
+ env: dict[str, str] | None = None,
+ *,
+ listener_port: int = DEFAULT_LLM_LISTENER_PORT,
+ otel_endpoint: str = DEFAULT_OTLP_ENDPOINT,
+) -> dict:
+ """Build a pass-through config dict suitable for validation + envoy rendering.
+
+ The returned dict can be dumped to YAML and handed to the existing `planoai up`
+ pipeline unchanged.
+ """
+ detection = detect_providers(env)
+
+ def _entry(p: ProviderDefault, base: dict) -> dict:
+ row: dict = {"name": p.name, "model": p.model_pattern, "base_url": p.base_url}
+ if p.provider_interface is not None:
+ row["provider_interface"] = p.provider_interface
+ row.update(base)
+ return row
+
+ model_providers: list[dict] = []
+ for p in detection.with_keys:
+ model_providers.append(_entry(p, {"access_key": f"${p.env_var}"}))
+ for p in detection.passthrough:
+ model_providers.append(_entry(p, {"passthrough_auth": True}))
+
+ # No explicit `default: true` entry is synthesized: the plano config
+ # validator rejects wildcard models as defaults, and brightstaff already
+ # registers bare model names as lookup keys during wildcard expansion
+ # (crates/common/src/llm_providers.rs), so `{"model": "gpt-4o-mini"}`
+ # without a prefix resolves via the openai wildcard without needing
+ # `default: true`. See discussion on #890.
+
+ return {
+ "version": "v0.4.0",
+ "listeners": [
+ {
+ "name": "llm",
+ "type": "model",
+ "port": listener_port,
+ "address": "0.0.0.0",
+ }
+ ],
+ "model_providers": model_providers,
+ "tracing": {
+ "random_sampling": 100,
+ "opentracing_grpc_endpoint": otel_endpoint,
+ },
+ }
diff --git a/cli/planoai/main.py b/cli/planoai/main.py
index c8659a3c..8e766cf8 100644
--- a/cli/planoai/main.py
+++ b/cli/planoai/main.py
@@ -6,7 +6,13 @@ import sys
import contextlib
import logging
import rich_click as click
+import yaml
from planoai import targets
+from planoai.defaults import (
+ DEFAULT_LLM_LISTENER_PORT,
+ detect_providers,
+ synthesize_default_config,
+)
# Brand color - Plano purple
PLANO_COLOR = "#969FF4"
@@ -31,6 +37,8 @@ from planoai.core import (
)
from planoai.init_cmd import init as init_cmd
from planoai.trace_cmd import trace as trace_cmd, start_trace_listener_background
+from planoai.chatgpt_cmd import chatgpt as chatgpt_cmd
+from planoai.obs_cmd import obs as obs_cmd
from planoai.consts import (
DEFAULT_OTEL_TRACING_GRPC_ENDPOINT,
DEFAULT_NATIVE_OTEL_TRACING_GRPC_ENDPOINT,
@@ -118,6 +126,28 @@ def _temporary_cli_log_level(level: str | None):
set_log_level(current_level)
+def _inject_chatgpt_tokens_if_needed(config, env, console):
+ """If config uses chatgpt providers, resolve tokens from ~/.plano/chatgpt/auth.json."""
+ providers = config.get("model_providers") or config.get("llm_providers") or []
+ has_chatgpt = any(str(p.get("model", "")).startswith("chatgpt/") for p in providers)
+ if not has_chatgpt:
+ return
+
+ try:
+ from planoai.chatgpt_auth import get_access_token
+
+ access_token, account_id = get_access_token()
+ env["CHATGPT_ACCESS_TOKEN"] = access_token
+ if account_id:
+ env["CHATGPT_ACCOUNT_ID"] = account_id
+ except Exception as e:
+ console.print(
+ f"\n[red]ChatGPT auth error:[/red] {e}\n"
+ f"[dim]Run 'planoai chatgpt login' to authenticate.[/dim]\n"
+ )
+ sys.exit(1)
+
+
def _print_missing_keys(console, missing_keys: list[str]) -> None:
console.print(f"\n[red]✗[/red] [red]Missing API keys![/red]\n")
for key in missing_keys:
@@ -317,7 +347,23 @@ def build(docker):
help="Show detailed startup logs with timestamps.",
is_flag=True,
)
-def up(file, path, foreground, with_tracing, tracing_port, docker, verbose):
+@click.option(
+ "--listener-port",
+ default=DEFAULT_LLM_LISTENER_PORT,
+ type=int,
+ show_default=True,
+ help="Override the LLM listener port when running without a config file. Ignored when a config file is present.",
+)
+def up(
+ file,
+ path,
+ foreground,
+ with_tracing,
+ tracing_port,
+ docker,
+ verbose,
+ listener_port,
+):
"""Starts Plano."""
from rich.status import Status
@@ -328,12 +374,23 @@ def up(file, path, foreground, with_tracing, tracing_port, docker, verbose):
# Use the utility function to find config file
plano_config_file = find_config_file(path, file)
- # Check if the file exists
+ # Zero-config fallback: when no user config is present, synthesize a
+ # pass-through config that covers the common LLM providers and
+ # auto-wires OTel export so ``planoai obs`` works. See cli/planoai/defaults.py.
if not os.path.exists(plano_config_file):
+ detection = detect_providers()
+ cfg_dict = synthesize_default_config(listener_port=listener_port)
+
+ default_dir = os.path.expanduser("~/.plano")
+ os.makedirs(default_dir, exist_ok=True)
+ synthesized_path = os.path.join(default_dir, "default_config.yaml")
+ with open(synthesized_path, "w") as fh:
+ yaml.safe_dump(cfg_dict, fh, sort_keys=False)
+ plano_config_file = synthesized_path
console.print(
- f"[red]✗[/red] Config file not found: [dim]{plano_config_file}[/dim]"
+ f"[dim]No plano config found; using defaults ({detection.summary}). "
+ f"Listening on :{listener_port}, tracing -> http://localhost:4317.[/dim]"
)
- sys.exit(1)
if not docker:
from planoai.native_runner import native_validate_config
@@ -384,6 +441,14 @@ def up(file, path, foreground, with_tracing, tracing_port, docker, verbose):
env = os.environ.copy()
env.pop("PATH", None)
+
+ with open(plano_config_file, "r") as f:
+ plano_config = yaml.safe_load(f)
+
+ # Inject ChatGPT tokens from ~/.plano/chatgpt/auth.json if any provider needs them
+ _inject_chatgpt_tokens_if_needed(plano_config, env, console)
+
# Check access keys
access_keys = get_llm_provider_access_keys(plano_config_file=plano_config_file)
access_keys = set(access_keys)
@@ -681,6 +746,8 @@ main.add_command(cli_agent)
main.add_command(generate_prompt_targets)
main.add_command(init_cmd, name="init")
main.add_command(trace_cmd, name="trace")
+main.add_command(chatgpt_cmd, name="chatgpt")
+main.add_command(obs_cmd, name="obs")
if __name__ == "__main__":
main()
diff --git a/cli/planoai/native_runner.py b/cli/planoai/native_runner.py
index bbbbfd3e..1b58b36d 100644
--- a/cli/planoai/native_runner.py
+++ b/cli/planoai/native_runner.py
@@ -253,6 +253,7 @@ def start_native(
log.info("Plano is running (native mode)")
for port in gateway_ports:
log.info(f" http://localhost:{port}")
+
break
# Check if processes are still alive
@@ -367,8 +368,11 @@ def _kill_pid(pid):
pass
-def stop_native():
- """Stop natively-running Envoy and brightstaff processes.
+def stop_native(skip_pids: set | None = None):
+ """Stop natively-running Envoy, brightstaff, and watchdog processes.
+
+ Args:
+ skip_pids: Set of PIDs to skip (used by the watchdog to avoid self-termination).
Returns:
bool: True if at least one process was running and received a stop signal,
@@ -385,7 +389,12 @@ def stop_native():
brightstaff_pid = pids.get("brightstaff_pid")
had_running_process = False
- for name, pid in [("envoy", envoy_pid), ("brightstaff", brightstaff_pid)]:
+ for name, pid in [
+ ("envoy", envoy_pid),
+ ("brightstaff", brightstaff_pid),
+ ]:
+ if skip_pids and pid in skip_pids:
+ continue
if pid is None:
continue
try:
diff --git a/cli/planoai/obs/__init__.py b/cli/planoai/obs/__init__.py
new file mode 100644
index 00000000..2f4e14af
--- /dev/null
+++ b/cli/planoai/obs/__init__.py
@@ -0,0 +1,6 @@
+"""Plano observability console: in-memory live view of LLM traffic."""
+
+from planoai.obs.collector import LLMCall, LLMCallStore, ObsCollector
+from planoai.obs.pricing import PricingCatalog
+
+__all__ = ["LLMCall", "LLMCallStore", "ObsCollector", "PricingCatalog"]
diff --git a/cli/planoai/obs/collector.py b/cli/planoai/obs/collector.py
new file mode 100644
index 00000000..7f4cae36
--- /dev/null
+++ b/cli/planoai/obs/collector.py
@@ -0,0 +1,266 @@
+"""In-memory collector for LLM calls, fed by OTLP/gRPC spans from brightstaff."""
+
+from __future__ import annotations
+
+import threading
+from collections import deque
+from concurrent import futures
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Any, Iterable
+
+import grpc
+from opentelemetry.proto.collector.trace.v1 import (
+ trace_service_pb2,
+ trace_service_pb2_grpc,
+)
+
+DEFAULT_GRPC_PORT = 4317
+DEFAULT_CAPACITY = 1000
+
+
+@dataclass
+class LLMCall:
+ """One LLM call as reconstructed from a brightstaff LLM span.
+
+ Fields default to ``None`` when the underlying span attribute was absent.
+ """
+
+ request_id: str
+ timestamp: datetime
+ model: str
+ provider: str | None = None
+ request_model: str | None = None
+ session_id: str | None = None
+ route_name: str | None = None
+ is_streaming: bool | None = None
+ status_code: int | None = None
+ prompt_tokens: int | None = None
+ completion_tokens: int | None = None
+ total_tokens: int | None = None
+ cached_input_tokens: int | None = None
+ cache_creation_tokens: int | None = None
+ reasoning_tokens: int | None = None
+ ttft_ms: float | None = None
+ duration_ms: float | None = None
+ routing_strategy: str | None = None
+ routing_reason: str | None = None
+ cost_usd: float | None = None
+
+ @property
+ def tpt_ms(self) -> float | None:
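+ # Per-token generation time, excluding TTFT. Worked example: 2000ms
+ # duration, 500ms TTFT, 100 completion tokens -> 1500 / 100 = 15 ms/token
+ # (~66.7 tokens/sec via tokens_per_sec below).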
+ if self.duration_ms is None or self.completion_tokens in (None, 0):
+ return None
+ ttft = self.ttft_ms or 0.0
+ generate_ms = max(0.0, self.duration_ms - ttft)
+ if generate_ms <= 0:
+ return None
+ return generate_ms / self.completion_tokens
+
+ @property
+ def tokens_per_sec(self) -> float | None:
+ tpt = self.tpt_ms
+ if tpt is None or tpt <= 0:
+ return None
+ return 1000.0 / tpt
+
+
+class LLMCallStore:
+ """Thread-safe ring buffer of recent LLM calls."""
+
+ def __init__(self, capacity: int = DEFAULT_CAPACITY) -> None:
+ self._capacity = capacity
+ self._calls: deque[LLMCall] = deque(maxlen=capacity)
+ self._lock = threading.Lock()
+
+ @property
+ def capacity(self) -> int:
+ return self._capacity
+
+ def add(self, call: LLMCall) -> None:
+ with self._lock:
+ self._calls.append(call)
+
+ def clear(self) -> None:
+ with self._lock:
+ self._calls.clear()
+
+ def snapshot(self) -> list[LLMCall]:
+ with self._lock:
+ return list(self._calls)
+
+ def __len__(self) -> int:
+ with self._lock:
+ return len(self._calls)
+
+
+# Span attribute keys used below are the canonical OTel / Plano keys emitted by
+# brightstaff — see crates/brightstaff/src/tracing/constants.rs for the source
+# of truth.
+
+
+def _anyvalue_to_python(value: Any) -> Any: # AnyValue from OTLP
+ kind = value.WhichOneof("value")
+ if kind == "string_value":
+ return value.string_value
+ if kind == "bool_value":
+ return value.bool_value
+ if kind == "int_value":
+ return value.int_value
+ if kind == "double_value":
+ return value.double_value
+ return None
+
+
+def _attrs_to_dict(attrs: Iterable[Any]) -> dict[str, Any]:
+ out: dict[str, Any] = {}
+ for kv in attrs:
+ py = _anyvalue_to_python(kv.value)
+ if py is not None:
+ out[kv.key] = py
+ return out
+
+
+def _maybe_int(value: Any) -> int | None:
+ if value is None:
+ return None
+ try:
+ return int(value)
+ except (TypeError, ValueError):
+ return None
+
+
+def _maybe_float(value: Any) -> float | None:
+ if value is None:
+ return None
+ try:
+ return float(value)
+ except (TypeError, ValueError):
+ return None
+
+
+def span_to_llm_call(
+ span: Any, service_name: str, pricing: Any | None = None
+) -> LLMCall | None:
+ """Convert an OTLP span into an LLMCall, or return None if it isn't one.
+
+ A span is considered an LLM call iff it carries the ``llm.model`` attribute.
+ """
+ attrs = _attrs_to_dict(span.attributes)
+ model = attrs.get("llm.model")
+ if not model:
+ return None
+
+ # Prefer explicit span attributes; fall back to likely aliases.
+ request_id = next(
+ (
+ str(attrs[key])
+ for key in ("request_id", "http.request_id")
+ if key in attrs and attrs[key] is not None
+ ),
+ span.span_id.hex() if span.span_id else "",
+ )
+ start_ns = span.start_time_unix_nano or 0
+ ts = (
+ datetime.fromtimestamp(start_ns / 1_000_000_000, tz=timezone.utc).astimezone()
+ if start_ns
+ else datetime.now().astimezone()
+ )
+
+ call = LLMCall(
+ request_id=str(request_id),
+ timestamp=ts,
+ model=str(model),
+ provider=(
+ str(attrs["llm.provider"]) if "llm.provider" in attrs else service_name
+ ),
+ request_model=(
+ str(attrs["model.requested"]) if "model.requested" in attrs else None
+ ),
+ session_id=(
+ str(attrs["plano.session_id"]) if "plano.session_id" in attrs else None
+ ),
+ route_name=(
+ str(attrs["plano.route.name"]) if "plano.route.name" in attrs else None
+ ),
+ is_streaming=(
+ bool(attrs["llm.is_streaming"]) if "llm.is_streaming" in attrs else None
+ ),
+ status_code=_maybe_int(attrs.get("http.status_code")),
+ prompt_tokens=_maybe_int(attrs.get("llm.usage.prompt_tokens")),
+ completion_tokens=_maybe_int(attrs.get("llm.usage.completion_tokens")),
+ total_tokens=_maybe_int(attrs.get("llm.usage.total_tokens")),
+ cached_input_tokens=_maybe_int(attrs.get("llm.usage.cached_input_tokens")),
+ cache_creation_tokens=_maybe_int(attrs.get("llm.usage.cache_creation_tokens")),
+ reasoning_tokens=_maybe_int(attrs.get("llm.usage.reasoning_tokens")),
+ ttft_ms=_maybe_float(attrs.get("llm.time_to_first_token")),
+ duration_ms=_maybe_float(attrs.get("llm.duration_ms")),
+ routing_strategy=(
+ str(attrs["routing.strategy"]) if "routing.strategy" in attrs else None
+ ),
+ routing_reason=(
+ str(attrs["routing.selection_reason"])
+ if "routing.selection_reason" in attrs
+ else None
+ ),
+ )
+
+ if pricing is not None:
+ call.cost_usd = pricing.cost_for_call(call)
+
+ return call
+
+
+class _ObsServicer(trace_service_pb2_grpc.TraceServiceServicer):
+ def __init__(self, store: LLMCallStore, pricing: Any | None) -> None:
+ self._store = store
+ self._pricing = pricing
+
+ def Export(self, request, context): # noqa: N802 — gRPC generated name
+ for resource_spans in request.resource_spans:
+ service_name = "unknown"
+ for attr in resource_spans.resource.attributes:
+ if attr.key == "service.name":
+ val = _anyvalue_to_python(attr.value)
+ if val is not None:
+ service_name = str(val)
+ break
+ for scope_spans in resource_spans.scope_spans:
+ for span in scope_spans.spans:
+ call = span_to_llm_call(span, service_name, self._pricing)
+ if call is not None:
+ self._store.add(call)
+ return trace_service_pb2.ExportTraceServiceResponse()
+
+
+@dataclass
+class ObsCollector:
+ """Owns the OTLP/gRPC server and the in-memory LLMCall ring buffer."""
+
+ store: LLMCallStore = field(default_factory=LLMCallStore)
+ pricing: Any | None = None
+ host: str = "0.0.0.0"
+ port: int = DEFAULT_GRPC_PORT
+ _server: grpc.Server | None = field(default=None, init=False, repr=False)
+
+ def start(self) -> None:
+ if self._server is not None:
+ return
+ server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
+ trace_service_pb2_grpc.add_TraceServiceServicer_to_server(
+ _ObsServicer(self.store, self.pricing), server
+ )
+ address = f"{self.host}:{self.port}"
+ bound = server.add_insecure_port(address)
+ if bound == 0:
+ raise OSError(
+ f"Failed to bind OTLP listener on {address}: port already in use. "
+ "Stop tracing via `planoai trace down` or pick another port with --port."
+ )
+ server.start()
+ self._server = server
+
+ def stop(self, grace: float = 2.0) -> None:
+ if self._server is not None:
+ self._server.stop(grace)
+ self._server = None
diff --git a/cli/planoai/obs/pricing.py b/cli/planoai/obs/pricing.py
new file mode 100644
index 00000000..6f2ce5b4
--- /dev/null
+++ b/cli/planoai/obs/pricing.py
@@ -0,0 +1,321 @@
+"""DigitalOcean Gradient pricing catalog for the obs console.
+
+Ported loosely from ``crates/brightstaff/src/router/model_metrics.rs::fetch_do_pricing``.
+Single-source: one fetch at startup, cached for the life of the process.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import threading
+from dataclasses import dataclass
+from typing import Any
+
+import requests
+
+DEFAULT_PRICING_URL = "https://api.digitalocean.com/v2/gen-ai/models/catalog"
+FETCH_TIMEOUT_SECS = 5.0
+
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass(frozen=True)
+class ModelPrice:
+ """Input/output $/token rates. Token counts are multiplied by these."""
+
+ input_per_token_usd: float
+ output_per_token_usd: float
+ cached_input_per_token_usd: float | None = None
+
+
+class PricingCatalog:
+ """In-memory pricing lookup keyed by model id.
+
+ DO's catalog uses ids like ``openai-gpt-5.4``; Plano's resolved model names
+ may arrive as ``do/openai-gpt-5.4`` or bare ``openai-gpt-5.4``. We strip the
+ leading provider prefix when looking up.
+ """
+
+ def __init__(self, prices: dict[str, ModelPrice] | None = None) -> None:
+ self._prices: dict[str, ModelPrice] = prices or {}
+ self._lock = threading.Lock()
+
+ def __len__(self) -> int:
+ with self._lock:
+ return len(self._prices)
+
+ def sample_models(self, n: int = 5) -> list[str]:
+ with self._lock:
+ return list(self._prices.keys())[:n]
+
+ @classmethod
+ def fetch(cls, url: str = DEFAULT_PRICING_URL) -> "PricingCatalog":
+ """Fetch pricing from DO's catalog endpoint. On failure, returns an
+ empty catalog (cost column will be blank).
+
+ The catalog endpoint is public — no auth required, no signup — so
+ ``planoai obs`` gets cost data on first run out of the box.
+ """
+ try:
+ resp = requests.get(url, timeout=FETCH_TIMEOUT_SECS)
+ resp.raise_for_status()
+ data = resp.json()
+ except Exception as exc: # noqa: BLE001 — best-effort; never fatal
+ logger.warning(
+ "DO pricing fetch failed: %s; cost column will be blank.",
+ exc,
+ )
+ return cls()
+
+ prices = _parse_do_pricing(data)
+ if not prices:
+ # Dump the first entry's raw shape so we can see which fields DO
+ # actually returned — helps when the catalog adds new fields or
+ # the response doesn't match our parser.
+ import json as _json
+
+ sample_items = _coerce_items(data)
+ sample = sample_items[0] if sample_items else data
+ logger.warning(
+ "DO pricing response had no parseable entries; cost column "
+ "will be blank. Sample entry: %s",
+ _json.dumps(sample, default=str)[:400],
+ )
+ return cls(prices)
+
+ def price_for(self, model_name: str | None) -> ModelPrice | None:
+ if not model_name:
+ return None
+ with self._lock:
+ # Try the full name first, then stripped prefix, then lowercased variants.
+ for candidate in _model_key_candidates(model_name):
+ hit = self._prices.get(candidate)
+ if hit is not None:
+ return hit
+ return None
+
+ def cost_for_call(self, call: Any) -> float | None:
+ """Compute USD cost for an LLMCall. Returns None when pricing is unknown."""
+ price = self.price_for(getattr(call, "model", None)) or self.price_for(
+ getattr(call, "request_model", None)
+ )
+ if price is None:
+ return None
+ prompt = int(getattr(call, "prompt_tokens", 0) or 0)
+ completion = int(getattr(call, "completion_tokens", 0) or 0)
+ cached = int(getattr(call, "cached_input_tokens", 0) or 0)
+
+ # Cached input tokens are priced separately at the cached rate when known;
+ # otherwise they're already counted in prompt tokens at the regular rate.
+ fresh_prompt = prompt
+ if price.cached_input_per_token_usd is not None and cached:
+ fresh_prompt = max(0, prompt - cached)
+ cost_cached = cached * price.cached_input_per_token_usd
+ else:
+ cost_cached = 0.0
+
+ cost = (
+ fresh_prompt * price.input_per_token_usd
+ + completion * price.output_per_token_usd
+ + cost_cached
+ )
+ return round(cost, 6)
+
+
+_DATE_SUFFIX_RE = re.compile(r"-\d{8}$")
+_PROVIDER_PREFIXES = ("anthropic", "openai", "google", "meta", "cohere", "mistral")
+_ANTHROPIC_FAMILIES = {"opus", "sonnet", "haiku"}
+
+
+def _model_key_candidates(model_name: str) -> list[str]:
+ """Lookup-side variants of a Plano-emitted model name.
+
+ Plano resolves names like ``claude-haiku-4-5-20251001``; the catalog stores
+ them as ``anthropic-claude-haiku-4.5``. We strip the date suffix and the
+ ``provider/`` prefix here; the catalog itself registers the dash/dot and
+ family-order aliases at parse time (see :func:`_expand_aliases`).
+ """
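+ # e.g. "do/openai-gpt-5.4-20260101" (hypothetical id) yields itself plus
+ # "openai-gpt-5.4-20260101", "do/openai-gpt-5.4", and "openai-gpt-5.4";
+ # lowercase variants are added and duplicates dropped.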
+ base = model_name.strip()
+ out = [base]
+ if "/" in base:
+ out.append(base.split("/", 1)[1])
+ for k in list(out):
+ stripped = _DATE_SUFFIX_RE.sub("", k)
+ if stripped != k:
+ out.append(stripped)
+ out.extend([v.lower() for v in list(out)])
+ seen: set[str] = set()
+ uniq = []
+ for key in out:
+ if key not in seen:
+ seen.add(key)
+ uniq.append(key)
+ return uniq
+
+
+def _expand_aliases(model_id: str) -> set[str]:
+ """Catalog-side variants of a DO model id.
+
+ DO publishes Anthropic models under ids like ``anthropic-claude-opus-4.7``
+ or ``anthropic-claude-4.6-sonnet`` while Plano emits ``claude-opus-4-7`` /
+ ``claude-sonnet-4-6``. Generate a set covering provider-prefix stripping,
+ dash↔dot in version segments, and family↔version word order so a single
+ catalog entry matches every name shape we'll see at lookup.
+ """
+ aliases: set[str] = set()
+
+ def add(name: str) -> None:
+ if not name:
+ return
+ aliases.add(name)
+ aliases.add(name.lower())
+
+ add(model_id)
+
+ base = model_id
+ head, _, rest = base.partition("-")
+ if head.lower() in _PROVIDER_PREFIXES and rest:
+ add(rest)
+ base = rest
+
+ for key in list(aliases):
+ if "." in key:
+ add(key.replace(".", "-"))
+
+ parts = base.split("-")
+ if len(parts) >= 3 and parts[0].lower() == "claude":
+ rest_parts = parts[1:]
+ for i, p in enumerate(rest_parts):
+ if p.lower() in _ANTHROPIC_FAMILIES:
+ others = rest_parts[:i] + rest_parts[i + 1 :]
+ if not others:
+ break
+ family_last = "claude-" + "-".join(others) + "-" + p
+ family_first = "claude-" + p + "-" + "-".join(others)
+ add(family_last)
+ add(family_first)
+ add(family_last.replace(".", "-"))
+ add(family_first.replace(".", "-"))
+ break
+
+ return aliases
+
+
+def _parse_do_pricing(data: Any) -> dict[str, ModelPrice]:
+ """Parse DO catalog response into a ModelPrice map keyed by model id.
+
+ DO's shape (as of 2026-04):
+ {
+ "data": [
+ {"model_id": "openai-gpt-5.4",
+ "pricing": {"input_price_per_million": 5.0,
+ "output_price_per_million": 15.0}},
+ ...
+ ]
+ }
+
+ Older/alternate shapes are also accepted (flat top-level fields, or the
+ ``id``/``model``/``name`` key).
+ """
+ prices: dict[str, ModelPrice] = {}
+ items = _coerce_items(data)
+ for item in items:
+ model_id = (
+ item.get("model_id")
+ or item.get("id")
+ or item.get("model")
+ or item.get("name")
+ )
+ if not model_id:
+ continue
+
+ # DO nests rates under `pricing`; try that first, then fall back to
+ # top-level fields for alternate response shapes.
+ sources = [item]
+ if isinstance(item.get("pricing"), dict):
+ sources.insert(0, item["pricing"])
+
+ input_rate = _extract_rate_from_sources(
+ sources,
+ ["input_per_token", "input_token_price", "price_input"],
+ ["input_price_per_million", "input_per_million", "input_per_mtok"],
+ )
+ output_rate = _extract_rate_from_sources(
+ sources,
+ ["output_per_token", "output_token_price", "price_output"],
+ ["output_price_per_million", "output_per_million", "output_per_mtok"],
+ )
+ cached_rate = _extract_rate_from_sources(
+ sources,
+ [
+ "cached_input_per_token",
+ "cached_input_token_price",
+ "prompt_cache_read_per_token",
+ ],
+ [
+ "cached_input_price_per_million",
+ "cached_input_per_million",
+ "cached_input_per_mtok",
+ ],
+ )
+
+ if input_rate is None or output_rate is None:
+ continue
+ # Treat 0-rate entries as "unknown" so cost falls back to `—` rather
+ # than showing a misleading $0.0000. DO's catalog sometimes omits
+ # rates for promo/open-weight models.
+ if input_rate == 0 and output_rate == 0:
+ continue
+ price = ModelPrice(
+ input_per_token_usd=input_rate,
+ output_per_token_usd=output_rate,
+ cached_input_per_token_usd=cached_rate,
+ )
+ for alias in _expand_aliases(str(model_id)):
+ prices.setdefault(alias, price)
+ return prices
+
+
+def _coerce_items(data: Any) -> list[dict]:
+ if isinstance(data, list):
+ return [x for x in data if isinstance(x, dict)]
+ if isinstance(data, dict):
+ for key in ("data", "models", "pricing", "items"):
+ val = data.get(key)
+ if isinstance(val, list):
+ return [x for x in val if isinstance(x, dict)]
+ return []
+
+
+def _extract_rate_from_sources(
+ sources: list[dict],
+ per_token_keys: list[str],
+ per_million_keys: list[str],
+) -> float | None:
+ """Return a per-token rate in USD, or None if unknown.
+
+ Some DO catalog responses put per-token values under a field whose name
+ says ``_per_million`` (e.g. ``input_price_per_million: 5E-8`` — that's
+ $5e-8 per token, not per million). Heuristic: values < 1 are already
+ per-token (real per-million rates are ~0.1 to ~100); values >= 1 are
+ treated as per-million and divided by 1,000,000.
+ """
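+ # e.g. {"input_price_per_million": 5.0} -> 5.0 / 1_000_000 = $0.000005/token,
+ # while {"input_price_per_million": 5e-8} is < 1 and returned as-is (per-token).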
+ for src in sources:
+ for key in per_token_keys:
+ if key in src and src[key] is not None:
+ try:
+ return float(src[key])
+ except (TypeError, ValueError):
+ continue
+ for key in per_million_keys:
+ if key in src and src[key] is not None:
+ try:
+ v = float(src[key])
+ except (TypeError, ValueError):
+ continue
+ if v >= 1:
+ return v / 1_000_000
+ return v
+ return None
diff --git a/cli/planoai/obs/render.py b/cli/planoai/obs/render.py
new file mode 100644
index 00000000..e3583747
--- /dev/null
+++ b/cli/planoai/obs/render.py
@@ -0,0 +1,634 @@
+"""Rich TUI renderer for the observability console."""
+
+from __future__ import annotations
+
+from collections import Counter
+from dataclasses import dataclass
+from datetime import datetime
+from http import HTTPStatus
+
+from rich.align import Align
+from rich.box import SIMPLE, SIMPLE_HEAVY
+from rich.console import Group
+from rich.panel import Panel
+from rich.table import Table
+from rich.text import Text
+
+from planoai.obs.collector import LLMCall
+
+MAX_WIDTH = 160
+
+
+@dataclass
+class AggregateStats:
+ count: int
+ total_cost_usd: float
+ total_input_tokens: int
+ total_output_tokens: int
+ distinct_sessions: int
+ current_session: str | None
+ p50_latency_ms: float | None = None
+ p95_latency_ms: float | None = None
+ p99_latency_ms: float | None = None
+ p50_ttft_ms: float | None = None
+ p95_ttft_ms: float | None = None
+ p99_ttft_ms: float | None = None
+ error_count: int = 0
+ errors_4xx: int = 0
+ errors_5xx: int = 0
+ has_cost: bool = False
+
+
+@dataclass
+class ModelRollup:
+ model: str
+ requests: int
+ input_tokens: int
+ output_tokens: int
+ cache_write: int
+ cache_read: int
+ cost_usd: float
+ has_cost: bool = False
+ avg_tokens_per_sec: float | None = None
+
+
+def _percentile(values: list[float], pct: float) -> float | None:
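+ # Nearest-rank percentile over the sorted sample, e.g.
+ # _percentile([10, 20, 30, 40], 50) -> 30 (index round(1.5) = 2).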
+ if not values:
+ return None
+ s = sorted(values)
+ k = max(0, min(len(s) - 1, int(round((pct / 100.0) * (len(s) - 1)))))
+ return s[k]
+
+
+def aggregates(calls: list[LLMCall]) -> AggregateStats:
+ total_cost = sum((c.cost_usd or 0.0) for c in calls)
+ total_input = sum(int(c.prompt_tokens or 0) for c in calls)
+ total_output = sum(int(c.completion_tokens or 0) for c in calls)
+ session_ids = {c.session_id for c in calls if c.session_id}
+ current = next(
+ (c.session_id for c in reversed(calls) if c.session_id is not None), None
+ )
+ durations = [c.duration_ms for c in calls if c.duration_ms is not None]
+ ttfts = [c.ttft_ms for c in calls if c.ttft_ms is not None]
+ errors_4xx = sum(
+ 1 for c in calls if c.status_code is not None and 400 <= c.status_code < 500
+ )
+ errors_5xx = sum(
+ 1 for c in calls if c.status_code is not None and c.status_code >= 500
+ )
+ has_cost = any(c.cost_usd is not None for c in calls)
+ return AggregateStats(
+ count=len(calls),
+ total_cost_usd=total_cost,
+ total_input_tokens=total_input,
+ total_output_tokens=total_output,
+ distinct_sessions=len(session_ids),
+ current_session=current,
+ p50_latency_ms=_percentile(durations, 50),
+ p95_latency_ms=_percentile(durations, 95),
+ p99_latency_ms=_percentile(durations, 99),
+ p50_ttft_ms=_percentile(ttfts, 50),
+ p95_ttft_ms=_percentile(ttfts, 95),
+ p99_ttft_ms=_percentile(ttfts, 99),
+ error_count=errors_4xx + errors_5xx,
+ errors_4xx=errors_4xx,
+ errors_5xx=errors_5xx,
+ has_cost=has_cost,
+ )
+
+
+def model_rollups(calls: list[LLMCall]) -> list[ModelRollup]:
+ buckets: dict[str, dict[str, float | int | bool]] = {}
+ tps_samples: dict[str, list[float]] = {}
+ for c in calls:
+ key = c.model
+ b = buckets.setdefault(
+ key,
+ {
+ "requests": 0,
+ "input": 0,
+ "output": 0,
+ "cache_write": 0,
+ "cache_read": 0,
+ "cost": 0.0,
+ "has_cost": False,
+ },
+ )
+ b["requests"] = int(b["requests"]) + 1
+ b["input"] = int(b["input"]) + int(c.prompt_tokens or 0)
+ b["output"] = int(b["output"]) + int(c.completion_tokens or 0)
+ b["cache_write"] = int(b["cache_write"]) + int(c.cache_creation_tokens or 0)
+ b["cache_read"] = int(b["cache_read"]) + int(c.cached_input_tokens or 0)
+ b["cost"] = float(b["cost"]) + (c.cost_usd or 0.0)
+ if c.cost_usd is not None:
+ b["has_cost"] = True
+ tps = c.tokens_per_sec
+ if tps is not None:
+ tps_samples.setdefault(key, []).append(tps)
+
+ rollups: list[ModelRollup] = []
+ for model, b in buckets.items():
+ samples = tps_samples.get(model)
+ avg_tps = (sum(samples) / len(samples)) if samples else None
+ rollups.append(
+ ModelRollup(
+ model=model,
+ requests=int(b["requests"]),
+ input_tokens=int(b["input"]),
+ output_tokens=int(b["output"]),
+ cache_write=int(b["cache_write"]),
+ cache_read=int(b["cache_read"]),
+ cost_usd=float(b["cost"]),
+ has_cost=bool(b["has_cost"]),
+ avg_tokens_per_sec=avg_tps,
+ )
+ )
+ rollups.sort(key=lambda r: (r.cost_usd, r.requests), reverse=True)
+ return rollups
+
+
+@dataclass
+class RouteHit:
+ route: str
+ hits: int
+ pct: float
+ p95_latency_ms: float | None
+ error_count: int
+
+
+def route_hits(calls: list[LLMCall]) -> list[RouteHit]:
+ counts: Counter[str] = Counter()
+ per_route_latency: dict[str, list[float]] = {}
+ per_route_errors: dict[str, int] = {}
+ for c in calls:
+ if not c.route_name:
+ continue
+ counts[c.route_name] += 1
+ if c.duration_ms is not None:
+ per_route_latency.setdefault(c.route_name, []).append(c.duration_ms)
+ if c.status_code is not None and c.status_code >= 400:
+ per_route_errors[c.route_name] = per_route_errors.get(c.route_name, 0) + 1
+ total = sum(counts.values())
+ if total == 0:
+ return []
+ return [
+ RouteHit(
+ route=r,
+ hits=n,
+ pct=(n / total) * 100.0,
+ p95_latency_ms=_percentile(per_route_latency.get(r, []), 95),
+ error_count=per_route_errors.get(r, 0),
+ )
+ for r, n in counts.most_common()
+ ]
+
+
+def _fmt_cost(v: float | None, *, zero: str = "—") -> str:
+ if v is None:
+ return "—"
+ if v == 0:
+ return zero
+ if abs(v) < 0.0001:
+ return f"${v:.8f}".rstrip("0").rstrip(".")
+ if abs(v) < 0.01:
+ return f"${v:.6f}".rstrip("0").rstrip(".")
+ if abs(v) < 1:
+ return f"${v:.4f}"
+ return f"${v:,.2f}"
+
+
+def _fmt_ms(v: float | None) -> str:
+ if v is None:
+ return "—"
+ if v >= 1000:
+ return f"{v / 1000:.1f}s"
+ return f"{v:.0f}ms"
+
+
+def _fmt_int(v: int | None) -> str:
+ if v is None or v == 0:
+ return "—"
+ return f"{v:,}"
+
+
+def _fmt_tokens(v: int | None) -> str:
+ if v is None:
+ return "—"
+ return f"{v:,}"
+
+
+def _fmt_tps(v: float | None) -> str:
+ if v is None or v <= 0:
+ return "—"
+ if v >= 100:
+ return f"{v:.0f}/s"
+ return f"{v:.1f}/s"
+
+
+def _latency_style(v: float | None) -> str:
+ if v is None:
+ return "dim"
+ if v < 500:
+ return "green"
+ if v < 2000:
+ return "yellow"
+ return "red"
+
+
+def _ttft_style(v: float | None) -> str:
+ if v is None:
+ return "dim"
+ if v < 300:
+ return "green"
+ if v < 1000:
+ return "yellow"
+ return "red"
+
+
+def _truncate_model(name: str, limit: int = 32) -> str:
+ if len(name) <= limit:
+ return name
+ return name[: limit - 1] + "…"
+
+
+def _status_text(code: int | None) -> Text:
+ if code is None:
+ return Text("—", style="dim")
+ if 200 <= code < 300:
+ return Text("● ok", style="green")
+ if 300 <= code < 400:
+ return Text(f"● {code}", style="yellow")
+ if 400 <= code < 500:
+ return Text(f"● {code}", style="yellow bold")
+ return Text(f"● {code}", style="red bold")
+
+
+def _summary_panel(last: LLMCall | None, stats: AggregateStats) -> Panel:
+ # Content-sized columns with a fixed gutter keep the two blocks close
+ # together instead of stretching across the full terminal on wide screens.
+ grid = Table.grid(padding=(0, 4))
+ grid.add_column(no_wrap=True)
+ grid.add_column(no_wrap=True)
+
+ # Left: latest request snapshot.
+ left = Table.grid(padding=(0, 1))
+ left.add_column(style="dim", no_wrap=True)
+ left.add_column(no_wrap=True)
+ if last is None:
+ left.add_row("latest", Text("waiting for spans…", style="dim italic"))
+ else:
+ model_text = Text(_truncate_model(last.model, 48), style="bold cyan")
+ if last.is_streaming:
+ model_text.append(" ⟳ stream", style="dim")
+ left.add_row("model", model_text)
+ if last.request_model and last.request_model != last.model:
+ left.add_row(
+ "requested", Text(_truncate_model(last.request_model, 48), style="cyan")
+ )
+ if last.route_name:
+ left.add_row("route", Text(last.route_name, style="yellow"))
+ left.add_row("status", _status_text(last.status_code))
+ tokens = Text()
+ tokens.append(_fmt_tokens(last.prompt_tokens))
+ tokens.append(" in", style="dim")
+ tokens.append(" · ", style="dim")
+ tokens.append(_fmt_tokens(last.completion_tokens), style="green")
+ tokens.append(" out", style="dim")
+ if last.cached_input_tokens:
+ tokens.append(" · ", style="dim")
+ tokens.append(_fmt_tokens(last.cached_input_tokens), style="yellow")
+ tokens.append(" cached", style="dim")
+ left.add_row("tokens", tokens)
+ timing = Text()
+ timing.append("TTFT ", style="dim")
+ timing.append(_fmt_ms(last.ttft_ms), style=_ttft_style(last.ttft_ms))
+ timing.append(" · ", style="dim")
+ timing.append("lat ", style="dim")
+ timing.append(_fmt_ms(last.duration_ms), style=_latency_style(last.duration_ms))
+ tps = last.tokens_per_sec
+ if tps:
+ timing.append(" · ", style="dim")
+ timing.append(_fmt_tps(tps), style="green")
+ left.add_row("timing", timing)
+ left.add_row("cost", Text(_fmt_cost(last.cost_usd), style="green bold"))
+
+ # Right: lifetime totals.
+ right = Table.grid(padding=(0, 1))
+ right.add_column(style="dim", no_wrap=True)
+ right.add_column(no_wrap=True)
+ right.add_row(
+ "requests",
+ Text(f"{stats.count:,}", style="bold"),
+ )
+ if stats.error_count:
+ err_text = Text()
+ err_text.append(f"{stats.error_count:,}", style="red bold")
+ parts: list[str] = []
+ if stats.errors_4xx:
+ parts.append(f"{stats.errors_4xx} 4xx")
+ if stats.errors_5xx:
+ parts.append(f"{stats.errors_5xx} 5xx")
+ if parts:
+ err_text.append(f" ({' · '.join(parts)})", style="dim")
+ right.add_row("errors", err_text)
+ cost_str = _fmt_cost(stats.total_cost_usd) if stats.has_cost else "—"
+ right.add_row("total cost", Text(cost_str, style="green bold"))
+ tokens_total = Text()
+ tokens_total.append(_fmt_tokens(stats.total_input_tokens))
+ tokens_total.append(" in", style="dim")
+ tokens_total.append(" · ", style="dim")
+ tokens_total.append(_fmt_tokens(stats.total_output_tokens), style="green")
+ tokens_total.append(" out", style="dim")
+ right.add_row("tokens", tokens_total)
+ lat_text = Text()
+ lat_text.append("p50 ", style="dim")
+ lat_text.append(
+ _fmt_ms(stats.p50_latency_ms), style=_latency_style(stats.p50_latency_ms)
+ )
+ lat_text.append(" · ", style="dim")
+ lat_text.append("p95 ", style="dim")
+ lat_text.append(
+ _fmt_ms(stats.p95_latency_ms), style=_latency_style(stats.p95_latency_ms)
+ )
+ lat_text.append(" · ", style="dim")
+ lat_text.append("p99 ", style="dim")
+ lat_text.append(
+ _fmt_ms(stats.p99_latency_ms), style=_latency_style(stats.p99_latency_ms)
+ )
+ right.add_row("latency", lat_text)
+ ttft_text = Text()
+ ttft_text.append("p50 ", style="dim")
+ ttft_text.append(_fmt_ms(stats.p50_ttft_ms), style=_ttft_style(stats.p50_ttft_ms))
+ ttft_text.append(" · ", style="dim")
+ ttft_text.append("p95 ", style="dim")
+ ttft_text.append(_fmt_ms(stats.p95_ttft_ms), style=_ttft_style(stats.p95_ttft_ms))
+ ttft_text.append(" · ", style="dim")
+ ttft_text.append("p99 ", style="dim")
+ ttft_text.append(_fmt_ms(stats.p99_ttft_ms), style=_ttft_style(stats.p99_ttft_ms))
+ right.add_row("TTFT", ttft_text)
+ sess = Text()
+ sess.append(f"{stats.distinct_sessions}")
+ if stats.current_session:
+ sess.append(" · current ", style="dim")
+ sess.append(stats.current_session, style="magenta")
+ right.add_row("sessions", sess)
+
+ grid.add_row(left, right)
+ return Panel(
+ grid,
+ title="[bold]live LLM traffic[/]",
+ border_style="cyan",
+ box=SIMPLE_HEAVY,
+ padding=(0, 1),
+ )
+
+
+def _model_rollup_table(rollups: list[ModelRollup]) -> Table:
+ table = Table(
+ title="by model",
+ title_justify="left",
+ title_style="bold dim",
+ caption="cost via DigitalOcean Gradient catalog",
+ caption_justify="left",
+ caption_style="dim italic",
+ box=SIMPLE,
+ header_style="bold",
+ pad_edge=False,
+ padding=(0, 1),
+ )
+ table.add_column("model", style="cyan", no_wrap=True)
+ table.add_column("req", justify="right")
+ table.add_column("input", justify="right")
+ table.add_column("output", justify="right", style="green")
+ table.add_column("cache wr", justify="right", style="yellow")
+ table.add_column("cache rd", justify="right", style="yellow")
+ table.add_column("tok/s", justify="right")
+ table.add_column("cost", justify="right", style="green")
+ if not rollups:
+ table.add_row(
+ Text("no requests yet", style="dim italic"),
+ *(["—"] * 7),
+ )
+ return table
+ for r in rollups:
+ cost_cell = _fmt_cost(r.cost_usd) if r.has_cost else "—"
+ table.add_row(
+ _truncate_model(r.model),
+ f"{r.requests:,}",
+ _fmt_tokens(r.input_tokens),
+ _fmt_tokens(r.output_tokens),
+ _fmt_int(r.cache_write),
+ _fmt_int(r.cache_read),
+ _fmt_tps(r.avg_tokens_per_sec),
+ cost_cell,
+ )
+ return table
+
+
+def _route_hit_table(hits: list[RouteHit]) -> Table:
+ table = Table(
+ title="route share",
+ title_justify="left",
+ title_style="bold dim",
+ box=SIMPLE,
+ header_style="bold",
+ pad_edge=False,
+ padding=(0, 1),
+ )
+ table.add_column("route", style="cyan")
+ table.add_column("hits", justify="right")
+ table.add_column("%", justify="right")
+ table.add_column("p95", justify="right")
+ table.add_column("err", justify="right")
+ for h in hits:
+ err_cell = (
+ Text(f"{h.error_count:,}", style="red bold") if h.error_count else "—"
+ )
+ table.add_row(
+ h.route,
+ f"{h.hits:,}",
+ f"{h.pct:5.1f}%",
+ Text(_fmt_ms(h.p95_latency_ms), style=_latency_style(h.p95_latency_ms)),
+ err_cell,
+ )
+ return table
+
+
+def _recent_table(calls: list[LLMCall], limit: int = 15) -> Table:
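+    # Optional columns render only when at least one buffered call actually
+    # carries the data, so sparse traffic keeps the table compact.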
+ show_route = any(c.route_name for c in calls)
+ show_cache = any((c.cached_input_tokens or 0) > 0 for c in calls)
+ show_rsn = any((c.reasoning_tokens or 0) > 0 for c in calls)
+
+ caption_parts = ["in·new = fresh prompt tokens"]
+ if show_cache:
+ caption_parts.append("in·cache = cached read")
+ if show_rsn:
+ caption_parts.append("rsn = reasoning")
+ caption_parts.append("lat = total latency")
+
+ table = Table(
+ title=f"recent · last {min(limit, len(calls)) if calls else 0}",
+ title_justify="left",
+ title_style="bold dim",
+ caption=" · ".join(caption_parts),
+ caption_justify="left",
+ caption_style="dim italic",
+ box=SIMPLE,
+ header_style="bold",
+ pad_edge=False,
+ padding=(0, 1),
+ )
+ table.add_column("time", no_wrap=True)
+ table.add_column("model", style="cyan", no_wrap=True)
+ if show_route:
+ table.add_column("route", style="yellow", no_wrap=True)
+ table.add_column("in·new", justify="right")
+ if show_cache:
+ table.add_column("in·cache", justify="right", style="yellow")
+ table.add_column("out", justify="right", style="green")
+ if show_rsn:
+ table.add_column("rsn", justify="right")
+ table.add_column("tok/s", justify="right")
+ table.add_column("TTFT", justify="right")
+ table.add_column("lat", justify="right")
+ table.add_column("cost", justify="right", style="green")
+ table.add_column("status")
+
+ if not calls:
+ cols = len(table.columns)
+ table.add_row(
+ Text("waiting for spans…", style="dim italic"),
+ *(["—"] * (cols - 1)),
+ )
+ return table
+
+ recent = list(reversed(calls))[:limit]
+ for idx, c in enumerate(recent):
+ is_newest = idx == 0
+ time_style = "bold white" if is_newest else None
+ model_style = "bold cyan" if is_newest else "cyan"
+ row: list[object] = [
+ (
+ Text(c.timestamp.strftime("%H:%M:%S"), style=time_style)
+ if time_style
+ else c.timestamp.strftime("%H:%M:%S")
+ ),
+ Text(_truncate_model(c.model), style=model_style),
+ ]
+ if show_route:
+ row.append(c.route_name or "—")
+ row.append(_fmt_tokens(c.prompt_tokens))
+ if show_cache:
+ row.append(_fmt_int(c.cached_input_tokens))
+ row.append(_fmt_tokens(c.completion_tokens))
+ if show_rsn:
+ row.append(_fmt_int(c.reasoning_tokens))
+ row.extend(
+ [
+ _fmt_tps(c.tokens_per_sec),
+ Text(_fmt_ms(c.ttft_ms), style=_ttft_style(c.ttft_ms)),
+ Text(_fmt_ms(c.duration_ms), style=_latency_style(c.duration_ms)),
+ _fmt_cost(c.cost_usd),
+ _status_text(c.status_code),
+ ]
+ )
+ table.add_row(*row)
+ return table
+
+
+def _last_error(calls: list[LLMCall]) -> LLMCall | None:
+ for c in reversed(calls):
+ if c.status_code is not None and c.status_code >= 400:
+ return c
+ return None
+
+
+def _http_reason(code: int) -> str:
+ try:
+ return HTTPStatus(code).phrase
+ except ValueError:
+ return ""
+
+
+def _fmt_ago(ts: datetime) -> str:
+ # `ts` is produced in collector.py via datetime.now(tz=...), but fall back
+ # gracefully if a naive timestamp ever sneaks in.
+ now = datetime.now(tz=ts.tzinfo) if ts.tzinfo else datetime.now()
+ delta = (now - ts).total_seconds()
+ if delta < 0:
+ delta = 0
+ if delta < 60:
+ return f"{int(delta)}s ago"
+ if delta < 3600:
+ return f"{int(delta // 60)}m ago"
+ return f"{int(delta // 3600)}h ago"
+
+
+def _error_banner(call: LLMCall) -> Panel:
+ code = call.status_code or 0
+ border = "red" if code >= 500 else "yellow"
+ header = Text()
+ header.append(f"● {code}", style=f"{border} bold")
+ reason = _http_reason(code)
+ if reason:
+ header.append(f" {reason}", style=border)
+ header.append(" · ", style="dim")
+ header.append(_truncate_model(call.model, 48), style="cyan")
+ if call.route_name:
+ header.append(" · ", style="dim")
+ header.append(call.route_name, style="yellow")
+ header.append(" · ", style="dim")
+ header.append(_fmt_ago(call.timestamp), style="dim")
+ if call.request_id:
+ header.append(" · req ", style="dim")
+ header.append(call.request_id, style="magenta")
+ return Panel(
+ header,
+ title="[bold]last error[/]",
+ title_align="left",
+ border_style=border,
+ box=SIMPLE,
+ padding=(0, 1),
+ )
+
+
+def _footer(stats: AggregateStats) -> Text:
+ waiting = stats.count == 0
+ text = Text()
+ text.append("Ctrl-C ", style="bold")
+ text.append("exit", style="dim")
+ text.append(" · OTLP :4317", style="dim")
+ text.append(" · pricing: DigitalOcean ", style="dim")
+ if waiting:
+ text.append("waiting for spans", style="yellow")
+        text.append(
+            " — set tracing.opentracing_grpc_endpoint: http://localhost:4317",
+            style="dim",
+        )
+ else:
+ text.append(f"receiving · {stats.count:,} call(s) buffered", style="green")
+ return text
+
+
+def render(calls: list[LLMCall]) -> Align:
+ last = calls[-1] if calls else None
+ stats = aggregates(calls)
+ rollups = model_rollups(calls)
+ hits = route_hits(calls)
+
+ parts: list[object] = [_summary_panel(last, stats)]
+ err = _last_error(calls)
+ if err is not None:
+ parts.append(_error_banner(err))
+ if hits:
+ split = Table.grid(padding=(0, 2))
+ split.add_column(no_wrap=False)
+ split.add_column(no_wrap=False)
+ split.add_row(_model_rollup_table(rollups), _route_hit_table(hits))
+ parts.append(split)
+ else:
+ parts.append(_model_rollup_table(rollups))
+ parts.append(_recent_table(calls))
+ parts.append(_footer(stats))
+ # Cap overall width so wide terminals don't stretch the layout into a
+ # mostly-whitespace gap between columns.
+ return Align.left(Group(*parts), width=MAX_WIDTH)
diff --git a/cli/planoai/obs_cmd.py b/cli/planoai/obs_cmd.py
new file mode 100644
index 00000000..6249df30
--- /dev/null
+++ b/cli/planoai/obs_cmd.py
@@ -0,0 +1,99 @@
+"""`planoai obs` — live observability TUI."""
+
+from __future__ import annotations
+
+import time
+
+import rich_click as click
+from rich.console import Console
+from rich.live import Live
+
+from planoai.consts import PLANO_COLOR
+from planoai.obs.collector import (
+ DEFAULT_CAPACITY,
+ DEFAULT_GRPC_PORT,
+ LLMCallStore,
+ ObsCollector,
+)
+from planoai.obs.pricing import PricingCatalog
+from planoai.obs.render import render
+
+
+@click.command(name="obs", help="Live observability console for Plano LLM traffic.")
+@click.option(
+ "--port",
+ type=int,
+ default=DEFAULT_GRPC_PORT,
+ show_default=True,
+ help="OTLP/gRPC port to listen on. Must match the brightstaff tracing endpoint.",
+)
+@click.option(
+ "--host",
+ type=str,
+ default="0.0.0.0",
+ show_default=True,
+ help="Host to bind the OTLP listener.",
+)
+@click.option(
+ "--capacity",
+ type=int,
+ default=DEFAULT_CAPACITY,
+ show_default=True,
+ help="Max LLM calls kept in memory; older calls evicted FIFO.",
+)
+@click.option(
+ "--refresh-ms",
+ type=int,
+ default=500,
+ show_default=True,
+ help="TUI refresh interval.",
+)
+def obs(port: int, host: str, capacity: int, refresh_ms: int) -> None:
+ console = Console()
+ console.print(
+ f"[bold {PLANO_COLOR}]planoai obs[/] — loading DO pricing catalog...",
+ end="",
+ )
+ pricing = PricingCatalog.fetch()
+ if len(pricing):
+ sample = ", ".join(pricing.sample_models(3))
+ console.print(
+ f" [green]{len(pricing)} models loaded[/] [dim]({sample}, ...)[/]"
+ )
+ else:
+ console.print(
+ " [yellow]no pricing loaded[/] — "
+ "[dim]cost column will be blank (DO catalog unreachable)[/]"
+ )
+
+ store = LLMCallStore(capacity=capacity)
+ collector = ObsCollector(store=store, pricing=pricing, host=host, port=port)
+ try:
+ collector.start()
+ except OSError as exc:
+ console.print(f"[red]{exc}[/]")
+ raise SystemExit(1)
+
+ console.print(
+ f"Listening for OTLP spans on [bold]{host}:{port}[/]. "
+ "Ensure plano config has [cyan]tracing.opentracing_grpc_endpoint: http://localhost:4317[/] "
+ "and [cyan]tracing.random_sampling: 100[/] (or run [bold]planoai up[/] "
+ "with no config — it wires this automatically)."
+ )
+ console.print("Press [bold]Ctrl-C[/] to exit.\n")
+
+ refresh = max(0.05, refresh_ms / 1000.0)
+ try:
+ with Live(
+ render(store.snapshot()),
+ console=console,
+ refresh_per_second=1.0 / refresh,
+ screen=False,
+ ) as live:
+ while True:
+ time.sleep(refresh)
+ live.update(render(store.snapshot()))
+ except KeyboardInterrupt:
+ console.print("\n[dim]obs stopped[/]")
+ finally:
+ collector.stop()
diff --git a/cli/planoai/rich_click_config.py b/cli/planoai/rich_click_config.py
index ba75bc23..fe90dcf1 100644
--- a/cli/planoai/rich_click_config.py
+++ b/cli/planoai/rich_click_config.py
@@ -61,7 +61,7 @@ def configure_rich_click(plano_color: str) -> None:
},
{
"name": "Observability",
- "commands": ["trace"],
+ "commands": ["trace", "obs"],
},
{
"name": "Utilities",
diff --git a/cli/planoai/utils.py b/cli/planoai/utils.py
index 8f73bf18..214fd0a3 100644
--- a/cli/planoai/utils.py
+++ b/cli/planoai/utils.py
@@ -91,7 +91,12 @@ def convert_legacy_listeners(
"type": "model",
"port": 12000,
"address": "0.0.0.0",
- "timeout": "30s",
+ # LLM streaming responses routinely exceed 30s (extended thinking,
+ # long tool reasoning, large completions). Match the 300s ceiling
+            # used by the direct upstream-provider routes so Envoy doesn't
+            # abort streams mid-response with a UT (upstream request timeout)
+            # response flag. Users can override this via the
+            # `listeners.timeout` field in plano_config.yaml.
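+            # For example (sketch, mirroring the listener shape used here):
+            #   listeners:
+            #     - name: llm
+            #       type: model
+            #       port: 12000
+            #       timeout: 600s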
+ "timeout": "300s",
"model_providers": model_providers or [],
}
@@ -100,7 +105,7 @@ def convert_legacy_listeners(
"type": "prompt",
"port": 10000,
"address": "0.0.0.0",
- "timeout": "30s",
+ "timeout": "300s",
}
# Handle None case
diff --git a/cli/pyproject.toml b/cli/pyproject.toml
index 1864a915..0be85ed5 100644
--- a/cli/pyproject.toml
+++ b/cli/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "planoai"
-version = "0.4.19"
+version = "0.4.21"
description = "Python-based CLI tool to manage Plano."
authors = [{name = "Katanemo Labs, Inc."}]
readme = "README.md"
diff --git a/cli/test/test_config_generator.py b/cli/test/test_config_generator.py
index 17fa56cc..77b5b480 100644
--- a/cli/test/test_config_generator.py
+++ b/cli/test/test_config_generator.py
@@ -1,7 +1,11 @@
import json
import pytest
+import yaml
from unittest import mock
-from planoai.config_generator import validate_and_render_schema
+from planoai.config_generator import (
+ validate_and_render_schema,
+ migrate_inline_routing_preferences,
+)
@pytest.fixture(autouse=True)
@@ -253,38 +257,72 @@ llm_providers:
base_url: "http://custom.com/api/v2"
provider_interface: openai
+""",
+ },
+ {
+ "id": "vercel_is_supported_provider",
+ "expected_error": None,
+ "plano_config": """
+version: v0.4.0
+
+listeners:
+ - name: llm
+ type: model
+ port: 12000
+
+model_providers:
+ - model: vercel/*
+ base_url: https://ai-gateway.vercel.sh/v1
+ passthrough_auth: true
+
+""",
+ },
+ {
+ "id": "openrouter_is_supported_provider",
+ "expected_error": None,
+ "plano_config": """
+version: v0.4.0
+
+listeners:
+ - name: llm
+ type: model
+ port: 12000
+
+model_providers:
+ - model: openrouter/*
+ base_url: https://openrouter.ai/api/v1
+ passthrough_auth: true
+
""",
},
{
"id": "duplicate_routing_preference_name",
"expected_error": "Duplicate routing preference name",
"plano_config": """
-version: v0.1.0
+version: v0.4.0
listeners:
- egress_traffic:
- address: 0.0.0.0
+ - name: llm
+ type: model
port: 12000
- message_format: openai
- timeout: 30s
-
-llm_providers:
+model_providers:
- model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY
default: true
- model: openai/gpt-4o
access_key: $OPENAI_API_KEY
- routing_preferences:
- - name: code understanding
- description: understand and explain existing code snippets, functions, or libraries
- - model: openai/gpt-4.1
- access_key: $OPENAI_API_KEY
- routing_preferences:
- - name: code understanding
- description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
+routing_preferences:
+ - name: code understanding
+ description: understand and explain existing code snippets, functions, or libraries
+ models:
+ - openai/gpt-4o
+ - name: code understanding
+ description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
+ models:
+ - openai/gpt-4o-mini
tracing:
random_sampling: 100
@@ -465,3 +503,238 @@ def test_convert_legacy_llm_providers_no_prompt_gateway():
"port": 12000,
"timeout": "30s",
}
+
+
+def test_inline_routing_preferences_migrated_to_top_level():
+ plano_config = """
+version: v0.3.0
+
+listeners:
+ - type: model
+ name: model_listener
+ port: 12000
+
+model_providers:
+ - model: openai/gpt-4o-mini
+ access_key: $OPENAI_API_KEY
+ default: true
+
+ - model: openai/gpt-4o
+ access_key: $OPENAI_API_KEY
+ routing_preferences:
+ - name: code understanding
+ description: understand and explain existing code snippets, functions, or libraries
+
+ - model: anthropic/claude-sonnet-4-20250514
+ access_key: $ANTHROPIC_API_KEY
+ routing_preferences:
+ - name: code generation
+ description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
+"""
+ config_yaml = yaml.safe_load(plano_config)
+ migrate_inline_routing_preferences(config_yaml)
+
+ assert config_yaml["version"] == "v0.4.0"
+ for provider in config_yaml["model_providers"]:
+ assert "routing_preferences" not in provider
+
+ top_level = config_yaml["routing_preferences"]
+ by_name = {entry["name"]: entry for entry in top_level}
+ assert set(by_name) == {"code understanding", "code generation"}
+ assert by_name["code understanding"]["models"] == ["openai/gpt-4o"]
+ assert by_name["code generation"]["models"] == [
+ "anthropic/claude-sonnet-4-20250514"
+ ]
+ assert (
+ by_name["code understanding"]["description"]
+ == "understand and explain existing code snippets, functions, or libraries"
+ )
+
+
+def test_inline_same_name_across_providers_merges_models():
+ plano_config = """
+version: v0.3.0
+
+listeners:
+ - type: model
+ name: model_listener
+ port: 12000
+
+model_providers:
+ - model: openai/gpt-4o
+ access_key: $OPENAI_API_KEY
+ routing_preferences:
+ - name: code generation
+ description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
+
+ - model: anthropic/claude-sonnet-4-20250514
+ access_key: $ANTHROPIC_API_KEY
+ routing_preferences:
+ - name: code generation
+ description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
+"""
+ config_yaml = yaml.safe_load(plano_config)
+ migrate_inline_routing_preferences(config_yaml)
+
+ top_level = config_yaml["routing_preferences"]
+ assert len(top_level) == 1
+ entry = top_level[0]
+ assert entry["name"] == "code generation"
+ assert entry["models"] == [
+ "openai/gpt-4o",
+ "anthropic/claude-sonnet-4-20250514",
+ ]
+ assert config_yaml["version"] == "v0.4.0"
+
+
+def test_existing_top_level_routing_preferences_preserved():
+ plano_config = """
+version: v0.4.0
+
+listeners:
+ - type: model
+ name: model_listener
+ port: 12000
+
+model_providers:
+ - model: openai/gpt-4o
+ access_key: $OPENAI_API_KEY
+ - model: anthropic/claude-sonnet-4-20250514
+ access_key: $ANTHROPIC_API_KEY
+
+routing_preferences:
+ - name: code generation
+ description: generating new code snippets or boilerplate
+ models:
+ - openai/gpt-4o
+ - anthropic/claude-sonnet-4-20250514
+"""
+ config_yaml = yaml.safe_load(plano_config)
+ before = yaml.safe_dump(config_yaml, sort_keys=True)
+ migrate_inline_routing_preferences(config_yaml)
+ after = yaml.safe_dump(config_yaml, sort_keys=True)
+
+ assert before == after
+
+
+def test_existing_top_level_wins_over_inline_migration():
+ plano_config = """
+version: v0.3.0
+
+listeners:
+ - type: model
+ name: model_listener
+ port: 12000
+
+model_providers:
+ - model: openai/gpt-4o
+ access_key: $OPENAI_API_KEY
+ routing_preferences:
+ - name: code generation
+ description: inline description should lose
+
+routing_preferences:
+ - name: code generation
+ description: user-defined top-level description wins
+ models:
+ - openai/gpt-4o
+"""
+ config_yaml = yaml.safe_load(plano_config)
+ migrate_inline_routing_preferences(config_yaml)
+
+ top_level = config_yaml["routing_preferences"]
+ assert len(top_level) == 1
+ entry = top_level[0]
+ assert entry["description"] == "user-defined top-level description wins"
+ assert entry["models"] == ["openai/gpt-4o"]
+
+
+def test_wildcard_with_inline_routing_preferences_errors():
+ plano_config = """
+version: v0.3.0
+
+listeners:
+ - type: model
+ name: model_listener
+ port: 12000
+
+model_providers:
+ - model: openrouter/*
+ base_url: https://openrouter.ai/api/v1
+ passthrough_auth: true
+ routing_preferences:
+ - name: code generation
+ description: generating code
+"""
+ config_yaml = yaml.safe_load(plano_config)
+ with pytest.raises(Exception) as excinfo:
+ migrate_inline_routing_preferences(config_yaml)
+ assert "wildcard" in str(excinfo.value).lower()
+
+
+def test_migration_bumps_version_even_without_inline_preferences():
+ plano_config = """
+version: v0.3.0
+
+listeners:
+ - type: model
+ name: model_listener
+ port: 12000
+
+model_providers:
+ - model: openai/gpt-4o
+ access_key: $OPENAI_API_KEY
+"""
+ config_yaml = yaml.safe_load(plano_config)
+ migrate_inline_routing_preferences(config_yaml)
+
+ assert "routing_preferences" not in config_yaml
+ assert config_yaml["version"] == "v0.4.0"
+
+
+def test_migration_is_noop_on_v040_config_with_stray_inline_preferences():
+ # v0.4.0 configs are assumed to be on the canonical top-level shape.
+ # The migration intentionally does not rescue stray inline preferences
+ # at v0.4.0+ so that the deprecation boundary is a clean version gate.
+ plano_config = """
+version: v0.4.0
+
+listeners:
+ - type: model
+ name: model_listener
+ port: 12000
+
+model_providers:
+ - model: openai/gpt-4o
+ access_key: $OPENAI_API_KEY
+ routing_preferences:
+ - name: code generation
+ description: generating new code
+"""
+ config_yaml = yaml.safe_load(plano_config)
+ migrate_inline_routing_preferences(config_yaml)
+
+ assert config_yaml["version"] == "v0.4.0"
+ assert "routing_preferences" not in config_yaml
+ assert config_yaml["model_providers"][0]["routing_preferences"] == [
+ {"name": "code generation", "description": "generating new code"}
+ ]
+
+
+def test_migration_does_not_downgrade_newer_versions():
+ plano_config = """
+version: v0.5.0
+
+listeners:
+ - type: model
+ name: model_listener
+ port: 12000
+
+model_providers:
+ - model: openai/gpt-4o
+ access_key: $OPENAI_API_KEY
+"""
+ config_yaml = yaml.safe_load(plano_config)
+ migrate_inline_routing_preferences(config_yaml)
+
+ assert config_yaml["version"] == "v0.5.0"
diff --git a/cli/test/test_defaults.py b/cli/test/test_defaults.py
new file mode 100644
index 00000000..7017a70c
--- /dev/null
+++ b/cli/test/test_defaults.py
@@ -0,0 +1,111 @@
+from pathlib import Path
+
+import jsonschema
+import yaml
+
+from planoai.defaults import (
+ PROVIDER_DEFAULTS,
+ detect_providers,
+ synthesize_default_config,
+)
+
+_SCHEMA_PATH = Path(__file__).parents[2] / "config" / "plano_config_schema.yaml"
+
+
+def _schema() -> dict:
+ return yaml.safe_load(_SCHEMA_PATH.read_text())
+
+
+def test_zero_env_vars_produces_pure_passthrough():
+ cfg = synthesize_default_config(env={})
+ assert cfg["version"] == "v0.4.0"
+ assert cfg["listeners"][0]["port"] == 12000
+ for provider in cfg["model_providers"]:
+ assert provider.get("passthrough_auth") is True
+ assert "access_key" not in provider
+ # No provider should be marked default in pure pass-through mode.
+ assert provider.get("default") is not True
+ # All known providers should be listed.
+ names = {p["name"] for p in cfg["model_providers"]}
+ assert "digitalocean" in names
+ assert "vercel" in names
+ assert "openrouter" in names
+ assert "openai" in names
+ assert "anthropic" in names
+
+
+def test_env_keys_promote_providers_to_env_keyed():
+ cfg = synthesize_default_config(
+ env={"OPENAI_API_KEY": "sk-1", "DO_API_KEY": "do-1"}
+ )
+ by_name = {p["name"]: p for p in cfg["model_providers"]}
+ assert by_name["openai"].get("access_key") == "$OPENAI_API_KEY"
+ assert by_name["openai"].get("passthrough_auth") is None
+ assert by_name["digitalocean"].get("access_key") == "$DO_API_KEY"
+ # Unset env keys remain pass-through.
+ assert by_name["anthropic"].get("passthrough_auth") is True
+
+
+def test_no_default_is_synthesized():
+    # Brightstaff's wildcard expansion registers bare model names as routable
+    # keys, so bare names resolve on their own; the synthesizer therefore
+    # never sets `default: true`.
+ cfg = synthesize_default_config(
+ env={"OPENAI_API_KEY": "sk-1", "ANTHROPIC_API_KEY": "a-1"}
+ )
+ assert not any(p.get("default") is True for p in cfg["model_providers"])
+
+
+def test_listener_port_is_configurable():
+ cfg = synthesize_default_config(env={}, listener_port=11000)
+ assert cfg["listeners"][0]["port"] == 11000
+
+
+def test_detection_summary_strings():
+ det = detect_providers(env={"OPENAI_API_KEY": "sk", "DO_API_KEY": "d"})
+ summary = det.summary
+ assert "env-keyed" in summary and "openai" in summary and "digitalocean" in summary
+ assert "pass-through" in summary
+
+
+def test_tracing_block_points_at_local_console():
+ cfg = synthesize_default_config(env={})
+ tracing = cfg["tracing"]
+ assert tracing["opentracing_grpc_endpoint"] == "http://localhost:4317"
+ # random_sampling is a percentage in the plano config — 100 = every span.
+ assert tracing["random_sampling"] == 100
+
+
+def test_synthesized_config_validates_against_schema():
+ cfg = synthesize_default_config(env={"OPENAI_API_KEY": "sk"})
+ jsonschema.validate(cfg, _schema())
+
+
+def test_provider_defaults_digitalocean_is_configured():
+ by_name = {p.name: p for p in PROVIDER_DEFAULTS}
+ assert "digitalocean" in by_name
+ assert by_name["digitalocean"].env_var == "DO_API_KEY"
+ assert by_name["digitalocean"].base_url == "https://inference.do-ai.run/v1"
+ assert by_name["digitalocean"].model_pattern == "digitalocean/*"
+
+
+def test_provider_defaults_vercel_is_configured():
+ by_name = {p.name: p for p in PROVIDER_DEFAULTS}
+ assert "vercel" in by_name
+ assert by_name["vercel"].env_var == "AI_GATEWAY_API_KEY"
+ assert by_name["vercel"].base_url == "https://ai-gateway.vercel.sh/v1"
+ assert by_name["vercel"].model_pattern == "vercel/*"
+
+
+def test_provider_defaults_openrouter_is_configured():
+ by_name = {p.name: p for p in PROVIDER_DEFAULTS}
+ assert "openrouter" in by_name
+ assert by_name["openrouter"].env_var == "OPENROUTER_API_KEY"
+ assert by_name["openrouter"].base_url == "https://openrouter.ai/api/v1"
+ assert by_name["openrouter"].model_pattern == "openrouter/*"
+
+
+def test_openrouter_env_key_promotes_to_env_keyed():
+ cfg = synthesize_default_config(env={"OPENROUTER_API_KEY": "or-1"})
+ by_name = {p["name"]: p for p in cfg["model_providers"]}
+ assert by_name["openrouter"].get("access_key") == "$OPENROUTER_API_KEY"
+ assert by_name["openrouter"].get("passthrough_auth") is None
diff --git a/cli/test/test_obs_collector.py b/cli/test/test_obs_collector.py
new file mode 100644
index 00000000..a16506d9
--- /dev/null
+++ b/cli/test/test_obs_collector.py
@@ -0,0 +1,145 @@
+import time
+from datetime import datetime, timezone
+from types import SimpleNamespace
+from unittest.mock import MagicMock
+
+import pytest
+
+from planoai.obs.collector import LLMCall, LLMCallStore, span_to_llm_call
+
+
+def _mk_attr(key: str, value):
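+    # Build a mock OTLP common.v1.KeyValue: AnyValue is a protobuf oneof, so
+    # WhichOneof must report the populated field for the collector to read it.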
+ v = MagicMock()
+ if isinstance(value, bool):
+ v.WhichOneof.return_value = "bool_value"
+ v.bool_value = value
+ elif isinstance(value, int):
+ v.WhichOneof.return_value = "int_value"
+ v.int_value = value
+ elif isinstance(value, float):
+ v.WhichOneof.return_value = "double_value"
+ v.double_value = value
+ else:
+ v.WhichOneof.return_value = "string_value"
+ v.string_value = str(value)
+ kv = MagicMock()
+ kv.key = key
+ kv.value = v
+ return kv
+
+
+def _mk_span(
+ attrs: dict, start_ns: int | None = None, span_id_hex: str = "ab"
+) -> MagicMock:
+ span = MagicMock()
+ span.attributes = [_mk_attr(k, v) for k, v in attrs.items()]
+ span.start_time_unix_nano = start_ns or int(time.time() * 1_000_000_000)
+ span.span_id.hex.return_value = span_id_hex
+ return span
+
+
+def test_span_without_llm_model_is_ignored():
+ span = _mk_span({"http.method": "POST"})
+ assert span_to_llm_call(span, "plano(llm)") is None
+
+
+def test_span_with_full_llm_attrs_produces_call():
+ span = _mk_span(
+ {
+ "llm.model": "openai-gpt-5.4",
+ "model.requested": "router:software-engineering",
+ "plano.session_id": "sess-abc",
+ "plano.route.name": "software-engineering",
+ "llm.is_streaming": False,
+ "llm.duration_ms": 1234,
+ "llm.time_to_first_token": 210,
+ "llm.usage.prompt_tokens": 100,
+ "llm.usage.completion_tokens": 50,
+ "llm.usage.total_tokens": 150,
+ "llm.usage.cached_input_tokens": 30,
+ "llm.usage.cache_creation_tokens": 5,
+ "llm.usage.reasoning_tokens": 200,
+ "http.status_code": 200,
+ "request_id": "req-42",
+ }
+ )
+ call = span_to_llm_call(span, "plano(llm)")
+ assert call is not None
+ assert call.request_id == "req-42"
+ assert call.model == "openai-gpt-5.4"
+ assert call.request_model == "router:software-engineering"
+ assert call.session_id == "sess-abc"
+ assert call.route_name == "software-engineering"
+ assert call.is_streaming is False
+ assert call.duration_ms == 1234.0
+ assert call.ttft_ms == 210.0
+ assert call.prompt_tokens == 100
+ assert call.completion_tokens == 50
+ assert call.total_tokens == 150
+ assert call.cached_input_tokens == 30
+ assert call.cache_creation_tokens == 5
+ assert call.reasoning_tokens == 200
+ assert call.status_code == 200
+
+
+def test_pricing_lookup_attaches_cost():
+ class StubPricing:
+ def cost_for_call(self, call):
+            # Toy rate: $0.02 per prompt token + $0.03 per completion token.
+ return 0.02 * (call.prompt_tokens or 0) + 0.03 * (
+ call.completion_tokens or 0
+ )
+
+ span = _mk_span(
+ {
+ "llm.model": "do/openai-gpt-5.4",
+ "llm.usage.prompt_tokens": 10,
+ "llm.usage.completion_tokens": 2,
+ }
+ )
+ call = span_to_llm_call(span, "plano(llm)", pricing=StubPricing())
+ assert call is not None
+ assert call.cost_usd == pytest.approx(0.26)
+
+
+def test_tpt_and_tokens_per_sec_derived():
+ call = LLMCall(
+ request_id="x",
+ timestamp=datetime.now(tz=timezone.utc),
+ model="m",
+ duration_ms=1000,
+ ttft_ms=200,
+ completion_tokens=80,
+ )
+ # (1000 - 200) / 80 = 10ms per token => 100 tokens/sec
+ assert call.tpt_ms == 10.0
+ assert call.tokens_per_sec == 100.0
+
+
+def test_tpt_returns_none_when_no_completion_tokens():
+ call = LLMCall(
+ request_id="x",
+ timestamp=datetime.now(tz=timezone.utc),
+ model="m",
+ duration_ms=1000,
+ ttft_ms=200,
+ completion_tokens=0,
+ )
+ assert call.tpt_ms is None
+ assert call.tokens_per_sec is None
+
+
+def test_store_evicts_fifo_at_capacity():
+ store = LLMCallStore(capacity=3)
+ now = datetime.now(tz=timezone.utc)
+ for i in range(5):
+ store.add(
+ LLMCall(
+ request_id=f"r{i}",
+ timestamp=now,
+ model="m",
+ )
+ )
+ snap = store.snapshot()
+ assert len(snap) == 3
+ assert [c.request_id for c in snap] == ["r2", "r3", "r4"]
diff --git a/cli/test/test_obs_pricing.py b/cli/test/test_obs_pricing.py
new file mode 100644
index 00000000..02247d3d
--- /dev/null
+++ b/cli/test/test_obs_pricing.py
@@ -0,0 +1,146 @@
+from datetime import datetime, timezone
+
+from planoai.obs.collector import LLMCall
+from planoai.obs.pricing import ModelPrice, PricingCatalog
+
+
+def _call(model: str, prompt: int, completion: int, cached: int = 0) -> LLMCall:
+ return LLMCall(
+ request_id="r",
+ timestamp=datetime.now(tz=timezone.utc),
+ model=model,
+ prompt_tokens=prompt,
+ completion_tokens=completion,
+ cached_input_tokens=cached,
+ )
+
+
+def test_lookup_matches_bare_and_prefixed():
+ prices = {
+ "openai-gpt-5.4": ModelPrice(
+ input_per_token_usd=0.000001, output_per_token_usd=0.000002
+ )
+ }
+ catalog = PricingCatalog(prices)
+ assert catalog.price_for("openai-gpt-5.4") is not None
+ # do/openai-gpt-5.4 should resolve after stripping the provider prefix.
+ assert catalog.price_for("do/openai-gpt-5.4") is not None
+ assert catalog.price_for("unknown-model") is None
+
+
+def test_cost_computation_without_cache():
+ prices = {
+ "m": ModelPrice(input_per_token_usd=0.000001, output_per_token_usd=0.000002)
+ }
+ cost = PricingCatalog(prices).cost_for_call(_call("m", 1000, 500))
+ assert cost == 0.002 # 1000 * 1e-6 + 500 * 2e-6
+
+
+def test_cost_computation_with_cached_discount():
+ prices = {
+ "m": ModelPrice(
+ input_per_token_usd=0.000001,
+ output_per_token_usd=0.000002,
+ cached_input_per_token_usd=0.0000001,
+ )
+ }
+ # 800 fresh @ 1e-6 = 8e-4; 200 cached @ 1e-7 = 2e-5; 500 out @ 2e-6 = 1e-3
+ cost = PricingCatalog(prices).cost_for_call(_call("m", 1000, 500, cached=200))
+ assert cost == round(0.0008 + 0.00002 + 0.001, 6)
+
+
+def test_empty_catalog_returns_none():
+ assert PricingCatalog().cost_for_call(_call("m", 100, 50)) is None
+
+
+def test_parse_do_catalog_treats_small_values_as_per_token():
+ """DO's real catalog uses per-token values under the `_per_million` key
+ (e.g. 5E-8 for GPT-oss-20b). We treat values < 1 as already per-token."""
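+    # Heuristic exercised here and in the >1 test further down (assumed from
+    # the assertions): per_token = v if v < 1 else v / 1_000_000.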
+ from planoai.obs.pricing import _parse_do_pricing
+
+ sample = {
+ "data": [
+ {
+ "model_id": "openai-gpt-oss-20b",
+ "pricing": {
+ "input_price_per_million": 5e-8,
+ "output_price_per_million": 4.5e-7,
+ },
+ },
+ {
+ "model_id": "openai-gpt-oss-120b",
+ "pricing": {
+ "input_price_per_million": 1e-7,
+ "output_price_per_million": 7e-7,
+ },
+ },
+ ]
+ }
+ prices = _parse_do_pricing(sample)
+ # Values < 1 are assumed to already be per-token — no extra division.
+ assert prices["openai-gpt-oss-20b"].input_per_token_usd == 5e-8
+ assert prices["openai-gpt-oss-20b"].output_per_token_usd == 4.5e-7
+ assert prices["openai-gpt-oss-120b"].input_per_token_usd == 1e-7
+
+
+def test_anthropic_aliases_match_plano_emitted_names():
+ """DO publishes 'anthropic-claude-opus-4.7' and 'anthropic-claude-haiku-4.5';
+ Plano emits 'claude-opus-4-7' and 'claude-haiku-4-5-20251001'. Aliases
+ registered at parse time should bridge the gap."""
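+    # Alias rules implied by the assertions: version dots become dashes, the
+    # family word may be reordered, and date suffixes are tolerated.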
+ from planoai.obs.pricing import _parse_do_pricing
+
+ sample = {
+ "data": [
+ {
+ "model_id": "anthropic-claude-opus-4.7",
+ "pricing": {
+ "input_price_per_million": 15.0,
+ "output_price_per_million": 75.0,
+ },
+ },
+ {
+ "model_id": "anthropic-claude-haiku-4.5",
+ "pricing": {
+ "input_price_per_million": 1.0,
+ "output_price_per_million": 5.0,
+ },
+ },
+ {
+ "model_id": "anthropic-claude-4.6-sonnet",
+ "pricing": {
+ "input_price_per_million": 3.0,
+ "output_price_per_million": 15.0,
+ },
+ },
+ ]
+ }
+ catalog = PricingCatalog(_parse_do_pricing(sample))
+ # Family-last shapes Plano emits.
+ assert catalog.price_for("claude-opus-4-7") is not None
+ assert catalog.price_for("claude-haiku-4-5") is not None
+ # Date-suffixed name (Anthropic API style).
+ assert catalog.price_for("claude-haiku-4-5-20251001") is not None
+ # Word-order swap: DO has 'claude-4.6-sonnet', Plano emits 'claude-sonnet-4-6'.
+ assert catalog.price_for("claude-sonnet-4-6") is not None
+ # Original DO ids still resolve.
+ assert catalog.price_for("anthropic-claude-opus-4.7") is not None
+
+
+def test_parse_do_catalog_divides_large_values_as_per_million():
+ """A provider that genuinely reports $5-per-million in that field gets divided."""
+ from planoai.obs.pricing import _parse_do_pricing
+
+ sample = {
+ "data": [
+ {
+ "model_id": "mystery-model",
+ "pricing": {
+ "input_price_per_million": 5.0, # > 1 → treated as per-million
+ "output_price_per_million": 15.0,
+ },
+ },
+ ]
+ }
+ prices = _parse_do_pricing(sample)
+ assert prices["mystery-model"].input_per_token_usd == 5.0 / 1_000_000
+ assert prices["mystery-model"].output_per_token_usd == 15.0 / 1_000_000
diff --git a/cli/test/test_obs_render.py b/cli/test/test_obs_render.py
new file mode 100644
index 00000000..dd598363
--- /dev/null
+++ b/cli/test/test_obs_render.py
@@ -0,0 +1,106 @@
+from datetime import datetime, timedelta, timezone
+
+from planoai.obs.collector import LLMCall
+from planoai.obs.render import aggregates, model_rollups, route_hits
+
+
+def _call(
+ model: str,
+ ts: datetime,
+ prompt=0,
+ completion=0,
+ cost=None,
+ route=None,
+ session=None,
+ cache_read=0,
+ cache_write=0,
+):
+ return LLMCall(
+ request_id="r",
+ timestamp=ts,
+ model=model,
+ prompt_tokens=prompt,
+ completion_tokens=completion,
+ cached_input_tokens=cache_read,
+ cache_creation_tokens=cache_write,
+ cost_usd=cost,
+ route_name=route,
+ session_id=session,
+ )
+
+
+def test_aggregates_sum_and_session_counts():
+ now = datetime.now(tz=timezone.utc).astimezone()
+ calls = [
+ _call(
+ "m1",
+ now - timedelta(seconds=50),
+ prompt=10,
+ completion=5,
+ cost=0.001,
+ session="s1",
+ ),
+ _call(
+ "m2",
+ now - timedelta(seconds=40),
+ prompt=20,
+ completion=10,
+ cost=0.002,
+ session="s1",
+ ),
+ _call(
+ "m1",
+ now - timedelta(seconds=30),
+ prompt=30,
+ completion=15,
+ cost=0.003,
+ session="s2",
+ ),
+ ]
+ stats = aggregates(calls)
+ assert stats.count == 3
+ assert stats.total_cost_usd == 0.006
+ assert stats.total_input_tokens == 60
+ assert stats.total_output_tokens == 30
+ assert stats.distinct_sessions == 2
+ assert stats.current_session == "s2"
+
+
+def test_rollups_split_by_model_and_cache():
+ now = datetime.now(tz=timezone.utc).astimezone()
+ calls = [
+ _call(
+ "m1", now, prompt=10, completion=5, cost=0.001, cache_write=3, cache_read=7
+ ),
+ _call("m1", now, prompt=20, completion=10, cost=0.002, cache_read=1),
+ _call("m2", now, prompt=30, completion=15, cost=0.004),
+ ]
+ rollups = model_rollups(calls)
+ by_model = {r.model: r for r in rollups}
+ assert by_model["m1"].requests == 2
+ assert by_model["m1"].input_tokens == 30
+ assert by_model["m1"].cache_write == 3
+ assert by_model["m1"].cache_read == 8
+ assert by_model["m2"].input_tokens == 30
+
+
+def test_route_hits_only_for_routed_calls():
+ now = datetime.now(tz=timezone.utc).astimezone()
+ calls = [
+ _call("m", now, route="code"),
+ _call("m", now, route="code"),
+ _call("m", now, route="summarization"),
+ _call("m", now), # no route
+ ]
+ hits = route_hits(calls)
+ # Only calls with route names are counted.
+ assert sum(h.hits for h in hits) == 3
+ hits_by_name = {h.route: h for h in hits}
+ assert hits_by_name["code"].hits == 2
+ assert hits_by_name["summarization"].hits == 1
+
+
+def test_route_hits_empty_when_no_routes():
+ now = datetime.now(tz=timezone.utc).astimezone()
+ calls = [_call("m", now), _call("m", now)]
+ assert route_hits(calls) == []
diff --git a/cli/uv.lock b/cli/uv.lock
index 665ebdb8..8910b0a4 100644
--- a/cli/uv.lock
+++ b/cli/uv.lock
@@ -337,7 +337,7 @@ wheels = [
[[package]]
name = "planoai"
-version = "0.4.18"
+version = "0.4.21"
source = { editable = "." }
dependencies = [
{ name = "click" },
diff --git a/config/envoy.template.yaml b/config/envoy.template.yaml
index 5669511d..b2b9fb1f 100644
--- a/config/envoy.template.yaml
+++ b/config/envoy.template.yaml
@@ -901,6 +901,33 @@ static_resources:
validation_context:
trusted_ca:
filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
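+      # DigitalOcean Gradient inference upstream (TLS to inference.do-ai.run).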
+ - name: digitalocean
+ connect_timeout: {{ upstream_connect_timeout | default('5s') }}
+ type: LOGICAL_DNS
+ dns_lookup_family: V4_ONLY
+ lb_policy: ROUND_ROBIN
+ load_assignment:
+ cluster_name: digitalocean
+ endpoints:
+ - lb_endpoints:
+ - endpoint:
+ address:
+ socket_address:
+ address: inference.do-ai.run
+ port_value: 443
+ hostname: "inference.do-ai.run"
+ transport_socket:
+ name: envoy.transport_sockets.tls
+ typed_config:
+ "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
+ sni: inference.do-ai.run
+ common_tls_context:
+ tls_params:
+ tls_minimum_protocol_version: TLSv1_2
+ tls_maximum_protocol_version: TLSv1_3
+ validation_context:
+ trusted_ca:
+ filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
- name: xiaomi
connect_timeout: {{ upstream_connect_timeout | default('5s') }}
type: LOGICAL_DNS
diff --git a/config/grafana/brightstaff_dashboard.json b/config/grafana/brightstaff_dashboard.json
new file mode 100644
index 00000000..4b54721f
--- /dev/null
+++ b/config/grafana/brightstaff_dashboard.json
@@ -0,0 +1,541 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "RED, LLM upstream, routing service, and process metrics for brightstaff. Pair with Envoy admin metrics from cluster=bright_staff.",
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 1,
+ "id": null,
+ "links": [],
+ "liveNow": false,
+ "panels": [
+ {
+ "collapsed": false,
+ "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
+ "id": 100,
+ "panels": [],
+ "title": "HTTP RED",
+ "type": "row"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" },
+ "custom": {
+ "axisLabel": "req/s",
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "lineWidth": 1,
+ "showPoints": "never"
+ },
+ "unit": "reqps"
+ }
+ },
+ "gridPos": { "h": 8, "w": 12, "x": 0, "y": 1 },
+ "id": 1,
+ "options": {
+ "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
+ "tooltip": { "mode": "multi" }
+ },
+ "targets": [
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "sum by (handler) (rate(brightstaff_http_requests_total[1m]))",
+ "legendFormat": "{{handler}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Rate — brightstaff RPS by handler",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "description": "5xx fraction over 5m. Page-worthy when sustained above ~1%.",
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "thresholds" },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ { "color": "green", "value": null },
+ { "color": "yellow", "value": 0.01 },
+ { "color": "red", "value": 0.05 }
+ ]
+ },
+ "unit": "percentunit"
+ }
+ },
+ "gridPos": { "h": 8, "w": 12, "x": 12, "y": 1 },
+ "id": 2,
+ "options": {
+ "colorMode": "background",
+ "graphMode": "area",
+ "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
+ },
+ "targets": [
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "sum(rate(brightstaff_http_requests_total{status_class=\"5xx\"}[5m])) / clamp_min(sum(rate(brightstaff_http_requests_total[5m])), 1)",
+ "legendFormat": "5xx rate",
+ "refId": "A"
+ }
+ ],
+ "title": "Errors — brightstaff 5xx rate",
+ "type": "stat"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "description": "p50/p95/p99 by handler, computed from histogram buckets over 5m.",
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" },
+ "custom": { "drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never" },
+ "unit": "s"
+ }
+ },
+ "gridPos": { "h": 9, "w": 24, "x": 0, "y": 9 },
+ "id": 3,
+ "options": {
+ "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
+ "tooltip": { "mode": "multi" }
+ },
+ "targets": [
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "histogram_quantile(0.50, sum by (le, handler) (rate(brightstaff_http_request_duration_seconds_bucket[5m])))",
+ "legendFormat": "p50 {{handler}}",
+ "refId": "A"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "histogram_quantile(0.95, sum by (le, handler) (rate(brightstaff_http_request_duration_seconds_bucket[5m])))",
+ "legendFormat": "p95 {{handler}}",
+ "refId": "B"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "histogram_quantile(0.99, sum by (le, handler) (rate(brightstaff_http_request_duration_seconds_bucket[5m])))",
+ "legendFormat": "p99 {{handler}}",
+ "refId": "C"
+ }
+ ],
+ "title": "Duration — p50 / p95 / p99 by handler",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "description": "In-flight requests by handler. Climbs before latency does when brightstaff is saturated.",
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" },
+ "custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never" },
+ "unit": "short"
+ }
+ },
+ "gridPos": { "h": 8, "w": 24, "x": 0, "y": 18 },
+ "id": 4,
+ "options": {
+ "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
+ "tooltip": { "mode": "multi" }
+ },
+ "targets": [
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "sum by (handler) (brightstaff_http_in_flight_requests)",
+ "legendFormat": "{{handler}}",
+ "refId": "A"
+ }
+ ],
+ "title": "In-flight requests by handler",
+ "type": "timeseries"
+ },
+ {
+ "collapsed": false,
+ "gridPos": { "h": 1, "w": 24, "x": 0, "y": 26 },
+ "id": 200,
+ "panels": [],
+ "title": "LLM upstream",
+ "type": "row"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" },
+ "custom": { "drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never" },
+ "unit": "s"
+ }
+ },
+ "gridPos": { "h": 9, "w": 12, "x": 0, "y": 27 },
+ "id": 5,
+ "options": {
+ "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
+ "tooltip": { "mode": "multi" }
+ },
+ "targets": [
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "histogram_quantile(0.95, sum by (le, provider, model) (rate(brightstaff_llm_upstream_duration_seconds_bucket[5m])))",
+ "legendFormat": "p95 {{provider}}/{{model}}",
+ "refId": "A"
+ }
+ ],
+ "title": "LLM upstream p95 by provider/model",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "description": "All non-success error classes. timeout/connect = network, 5xx/429 = provider, parse = body shape mismatch, stream = mid-stream disconnect.",
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" },
+ "custom": { "drawStyle": "line", "fillOpacity": 30, "lineWidth": 1, "showPoints": "never", "stacking": { "mode": "normal" } },
+ "unit": "reqps"
+ }
+ },
+ "gridPos": { "h": 9, "w": 12, "x": 12, "y": 27 },
+ "id": 6,
+ "options": {
+ "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
+ "tooltip": { "mode": "multi" }
+ },
+ "targets": [
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "sum by (provider, error_class) (rate(brightstaff_llm_upstream_requests_total{error_class!=\"none\"}[5m]))",
+ "legendFormat": "{{provider}} / {{error_class}}",
+ "refId": "A"
+ }
+ ],
+ "title": "LLM upstream errors by provider / class",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "description": "Streaming only. Empty if the route never streams.",
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" },
+ "custom": { "drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never" },
+ "unit": "s"
+ }
+ },
+ "gridPos": { "h": 9, "w": 12, "x": 0, "y": 36 },
+ "id": 7,
+ "options": {
+ "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
+ "tooltip": { "mode": "multi" }
+ },
+ "targets": [
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "histogram_quantile(0.95, sum by (le, provider, model) (rate(brightstaff_llm_time_to_first_token_seconds_bucket[5m])))",
+ "legendFormat": "p95 {{provider}}/{{model}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Time-to-first-token p95 (streaming)",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "description": "Tokens/sec by provider/model/kind — proxy for cost. Stacked.",
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" },
+ "custom": { "drawStyle": "line", "fillOpacity": 30, "lineWidth": 1, "showPoints": "never", "stacking": { "mode": "normal" } },
+ "unit": "tokens/s"
+ }
+ },
+ "gridPos": { "h": 9, "w": 12, "x": 12, "y": 36 },
+ "id": 8,
+ "options": {
+ "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
+ "tooltip": { "mode": "multi" }
+ },
+ "targets": [
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "sum by (provider, model, kind) (rate(brightstaff_llm_tokens_total[5m]))",
+ "legendFormat": "{{provider}}/{{model}} {{kind}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Token throughput by provider / model / kind",
+ "type": "timeseries"
+ },
+ {
+ "collapsed": false,
+ "gridPos": { "h": 1, "w": 24, "x": 0, "y": 45 },
+ "id": 300,
+ "panels": [],
+ "title": "Routing service",
+ "type": "row"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "description": "Which models the orchestrator picked over the last 15 minutes.",
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" },
+ "unit": "short"
+ }
+ },
+ "gridPos": { "h": 9, "w": 12, "x": 0, "y": 46 },
+ "id": 9,
+ "options": {
+ "displayMode": "gradient",
+ "orientation": "horizontal",
+ "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
+ },
+ "targets": [
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "sum by (selected_model) (increase(brightstaff_router_decisions_total[15m]))",
+ "legendFormat": "{{selected_model}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Model selection distribution (last 15m)",
+ "type": "bargauge"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "description": "Fraction of decisions that fell back (orchestrator returned `none` or errored). High = router can't classify intent or no candidates configured.",
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" },
+ "custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never" },
+ "unit": "percentunit"
+ }
+ },
+ "gridPos": { "h": 9, "w": 12, "x": 12, "y": 46 },
+ "id": 10,
+ "options": {
+ "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
+ "tooltip": { "mode": "multi" }
+ },
+ "targets": [
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "sum by (route) (rate(brightstaff_router_decisions_total{fallback=\"true\"}[5m])) / clamp_min(sum by (route) (rate(brightstaff_router_decisions_total[5m])), 1)",
+ "legendFormat": "{{route}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Fallback rate by route",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" },
+ "custom": { "drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never" },
+ "unit": "s"
+ }
+ },
+ "gridPos": { "h": 8, "w": 12, "x": 0, "y": 55 },
+ "id": 11,
+ "options": {
+ "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
+ "tooltip": { "mode": "multi" }
+ },
+ "targets": [
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "histogram_quantile(0.95, sum by (le, route) (rate(brightstaff_router_decision_duration_seconds_bucket[5m])))",
+ "legendFormat": "p95 {{route}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Router decision p95 latency",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "description": "Hit / (hit + miss). Low ratio = sessions aren't being reused or TTL too short.",
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "thresholds" },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ { "color": "red", "value": null },
+ { "color": "yellow", "value": 0.5 },
+ { "color": "green", "value": 0.8 }
+ ]
+ },
+ "unit": "percentunit",
+ "min": 0,
+ "max": 1
+ }
+ },
+ "gridPos": { "h": 8, "w": 6, "x": 12, "y": 55 },
+ "id": 12,
+ "options": {
+ "colorMode": "background",
+ "graphMode": "area",
+ "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
+ },
+ "targets": [
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "sum(rate(brightstaff_session_cache_events_total{outcome=\"hit\"}[5m])) / clamp_min(sum(rate(brightstaff_session_cache_events_total{outcome=~\"hit|miss\"}[5m])), 1)",
+ "legendFormat": "hit rate",
+ "refId": "A"
+ }
+ ],
+ "title": "Session cache hit rate",
+ "type": "stat"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "description": "decision_served = a real model picked. no_candidates = sentinel `none` returned. policy_error = orchestrator failed.",
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" },
+ "custom": { "drawStyle": "line", "fillOpacity": 30, "lineWidth": 1, "showPoints": "never", "stacking": { "mode": "normal" } },
+ "unit": "reqps"
+ }
+ },
+ "gridPos": { "h": 8, "w": 6, "x": 18, "y": 55 },
+ "id": 13,
+ "options": {
+ "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true },
+ "tooltip": { "mode": "multi" }
+ },
+ "targets": [
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "sum by (outcome) (rate(brightstaff_routing_service_requests_total[5m]))",
+ "legendFormat": "{{outcome}}",
+ "refId": "A"
+ }
+ ],
+ "title": "/routing/* outcomes",
+ "type": "timeseries"
+ },
+ {
+ "collapsed": false,
+ "gridPos": { "h": 1, "w": 24, "x": 0, "y": 63 },
+ "id": 400,
+ "panels": [],
+ "title": "Process & Envoy link",
+ "type": "row"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "description": "Compare to brightstaff RPS (panel 1) — sustained gap = network or Envoy queueing.",
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" },
+ "custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never" },
+ "unit": "reqps"
+ }
+ },
+ "gridPos": { "h": 8, "w": 12, "x": 0, "y": 64 },
+ "id": 14,
+ "options": {
+ "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
+ "tooltip": { "mode": "multi" }
+ },
+ "targets": [
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "sum(rate(envoy_cluster_upstream_rq_total{envoy_cluster_name=\"bright_staff\"}[1m]))",
+ "legendFormat": "envoy → bright_staff",
+ "refId": "A"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "sum(rate(brightstaff_http_requests_total[1m]))",
+ "legendFormat": "brightstaff served",
+ "refId": "B"
+ }
+ ],
+ "title": "Envoy → brightstaff link health",
+ "type": "timeseries"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "fieldConfig": {
+ "defaults": {
+ "color": { "mode": "palette-classic" },
+ "custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never" }
+ },
+ "overrides": [
+ {
+ "matcher": { "id": "byName", "options": "RSS" },
+ "properties": [{ "id": "unit", "value": "bytes" }]
+ },
+ {
+ "matcher": { "id": "byName", "options": "CPU" },
+ "properties": [{ "id": "unit", "value": "percentunit" }]
+ }
+ ]
+ },
+ "gridPos": { "h": 8, "w": 12, "x": 12, "y": 64 },
+ "id": 15,
+ "options": {
+ "legend": { "displayMode": "table", "placement": "bottom", "showLegend": true },
+ "tooltip": { "mode": "multi" }
+ },
+ "targets": [
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "process_resident_memory_bytes{job=\"brightstaff\"}",
+ "legendFormat": "RSS",
+ "refId": "A"
+ },
+ {
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+ "expr": "rate(process_cpu_seconds_total{job=\"brightstaff\"}[1m])",
+ "legendFormat": "CPU",
+ "refId": "B"
+ }
+ ],
+ "title": "Brightstaff process RSS / CPU",
+ "type": "timeseries"
+ }
+ ],
+ "refresh": "30s",
+ "schemaVersion": 39,
+ "tags": ["plano", "brightstaff", "llm"],
+ "templating": {
+ "list": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "Prometheus",
+ "type": "datasource",
+ "query": "prometheus",
+ "current": { "selected": false, "text": "Prometheus", "value": "DS_PROMETHEUS" },
+ "hide": 0,
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "includeAll": false,
+ "multi": false
+ }
+ ]
+ },
+ "time": { "from": "now-1h", "to": "now" },
+ "timepicker": {},
+ "timezone": "browser",
+ "title": "Brightstaff (Plano dataplane)",
+ "uid": "brightstaff",
+ "version": 1,
+ "weekStart": ""
+}
diff --git a/config/grafana/docker-compose.yaml b/config/grafana/docker-compose.yaml
new file mode 100644
index 00000000..33238073
--- /dev/null
+++ b/config/grafana/docker-compose.yaml
@@ -0,0 +1,43 @@
+# One-command Prometheus + Grafana stack for observing a locally-running
+# Plano (Envoy admin :9901 + brightstaff :9092 on the host).
+#
+# cd config/grafana
+# docker compose up -d
+# open http://localhost:3000 (admin / admin)
+#
+# Grafana is preloaded with:
+# - Prometheus datasource (uid=DS_PROMETHEUS) → http://prometheus:9090
+# - Brightstaff dashboard (auto-imported from brightstaff_dashboard.json)
+#
+# Prometheus scrapes the host's :9092 and :9901 via host.docker.internal.
+# On macOS/Windows Docker Desktop resolves that name natively; on Linux it
+# works because of the `extra_hosts: host-gateway` mapping below.
+
+services:
+ prometheus:
+ image: prom/prometheus:latest
+ container_name: plano-prometheus
+ ports:
+ - "9090:9090"
+ volumes:
+ - ./prometheus_scrape.yaml:/etc/prometheus/prometheus.yml:ro
+ extra_hosts:
+ - "host.docker.internal:host-gateway"
+ restart: unless-stopped
+
+ grafana:
+ image: grafana/grafana:latest
+ container_name: plano-grafana
+ ports:
+ - "3000:3000"
+ environment:
+ GF_SECURITY_ADMIN_USER: admin
+ GF_SECURITY_ADMIN_PASSWORD: admin
+ GF_AUTH_ANONYMOUS_ENABLED: "true"
+ GF_AUTH_ANONYMOUS_ORG_ROLE: Viewer
+ volumes:
+ - ./provisioning:/etc/grafana/provisioning:ro
+ - ./brightstaff_dashboard.json:/var/lib/grafana/dashboards/brightstaff_dashboard.json:ro
+ depends_on:
+ - prometheus
+ restart: unless-stopped
diff --git a/config/grafana/prometheus_scrape.yaml b/config/grafana/prometheus_scrape.yaml
new file mode 100644
index 00000000..b4041287
--- /dev/null
+++ b/config/grafana/prometheus_scrape.yaml
@@ -0,0 +1,44 @@
+# Prometheus config that scrapes Plano (Envoy admin + brightstaff). This is
+# a complete Prometheus config — mount it directly at
+# /etc/prometheus/prometheus.yml. The included docker-compose.yaml does this
+# for you.
+#
+# Targets:
+# - envoy:9901 Envoy admin → envoy_cluster_*, envoy_http_*, envoy_server_*.
+# - brightstaff:9092 Native dataplane → brightstaff_http_*, brightstaff_llm_*,
+# brightstaff_router_*, process_*.
+#
+# Hostname `host.docker.internal` works on Docker Desktop (Mac/Win) and on
+# Linux when the container is started with `--add-host=host.docker.internal:
+# host-gateway` (the included compose does this). If Plano runs *inside*
+# Docker on the same network as Prometheus, replace it with the container
+# name (e.g. `plano:9092`).
+#
+# This file is unrelated to demos/llm_routing/model_routing_service/prometheus.yaml,
+# which scrapes a fake metrics service to feed the routing engine.
+
+global:
+ scrape_interval: 15s
+ scrape_timeout: 10s
+ evaluation_interval: 15s
+
+scrape_configs:
+ - job_name: envoy
+ honor_timestamps: true
+ metrics_path: /stats
+ params:
+ format: ["prometheus"]
+ static_configs:
+ - targets:
+ - host.docker.internal:9901
+ labels:
+ service: plano
+
+ - job_name: brightstaff
+ honor_timestamps: true
+ metrics_path: /metrics
+ static_configs:
+ - targets:
+ - host.docker.internal:9092
+ labels:
+ service: plano
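+
+# Example: Plano running inside Docker on the same network as Prometheus.
+# Point the brightstaff job at the container instead of the host gateway
+# (service name illustrative):
+#
+#   - job_name: brightstaff
+#     metrics_path: /metrics
+#     static_configs:
+#       - targets:
+#           - plano:9092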
diff --git a/config/grafana/provisioning/dashboards/brightstaff.yaml b/config/grafana/provisioning/dashboards/brightstaff.yaml
new file mode 100644
index 00000000..271e4a9b
--- /dev/null
+++ b/config/grafana/provisioning/dashboards/brightstaff.yaml
@@ -0,0 +1,15 @@
+# Auto-load the brightstaff dashboard JSON on Grafana startup.
+
+apiVersion: 1
+
+providers:
+ - name: brightstaff
+ orgId: 1
+ folder: Plano
+ type: file
+ disableDeletion: false
+ updateIntervalSeconds: 30
+ allowUiUpdates: true
+ options:
+ path: /var/lib/grafana/dashboards
+ foldersFromFilesStructure: false
diff --git a/config/grafana/provisioning/datasources/prometheus.yaml b/config/grafana/provisioning/datasources/prometheus.yaml
new file mode 100644
index 00000000..2e3170ec
--- /dev/null
+++ b/config/grafana/provisioning/datasources/prometheus.yaml
@@ -0,0 +1,14 @@
+# Auto-provision the Prometheus datasource so the bundled dashboard wires up
+# without any clicks. The `uid: DS_PROMETHEUS` matches the templated input in
+# brightstaff_dashboard.json.
+
+apiVersion: 1
+
+datasources:
+ - name: Prometheus
+ uid: DS_PROMETHEUS
+ type: prometheus
+ access: proxy
+ url: http://prometheus:9090
+ isDefault: true
+ editable: true
diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml
index d3d6a643..9560b437 100644
--- a/config/plano_config_schema.yaml
+++ b/config/plano_config_schema.yaml
@@ -190,8 +190,18 @@ properties:
- openai
- xiaomi
- gemini
+ - chatgpt
+ - digitalocean
+ - vercel
+ - openrouter
+ headers:
+ type: object
+ additionalProperties:
+ type: string
+ description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)."
routing_preferences:
type: array
+ description: "[DEPRECATED] Inline routing_preferences under a model_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md."
items:
type: object
properties:
@@ -238,8 +248,18 @@ properties:
- openai
- xiaomi
- gemini
+ - chatgpt
+ - digitalocean
+ - vercel
+ - openrouter
+ headers:
+ type: object
+ additionalProperties:
+ type: string
+ description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)."
routing_preferences:
type: array
+ description: "[DEPRECATED] Inline routing_preferences under an llm_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md."
items:
type: object
properties:
@@ -276,6 +296,9 @@ properties:
type: boolean
use_agent_orchestrator:
type: boolean
+ disable_signals:
+ type: boolean
+ description: "Disable agentic signal analysis (frustration, repetition, escalation, etc.) on LLM responses to save CPU. Default false."
upstream_connect_timeout:
type: string
description: "Connect timeout for upstream provider clusters (e.g., '5s', '10s'). Default is '5s'."
diff --git a/crates/Cargo.lock b/crates/Cargo.lock
index e07b47ee..39261d67 100644
--- a/crates/Cargo.lock
+++ b/crates/Cargo.lock
@@ -23,6 +23,18 @@ version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217"
+[[package]]
+name = "ahash"
+version = "0.8.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "version_check",
+ "zerocopy",
+]
+
[[package]]
name = "aho-corasick"
version = "1.1.4"
@@ -257,6 +269,24 @@ dependencies = [
"vsimd",
]
+[[package]]
+name = "bindgen"
+version = "0.72.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
+dependencies = [
+ "bitflags",
+ "cexpr",
+ "clang-sys",
+ "itertools 0.13.0",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash 2.1.2",
+ "shlex",
+ "syn 2.0.117",
+]
+
[[package]]
name = "bit-set"
version = "0.5.3"
@@ -316,6 +346,9 @@ dependencies = [
"hyper 1.9.0",
"hyper-util",
"lru",
+ "metrics 0.23.1",
+ "metrics-exporter-prometheus",
+ "metrics-process",
"mockito",
"opentelemetry",
"opentelemetry-http",
@@ -325,6 +358,7 @@ dependencies = [
"pretty_assertions",
"rand 0.9.4",
"redis",
+ "regex",
"reqwest",
"serde",
"serde_json",
@@ -332,6 +366,8 @@ dependencies = [
"serde_yaml",
"strsim",
"thiserror 2.0.18",
+ "tikv-jemalloc-ctl",
+ "tikv-jemallocator",
"time",
"tokio",
"tokio-postgres",
@@ -391,6 +427,15 @@ dependencies = [
"shlex",
]
+[[package]]
+name = "cexpr"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
+dependencies = [
+ "nom",
+]
+
[[package]]
name = "cfg-if"
version = "1.0.4"
@@ -428,6 +473,17 @@ dependencies = [
"windows-link",
]
+[[package]]
+name = "clang-sys"
+version = "1.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
+dependencies = [
+ "glob",
+ "libc",
+ "libloading",
+]
+
[[package]]
name = "cmov"
version = "0.5.3"
@@ -574,6 +630,21 @@ dependencies = [
"cfg-if",
]
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
[[package]]
name = "crypto-common"
version = "0.1.7"
@@ -1070,6 +1141,12 @@ dependencies = [
"wasip3",
]
+[[package]]
+name = "glob"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
+
[[package]]
name = "governor"
version = "0.6.3"
@@ -1128,7 +1205,7 @@ version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e91b62f79061a0bc2e046024cb7ba44b08419ed238ecbd9adbd787434b9e8c25"
dependencies = [
- "ahash",
+ "ahash 0.3.8",
"autocfg",
]
@@ -1138,6 +1215,15 @@ version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+[[package]]
+name = "hashbrown"
+version = "0.14.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
+dependencies = [
+ "ahash 0.8.12",
+]
+
[[package]]
name = "hashbrown"
version = "0.15.5"
@@ -1189,6 +1275,12 @@ dependencies = [
"uuid",
]
+[[package]]
+name = "hermit-abi"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
+
[[package]]
name = "hex"
version = "0.4.3"
@@ -1665,6 +1757,27 @@ version = "0.2.185"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f"
+[[package]]
+name = "libloading"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
+dependencies = [
+ "cfg-if",
+ "windows-link",
+]
+
+[[package]]
+name = "libproc"
+version = "0.14.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a54ad7278b8bc5301d5ffd2a94251c004feb971feba96c971ea4063645990757"
+dependencies = [
+ "bindgen",
+ "errno",
+ "libc",
+]
+
[[package]]
name = "libredox"
version = "0.1.16"
@@ -1745,6 +1858,12 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
+[[package]]
+name = "mach2"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dae608c151f68243f2b000364e1f7b186d9c29845f7d2d85bd31b9ad77ad552b"
+
[[package]]
name = "matchers"
version = "0.2.0"
@@ -1782,6 +1901,77 @@ version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
+[[package]]
+name = "metrics"
+version = "0.23.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3045b4193fbdc5b5681f32f11070da9be3609f189a79f3390706d42587f46bb5"
+dependencies = [
+ "ahash 0.8.12",
+ "portable-atomic",
+]
+
+[[package]]
+name = "metrics"
+version = "0.24.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d5312e9ba3771cfa961b585728215e3d972c950a3eed9252aa093d6301277e8"
+dependencies = [
+ "ahash 0.8.12",
+ "portable-atomic",
+]
+
+[[package]]
+name = "metrics-exporter-prometheus"
+version = "0.15.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4f0c8427b39666bf970460908b213ec09b3b350f20c0c2eabcbba51704a08e6"
+dependencies = [
+ "base64 0.22.1",
+ "http-body-util",
+ "hyper 1.9.0",
+ "hyper-util",
+ "indexmap 2.14.0",
+ "ipnet",
+ "metrics 0.23.1",
+ "metrics-util",
+ "quanta",
+ "thiserror 1.0.69",
+ "tokio",
+ "tracing",
+]
+
+[[package]]
+name = "metrics-process"
+version = "2.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4268d87f64a752f5a651314fc683f04da10be65701ea3e721ba4d74f79163cac"
+dependencies = [
+ "libc",
+ "libproc",
+ "mach2",
+ "metrics 0.24.3",
+ "once_cell",
+ "procfs",
+ "rlimit",
+ "windows",
+]
+
+[[package]]
+name = "metrics-util"
+version = "0.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4259040465c955f9f2f1a4a8a16dc46726169bca0f88e8fb2dbeced487c3e828"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+ "hashbrown 0.14.5",
+ "metrics 0.23.1",
+ "num_cpus",
+ "quanta",
+ "sketches-ddsketch",
+]
+
[[package]]
name = "mime"
version = "0.3.17"
@@ -1935,6 +2125,16 @@ dependencies = [
"autocfg",
]
+[[package]]
+name = "num_cpus"
+version = "1.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
+dependencies = [
+ "hermit-abi",
+ "libc",
+]
+
[[package]]
name = "objc2-core-foundation"
version = "0.3.2"
@@ -2125,6 +2325,12 @@ dependencies = [
"windows-link",
]
+[[package]]
+name = "paste"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
+
[[package]]
name = "percent-encoding"
version = "2.3.2"
@@ -2278,6 +2484,27 @@ dependencies = [
"unicode-ident",
]
+[[package]]
+name = "procfs"
+version = "0.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "25485360a54d6861439d60facef26de713b1e126bf015ec8f98239467a2b82f7"
+dependencies = [
+ "bitflags",
+ "procfs-core",
+ "rustix",
+]
+
+[[package]]
+name = "procfs-core"
+version = "0.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6401bf7b6af22f78b563665d15a22e9aef27775b79b149a66ca022468a4e405"
+dependencies = [
+ "bitflags",
+ "hex",
+]
+
[[package]]
name = "prompt_gateway"
version = "0.1.0"
@@ -2333,6 +2560,21 @@ dependencies = [
"log",
]
+[[package]]
+name = "quanta"
+version = "0.12.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f3ab5a9d756f0d97bdc89019bd2e4ea098cf9cde50ee7564dde6b81ccc8f06c7"
+dependencies = [
+ "crossbeam-utils",
+ "libc",
+ "once_cell",
+ "raw-cpuid",
+ "wasi 0.11.1+wasi-snapshot-preview1",
+ "web-sys",
+ "winapi",
+]
+
[[package]]
name = "quinn"
version = "0.11.9"
@@ -2485,6 +2727,15 @@ version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69"
+[[package]]
+name = "raw-cpuid"
+version = "11.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186"
+dependencies = [
+ "bitflags",
+]
+
[[package]]
name = "redis"
version = "0.27.6"
@@ -2646,6 +2897,15 @@ dependencies = [
"windows-sys 0.52.0",
]
+[[package]]
+name = "rlimit"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f35ee2729c56bb610f6dba436bf78135f728b7373bdffae2ec815b2d3eb98cc3"
+dependencies = [
+ "libc",
+]
+
[[package]]
name = "rustc-hash"
version = "1.1.0"
@@ -3098,6 +3358,12 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e"
+[[package]]
+name = "sketches-ddsketch"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85636c14b73d81f541e525f585c0a2109e6744e1565b5c1668e31c70c10ed65c"
+
[[package]]
name = "slab"
version = "0.4.12"
@@ -3308,6 +3574,37 @@ dependencies = [
"rustc-hash 1.1.0",
]
+[[package]]
+name = "tikv-jemalloc-ctl"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "661f1f6a57b3a36dc9174a2c10f19513b4866816e13425d3e418b11cc37bc24c"
+dependencies = [
+ "libc",
+ "paste",
+ "tikv-jemalloc-sys",
+]
+
+[[package]]
+name = "tikv-jemalloc-sys"
+version = "0.6.1+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd8aa5b2ab86a2cefa406d889139c162cbb230092f7d1d7cbc1716405d852a3b"
+dependencies = [
+ "cc",
+ "libc",
+]
+
+[[package]]
+name = "tikv-jemallocator"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0359b4327f954e0567e69fb191cf1436617748813819c94b8cd4a431422d053a"
+dependencies = [
+ "libc",
+ "tikv-jemalloc-sys",
+]
+
[[package]]
name = "time"
version = "0.3.47"
@@ -4003,6 +4300,49 @@ dependencies = [
"web-sys",
]
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
+[[package]]
+name = "windows"
+version = "0.62.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580"
+dependencies = [
+ "windows-collections",
+ "windows-core",
+ "windows-future",
+ "windows-numerics",
+]
+
+[[package]]
+name = "windows-collections"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610"
+dependencies = [
+ "windows-core",
+]
+
[[package]]
name = "windows-core"
version = "0.62.2"
@@ -4016,6 +4356,17 @@ dependencies = [
"windows-strings",
]
+[[package]]
+name = "windows-future"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb"
+dependencies = [
+ "windows-core",
+ "windows-link",
+ "windows-threading",
+]
+
[[package]]
name = "windows-implement"
version = "0.60.2"
@@ -4044,6 +4395,16 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+[[package]]
+name = "windows-numerics"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26"
+dependencies = [
+ "windows-core",
+ "windows-link",
+]
+
[[package]]
name = "windows-registry"
version = "0.6.1"
@@ -4133,6 +4494,15 @@ dependencies = [
"windows_x86_64_msvc 0.53.1",
]
+[[package]]
+name = "windows-threading"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37"
+dependencies = [
+ "windows-link",
+]
+
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
diff --git a/crates/brightstaff/Cargo.toml b/crates/brightstaff/Cargo.toml
index f88ed918..d2635963 100644
--- a/crates/brightstaff/Cargo.toml
+++ b/crates/brightstaff/Cargo.toml
@@ -3,6 +3,18 @@ name = "brightstaff"
version = "0.1.0"
edition = "2021"
+[features]
+default = ["jemalloc"]
+jemalloc = ["tikv-jemallocator", "tikv-jemalloc-ctl"]
+
+[[bin]]
+name = "brightstaff"
+path = "src/main.rs"
+
+[[bin]]
+name = "signals_replay"
+path = "src/bin/signals_replay.rs"
+
[dependencies]
async-openai = "0.30.1"
async-trait = "0.1"
@@ -26,7 +38,11 @@ opentelemetry-stdout = "0.31"
opentelemetry_sdk = { version = "0.31", features = ["rt-tokio"] }
pretty_assertions = "1.4.1"
rand = "0.9.2"
+regex = "1.10"
lru = "0.12"
+metrics = "0.23"
+metrics-exporter-prometheus = { version = "0.15", default-features = false, features = ["http-listener"] }
+metrics-process = "2.1"
redis = { version = "0.27", features = ["tokio-comp"] }
reqwest = { version = "0.12.15", features = ["stream"] }
serde = { version = "1.0.219", features = ["derive"] }
@@ -35,6 +51,8 @@ serde_with = "3.13.0"
strsim = "0.11"
serde_yaml = "0.9.34"
thiserror = "2.0.12"
+tikv-jemallocator = { version = "0.6", optional = true }
+tikv-jemalloc-ctl = { version = "0.6", features = ["stats"], optional = true }
tokio = { version = "1.44.2", features = ["full"] }
tokio-postgres = { version = "0.7", features = ["with-serde_json-1"] }
tokio-stream = "0.1"
diff --git a/crates/brightstaff/src/app_state.rs b/crates/brightstaff/src/app_state.rs
index e585d2db..1d534e89 100644
--- a/crates/brightstaff/src/app_state.rs
+++ b/crates/brightstaff/src/app_state.rs
@@ -24,4 +24,7 @@ pub struct AppState {
/// Shared HTTP client for upstream LLM requests (connection pooling / keep-alive).
pub http_client: reqwest::Client,
pub filter_pipeline: Arc,
+ /// When false, agentic signal analysis is skipped on LLM responses to save CPU.
+ /// Controlled by `overrides.disable_signals` in plano config.
+ pub signals_enabled: bool,
}
diff --git a/crates/brightstaff/src/bin/signals_replay.rs b/crates/brightstaff/src/bin/signals_replay.rs
new file mode 100644
index 00000000..41879ac1
--- /dev/null
+++ b/crates/brightstaff/src/bin/signals_replay.rs
@@ -0,0 +1,175 @@
+//! `signals_replay`: a batch driver for the `brightstaff` signal analyzer.
+//!
+//! Reads JSONL conversations from stdin (one per line) and emits matching
+//! JSONL reports on stdout, one per input conversation, in the same order.
+//!
+//! Input shape (per line):
+//! ```json
+//! {"id": "convo-42", "messages": [{"from": "human", "value": "..."}, ...]}
+//! ```
+//!
+//! Output shape (per line, success):
+//! ```json
+//! {"id": "convo-42", "report": { ...python-compatible SignalReport dict... }}
+//! ```
+//!
+//! On per-line failure (parse / analyzer error), emits:
+//! ```json
+//! {"id": "convo-42", "error": "..."}
+//! ```
+//!
+//! The output report dict mirrors the Python reference's
+//! `SignalReport.to_dict()` exactly (same keys, same ordering) so the parity
+//! comparator can do a direct structural diff.
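+//!
+//! Typical invocation (illustrative; any JSONL source works):
+//!
+//! ```text
+//! cat conversations.jsonl | cargo run --bin signals_replay > reports.jsonl
+//! ```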
+
+use std::io::{self, BufRead, BufWriter, Write};
+
+use serde::Deserialize;
+use serde_json::{json, Map, Value};
+
+use brightstaff::signals::{SignalAnalyzer, SignalGroup, SignalReport};
+
+#[derive(Debug, Deserialize)]
+struct InputLine {
+ id: Value,
+    messages: Vec<MessageRow>,
+}
+
+#[derive(Debug, Deserialize)]
+struct MessageRow {
+ #[serde(default)]
+ from: String,
+ #[serde(default)]
+ value: String,
+}
+
+fn main() {
+ let stdin = io::stdin();
+ let stdout = io::stdout();
+ let mut out = BufWriter::new(stdout.lock());
+ let analyzer = SignalAnalyzer::default();
+
+ for line in stdin.lock().lines() {
+ let line = match line {
+ Ok(l) => l,
+ Err(e) => {
+ eprintln!("read error: {e}");
+ std::process::exit(1);
+ }
+ };
+ let trimmed = line.trim();
+ if trimmed.is_empty() {
+ continue;
+ }
+ let result = process_line(&analyzer, trimmed);
+ // Always emit one line per input line so id ordering stays aligned.
+ if let Err(e) = writeln!(out, "{result}") {
+ eprintln!("write error: {e}");
+ std::process::exit(1);
+ }
+        // Periodic flushing isn't needed: BufWriter batches writes, and the
+        // parent process reads the whole stream once we're done.
+ }
+ let _ = out.flush();
+}
+
+fn process_line(analyzer: &SignalAnalyzer, line: &str) -> Value {
+ let parsed: InputLine = match serde_json::from_str(line) {
+ Ok(p) => p,
+ Err(e) => {
+ return json!({
+ "id": Value::Null,
+ "error": format!("input parse: {e}"),
+ });
+ }
+ };
+
+ let id = parsed.id.clone();
+
+    let view: Vec<brightstaff::signals::analyzer::ShareGptMessage<'_>> = parsed
+ .messages
+ .iter()
+ .map(|m| brightstaff::signals::analyzer::ShareGptMessage {
+ from: m.from.as_str(),
+ value: m.value.as_str(),
+ })
+ .collect();
+
+ let report = analyzer.analyze_sharegpt(&view);
+ let report_dict = report_to_python_dict(&report);
+ json!({
+ "id": id,
+ "report": report_dict,
+ })
+}
+
+/// Convert a `SignalReport` into the Python reference's `to_dict()` shape.
+///
+/// Ordering of category keys in each layer dict follows the Python source
+/// exactly so even string-equality comparisons behave deterministically.
+fn report_to_python_dict(r: &SignalReport) -> Value {
+ let mut interaction = Map::new();
+ interaction.insert(
+ "misalignment".to_string(),
+ signal_group_to_python(&r.interaction.misalignment),
+ );
+ interaction.insert(
+ "stagnation".to_string(),
+ signal_group_to_python(&r.interaction.stagnation),
+ );
+ interaction.insert(
+ "disengagement".to_string(),
+ signal_group_to_python(&r.interaction.disengagement),
+ );
+ interaction.insert(
+ "satisfaction".to_string(),
+ signal_group_to_python(&r.interaction.satisfaction),
+ );
+
+ let mut execution = Map::new();
+ execution.insert(
+ "failure".to_string(),
+ signal_group_to_python(&r.execution.failure),
+ );
+ execution.insert(
+ "loops".to_string(),
+ signal_group_to_python(&r.execution.loops),
+ );
+
+ let mut environment = Map::new();
+ environment.insert(
+ "exhaustion".to_string(),
+ signal_group_to_python(&r.environment.exhaustion),
+ );
+
+ json!({
+ "interaction_signals": Value::Object(interaction),
+ "execution_signals": Value::Object(execution),
+ "environment_signals": Value::Object(environment),
+ "overall_quality": r.overall_quality.as_str(),
+ "summary": r.summary,
+ })
+}
+
+fn signal_group_to_python(g: &SignalGroup) -> Value {
+    let signals: Vec<Value> = g
+ .signals
+ .iter()
+ .map(|s| {
+ json!({
+ "signal_type": s.signal_type.as_str(),
+ "message_index": s.message_index,
+ "snippet": s.snippet,
+ "confidence": s.confidence,
+ "metadata": s.metadata,
+ })
+ })
+ .collect();
+
+ json!({
+ "category": g.category,
+ "count": g.count,
+ "severity": g.severity,
+ "signals": signals,
+ })
+}
diff --git a/crates/brightstaff/src/handlers/debug.rs b/crates/brightstaff/src/handlers/debug.rs
new file mode 100644
index 00000000..58fbecd2
--- /dev/null
+++ b/crates/brightstaff/src/handlers/debug.rs
@@ -0,0 +1,53 @@
+use bytes::Bytes;
+use http_body_util::combinators::BoxBody;
+use hyper::{Response, StatusCode};
+
+use super::full;
+
+#[derive(serde::Serialize)]
+struct MemStats {
+ allocated_bytes: usize,
+ resident_bytes: usize,
+ #[serde(skip_serializing_if = "Option::is_none")]
+    error: Option<String>,
+}
+
+/// Returns jemalloc memory statistics as JSON.
+/// Falls back to a stub when the jemalloc feature is disabled.
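+///
+/// Example (port and output illustrative; hit the main brightstaff listener):
+///
+/// ```text
+/// $ curl -s http://127.0.0.1:<port>/debug/memstats
+/// {"allocated_bytes":123456789,"resident_bytes":234567890}
+/// ```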
+pub async fn memstats() -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
+ let stats = get_jemalloc_stats();
+ let json = serde_json::to_string(&stats).unwrap();
+ Ok(Response::builder()
+ .status(StatusCode::OK)
+ .header("Content-Type", "application/json")
+ .body(full(json))
+ .unwrap())
+}
+
+#[cfg(feature = "jemalloc")]
+fn get_jemalloc_stats() -> MemStats {
+ use tikv_jemalloc_ctl::{epoch, stats};
+
+ if let Err(e) = epoch::advance() {
+ return MemStats {
+ allocated_bytes: 0,
+ resident_bytes: 0,
+ error: Some(format!("failed to advance jemalloc epoch: {e}")),
+ };
+ }
+
+ MemStats {
+ allocated_bytes: stats::allocated::read().unwrap_or(0),
+ resident_bytes: stats::resident::read().unwrap_or(0),
+ error: None,
+ }
+}
+
+#[cfg(not(feature = "jemalloc"))]
+fn get_jemalloc_stats() -> MemStats {
+ MemStats {
+ allocated_bytes: 0,
+ resident_bytes: 0,
+ error: Some("jemalloc feature not enabled".to_string()),
+ }
+}
diff --git a/crates/brightstaff/src/handlers/function_calling.rs b/crates/brightstaff/src/handlers/function_calling.rs
index ca4def32..3e2543bc 100644
--- a/crates/brightstaff/src/handlers/function_calling.rs
+++ b/crates/brightstaff/src/handlers/function_calling.rs
@@ -441,10 +441,8 @@ impl ArchFunctionHandler {
}
}
// Handle str/string conversions
- "str" | "string" => {
- if !value.is_string() {
- return Ok(json!(value.to_string()));
- }
+ "str" | "string" if !value.is_string() => {
+ return Ok(json!(value.to_string()));
}
_ => {}
}
diff --git a/crates/brightstaff/src/handlers/llm/mod.rs b/crates/brightstaff/src/handlers/llm/mod.rs
index 8f00e4b6..3336209f 100644
--- a/crates/brightstaff/src/handlers/llm/mod.rs
+++ b/crates/brightstaff/src/handlers/llm/mod.rs
@@ -24,16 +24,18 @@ use crate::app_state::AppState;
use crate::handlers::agents::pipeline::PipelineProcessor;
use crate::handlers::extract_request_id;
use crate::handlers::full;
+use crate::metrics as bs_metrics;
use crate::state::response_state_processor::ResponsesStateProcessor;
use crate::state::{
extract_input_items, retrieve_and_combine_input, StateStorage, StateStorageError,
};
use crate::streaming::{
create_streaming_response, create_streaming_response_with_output_filter, truncate_message,
- ObservableStreamProcessor, StreamProcessor,
+ LlmMetricsCtx, ObservableStreamProcessor, StreamProcessor,
};
use crate::tracing::{
- collect_custom_trace_attributes, llm as tracing_llm, operation_component, set_service_name,
+ collect_custom_trace_attributes, llm as tracing_llm, operation_component,
+ plano as tracing_plano, set_service_name,
};
use model_selection::router_chat_get_upstream_model;
@@ -102,15 +104,36 @@ async fn llm_chat_inner(
.and_then(|hdr| request_headers.get(hdr))
.and_then(|v| v.to_str().ok())
.map(|s| s.to_string());
-    let pinned_model: Option<String> = if let Some(ref sid) = session_id {
+ let cached_route = if let Some(ref sid) = session_id {
state
.orchestrator_service
.get_cached_route(sid, tenant_id.as_deref())
.await
- .map(|c| c.model_name)
} else {
None
};
+    let (pinned_model, pinned_route_name): (Option<String>, Option<String>) = match cached_route {
+ Some(c) => (Some(c.model_name), c.route_name),
+ None => (None, None),
+ };
+
+ // Record session id on the LLM span for the observability console.
+ if let Some(ref sid) = session_id {
+ get_active_span(|span| {
+ span.set_attribute(opentelemetry::KeyValue::new(
+ tracing_plano::SESSION_ID,
+ sid.clone(),
+ ));
+ });
+ }
+ if let Some(ref route_name) = pinned_route_name {
+ get_active_span(|span| {
+ span.set_attribute(opentelemetry::KeyValue::new(
+ tracing_plano::ROUTE_NAME,
+ route_name.clone(),
+ ));
+ });
+ }
let full_qualified_llm_provider_url = format!("{}{}", state.llm_provider_url, request_path);
@@ -120,6 +143,7 @@ async fn llm_chat_inner(
&request_path,
&state.model_aliases,
&state.llm_providers,
+ state.signals_enabled,
)
.await
{
@@ -231,7 +255,15 @@ async fn llm_chat_inner(
if let Some(ref client_api_kind) = client_api {
let upstream_api =
provider_id.compatible_api_for_client(client_api_kind, is_streaming_request);
- client_request.normalize_for_upstream(provider_id, &upstream_api);
+ if let Err(e) = client_request.normalize_for_upstream(provider_id, &upstream_api) {
+ warn!(
+ "request_id={}: normalize_for_upstream failed: {}",
+ request_id, e
+ );
+ let mut bad_request = Response::new(full(e.message));
+ *bad_request.status_mut() = StatusCode::BAD_REQUEST;
+ return Ok(bad_request);
+ }
}
// --- Phase 2: Resolve conversation state (v1/responses API) ---
@@ -311,6 +343,18 @@ async fn llm_chat_inner(
alias_resolved_model.clone()
};
+ // Record route name on the LLM span (only when the orchestrator produced one).
+ if let Some(ref rn) = route_name {
+ if !rn.is_empty() && rn != "none" {
+ get_active_span(|span| {
+ span.set_attribute(opentelemetry::KeyValue::new(
+ tracing_plano::ROUTE_NAME,
+ rn.clone(),
+ ));
+ });
+ }
+ }
+
if let Some(ref sid) = session_id {
state
.orchestrator_service
@@ -373,6 +417,7 @@ async fn parse_and_validate_request(
request_path: &str,
model_aliases: &Option>,
llm_providers: &Arc>,
+ signals_enabled: bool,
) -> Result>> {
let raw_bytes = request
.collect()
@@ -451,7 +496,11 @@ async fn parse_and_validate_request(
let user_message_preview = client_request
.get_recent_user_message()
.map(|msg| truncate_message(&msg, 50));
- let messages_for_signals = Some(client_request.get_messages());
+ let messages_for_signals = if signals_enabled {
+ Some(client_request.get_messages())
+ } else {
+ None
+ };
// Set the upstream model name and strip routing metadata
client_request.set_model(model_name_only.clone());
@@ -652,6 +701,13 @@ async fn send_upstream(
let request_start_time = std::time::Instant::now();
+ // Labels for LLM upstream metrics. We prefer `resolved_model` (post-routing)
+    // and derive the provider from its `provider/model` prefix. This is the
+    // same model id the cost/latency router keys off.
+ let (metric_provider_raw, metric_model_raw) = bs_metrics::split_provider_model(resolved_model);
+ let metric_provider = metric_provider_raw.to_string();
+ let metric_model = metric_model_raw.to_string();
+
let llm_response = match http_client
.post(upstream_url)
.headers(request_headers.clone())
@@ -661,6 +717,14 @@ async fn send_upstream(
{
Ok(res) => res,
Err(err) => {
+ let err_class = bs_metrics::llm_error_class_from_reqwest(&err);
+ bs_metrics::record_llm_upstream(
+ &metric_provider,
+ &metric_model,
+ 0,
+ err_class,
+ request_start_time.elapsed(),
+ );
let err_msg = format!("Failed to send request: {}", err);
let mut internal_error = Response::new(full(err_msg));
*internal_error.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
@@ -671,6 +735,36 @@ async fn send_upstream(
// Propagate upstream headers and status
let response_headers = llm_response.headers().clone();
let upstream_status = llm_response.status();
+
+ // Upstream routers (e.g. DigitalOcean Gradient) may return an
+ // `x-model-router-selected-route` header indicating which task-level
+ // route the request was classified into (e.g. "Code Generation"). Surface
+ // it as `plano.route.name` so the obs console's Route hit % panel can
+ // show the breakdown even when Plano's own orchestrator wasn't in the
+ // routing path. Any value from Plano's orchestrator already set earlier
+ // takes precedence — this only fires when the span doesn't already have
+ // a route name.
+ if let Some(upstream_route) = response_headers
+ .get("x-model-router-selected-route")
+ .and_then(|v| v.to_str().ok())
+ {
+ if !upstream_route.is_empty() {
+ get_active_span(|span| {
+ span.set_attribute(opentelemetry::KeyValue::new(
+ crate::tracing::plano::ROUTE_NAME,
+ upstream_route.to_string(),
+ ));
+ });
+ }
+ }
+ // Record the upstream HTTP status on the span for the obs console.
+ get_active_span(|span| {
+ span.set_attribute(opentelemetry::KeyValue::new(
+ crate::tracing::http::STATUS_CODE,
+ upstream_status.as_u16() as i64,
+ ));
+ });
+
let mut response = Response::builder().status(upstream_status);
if let Some(headers) = response.headers_mut() {
for (name, value) in response_headers.iter() {
@@ -686,7 +780,12 @@ async fn send_upstream(
span_name,
request_start_time,
messages_for_signals,
- );
+ )
+ .with_llm_metrics(LlmMetricsCtx {
+ provider: metric_provider.clone(),
+ model: metric_model.clone(),
+ upstream_status: upstream_status.as_u16(),
+ });
let output_filter_request_headers = if filter_pipeline.has_output_filters() {
Some(request_headers.clone())
diff --git a/crates/brightstaff/src/handlers/llm/model_selection.rs b/crates/brightstaff/src/handlers/llm/model_selection.rs
index 1b4315e7..a1378d86 100644
--- a/crates/brightstaff/src/handlers/llm/model_selection.rs
+++ b/crates/brightstaff/src/handlers/llm/model_selection.rs
@@ -5,10 +5,24 @@ use hyper::StatusCode;
use std::sync::Arc;
use tracing::{debug, info, warn};
+use crate::metrics as bs_metrics;
+use crate::metrics::labels as metric_labels;
use crate::router::orchestrator::OrchestratorService;
use crate::streaming::truncate_message;
use crate::tracing::routing;
+/// Classify a request path into the fixed `route` label used on routing
+/// metrics, based on its `/agents` / `/routing` prefix (anything else is the
+/// plain `llm` route).
+fn route_label_for_path(request_path: &str) -> &'static str {
+ if request_path.starts_with("/agents") {
+ metric_labels::ROUTE_AGENT
+ } else if request_path.starts_with("/routing") {
+ metric_labels::ROUTE_ROUTING
+ } else {
+ metric_labels::ROUTE_LLM
+ }
+}
+
pub struct RoutingResult {
/// Primary model to use (first in the ranked list).
pub model_name: String,
@@ -106,15 +120,23 @@ pub async fn router_chat_get_upstream_model(
)
.await;
- let determination_ms = routing_start_time.elapsed().as_millis() as i64;
+ let determination_elapsed = routing_start_time.elapsed();
+ let determination_ms = determination_elapsed.as_millis() as i64;
let current_span = tracing::Span::current();
current_span.record(routing::ROUTE_DETERMINATION_MS, determination_ms);
+ let route_label = route_label_for_path(request_path);
match routing_result {
Ok(route) => match route {
Some((route_name, ranked_models)) => {
let model_name = ranked_models.first().cloned().unwrap_or_default();
current_span.record("route.selected_model", model_name.as_str());
+ bs_metrics::record_router_decision(
+ route_label,
+ &model_name,
+ false,
+ determination_elapsed,
+ );
Ok(RoutingResult {
model_name,
models: ranked_models,
@@ -126,6 +148,12 @@ pub async fn router_chat_get_upstream_model(
// This signals to llm.rs to use the original validated request model
current_span.record("route.selected_model", "none");
info!("no route determined, using default model");
+ bs_metrics::record_router_decision(
+ route_label,
+ "none",
+ true,
+ determination_elapsed,
+ );
Ok(RoutingResult {
model_name: "none".to_string(),
@@ -136,6 +164,7 @@ pub async fn router_chat_get_upstream_model(
},
Err(err) => {
current_span.record("route.selected_model", "unknown");
+ bs_metrics::record_router_decision(route_label, "unknown", true, determination_elapsed);
Err(RoutingError::internal_error(format!(
"Failed to determine route: {}",
err
diff --git a/crates/brightstaff/src/handlers/mod.rs b/crates/brightstaff/src/handlers/mod.rs
index 485a0438..4e851264 100644
--- a/crates/brightstaff/src/handlers/mod.rs
+++ b/crates/brightstaff/src/handlers/mod.rs
@@ -1,4 +1,5 @@
pub mod agents;
+pub mod debug;
pub mod function_calling;
pub mod llm;
pub mod models;
diff --git a/crates/brightstaff/src/handlers/routing_service.rs b/crates/brightstaff/src/handlers/routing_service.rs
index 5fc0d3b9..b93b1422 100644
--- a/crates/brightstaff/src/handlers/routing_service.rs
+++ b/crates/brightstaff/src/handlers/routing_service.rs
@@ -12,6 +12,8 @@ use tracing::{debug, info, info_span, warn, Instrument};
use super::extract_or_generate_traceparent;
use crate::handlers::llm::model_selection::router_chat_get_upstream_model;
+use crate::metrics as bs_metrics;
+use crate::metrics::labels as metric_labels;
use crate::router::orchestrator::OrchestratorService;
use crate::tracing::{collect_custom_trace_attributes, operation_component, set_service_name};
@@ -230,6 +232,17 @@ async fn routing_decision_inner(
pinned: false,
};
+ // Distinguish "decision served" (a concrete model picked) from
+ // "no_candidates" (the sentinel "none" returned when nothing
+ // matched). The handler still responds 200 in both cases, so RED
+ // metrics alone can't tell them apart.
+ let outcome = if response.models.first().map(|m| m == "none").unwrap_or(true) {
+ metric_labels::ROUTING_SVC_NO_CANDIDATES
+ } else {
+ metric_labels::ROUTING_SVC_DECISION_SERVED
+ };
+ bs_metrics::record_routing_service_outcome(outcome);
+
info!(
primary_model = %response.models.first().map(|s| s.as_str()).unwrap_or("none"),
total_models = response.models.len(),
@@ -249,6 +262,7 @@ async fn routing_decision_inner(
.unwrap())
}
Err(err) => {
+ bs_metrics::record_routing_service_outcome(metric_labels::ROUTING_SVC_POLICY_ERROR);
warn!(error = %err.message, "routing decision failed");
Ok(BrightStaffError::InternalServerError(err.message).into_response())
}
diff --git a/crates/brightstaff/src/lib.rs b/crates/brightstaff/src/lib.rs
index a0ba5f43..66c6eadf 100644
--- a/crates/brightstaff/src/lib.rs
+++ b/crates/brightstaff/src/lib.rs
@@ -1,5 +1,6 @@
pub mod app_state;
pub mod handlers;
+pub mod metrics;
pub mod router;
pub mod session_cache;
pub mod signals;
diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs
index 40ac429d..b1e17e42 100644
--- a/crates/brightstaff/src/main.rs
+++ b/crates/brightstaff/src/main.rs
@@ -1,10 +1,17 @@
+#[cfg(feature = "jemalloc")]
+#[global_allocator]
+static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
+
use brightstaff::app_state::AppState;
use brightstaff::handlers::agents::orchestrator::agent_chat;
+use brightstaff::handlers::debug;
use brightstaff::handlers::empty;
use brightstaff::handlers::function_calling::function_calling_chat_handler;
use brightstaff::handlers::llm::llm_chat;
use brightstaff::handlers::models::list_models;
use brightstaff::handlers::routing_service::routing_decision;
+use brightstaff::metrics as bs_metrics;
+use brightstaff::metrics::labels as metric_labels;
use brightstaff::router::model_metrics::ModelMetricsService;
use brightstaff::router::orchestrator::OrchestratorService;
use brightstaff::session_cache::init_session_cache;
@@ -326,6 +333,8 @@ async fn init_app_state(
.as_ref()
.and_then(|tracing| tracing.span_attributes.clone());
+ let signals_enabled = !overrides.disable_signals.unwrap_or(false);
+
Ok(AppState {
orchestrator_service,
model_aliases: config.model_aliases.clone(),
@@ -337,6 +346,7 @@ async fn init_app_state(
span_attributes,
http_client: reqwest::Client::new(),
filter_pipeline,
+ signals_enabled,
})
}
@@ -384,10 +394,79 @@ async fn init_state_storage(
// Request routing
// ---------------------------------------------------------------------------
+/// Normalized method label — limited set so we never emit a free-form string.
+fn method_label(method: &Method) -> &'static str {
+ match *method {
+ Method::GET => "GET",
+ Method::POST => "POST",
+ Method::PUT => "PUT",
+ Method::DELETE => "DELETE",
+ Method::PATCH => "PATCH",
+ Method::HEAD => "HEAD",
+ Method::OPTIONS => "OPTIONS",
+ _ => "OTHER",
+ }
+}
+
+/// Compute the fixed `handler` metric label from the request's path+method.
+/// Requests that match nothing map to `HANDLER_NOT_FOUND`, mirroring the
+/// catch-all 404 branch in `dispatch()`.
+fn handler_label_for(method: &Method, path: &str) -> &'static str {
+ if let Some(stripped) = path.strip_prefix("/agents") {
+ if matches!(
+ stripped,
+ CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH
+ ) {
+ return metric_labels::HANDLER_AGENT_CHAT;
+ }
+ }
+ if let Some(stripped) = path.strip_prefix("/routing") {
+ if matches!(
+ stripped,
+ CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH
+ ) {
+ return metric_labels::HANDLER_ROUTING_DECISION;
+ }
+ }
+ match (method, path) {
+ (&Method::POST, CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH) => {
+ metric_labels::HANDLER_LLM_CHAT
+ }
+ (&Method::POST, "/function_calling") => metric_labels::HANDLER_FUNCTION_CALLING,
+ (&Method::GET, "/v1/models" | "/agents/v1/models") => metric_labels::HANDLER_LIST_MODELS,
+ (&Method::OPTIONS, "/v1/models" | "/agents/v1/models") => {
+ metric_labels::HANDLER_CORS_PREFLIGHT
+ }
+ _ => metric_labels::HANDLER_NOT_FOUND,
+ }
+}
+
/// Route an incoming HTTP request to the appropriate handler.
async fn route(
req: Request<hyper::body::Incoming>,
state: Arc<AppState>,
+) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
+ let handler = handler_label_for(req.method(), req.uri().path());
+ let method = method_label(req.method());
+ let started = std::time::Instant::now();
+ let _in_flight = bs_metrics::InFlightGuard::new(handler);
+
+ let result = dispatch(req, state).await;
+
+ let status = match &result {
+ Ok(resp) => resp.status().as_u16(),
+ // hyper::Error here means the body couldn't be produced; conventionally 500.
+ Err(_) => 500,
+ };
+ bs_metrics::record_http(handler, method, status, started);
+ result
+}
+
+/// Inner dispatcher split out so `route()` can wrap it with metrics without
+/// duplicating the match tree.
+async fn dispatch(
+    req: Request<hyper::body::Incoming>,
+    state: Arc<AppState>,
+) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
let parent_cx = global::get_text_map_propagator(|p| p.extract(&HeaderExtractor(req.headers())));
let path = req.uri().path().to_string();
@@ -439,6 +518,7 @@ async fn route(
Ok(list_models(Arc::clone(&state.llm_providers)).await)
}
(&Method::OPTIONS, "/v1/models" | "/agents/v1/models") => cors_preflight(),
+ (&Method::GET, "/debug/memstats") => debug::memstats().await,
_ => {
debug!(method = %req.method(), path = %path, "no route found");
let mut not_found = Response::new(empty());
@@ -503,6 +583,7 @@ async fn run_server(state: Arc) -> Result<(), Box Result<(), Box> {
let config = load_config()?;
let _tracer_provider = init_tracer(config.tracing.as_ref());
+ bs_metrics::init();
info!("loaded plano_config.yaml");
let state = Arc::new(init_app_state(&config).await?);
run_server(state).await
diff --git a/crates/brightstaff/src/metrics/labels.rs b/crates/brightstaff/src/metrics/labels.rs
new file mode 100644
index 00000000..4eaf3e59
--- /dev/null
+++ b/crates/brightstaff/src/metrics/labels.rs
@@ -0,0 +1,38 @@
+//! Fixed label-value constants so callers never emit free-form strings
+//! (which would blow up cardinality).
+
+// Handler enum — derived from the path+method match in `route()`.
+pub const HANDLER_AGENT_CHAT: &str = "agent_chat";
+pub const HANDLER_ROUTING_DECISION: &str = "routing_decision";
+pub const HANDLER_LLM_CHAT: &str = "llm_chat";
+pub const HANDLER_FUNCTION_CALLING: &str = "function_calling";
+pub const HANDLER_LIST_MODELS: &str = "list_models";
+pub const HANDLER_CORS_PREFLIGHT: &str = "cors_preflight";
+pub const HANDLER_NOT_FOUND: &str = "not_found";
+
+// Router "route" class — which brightstaff endpoint prompted the decision.
+pub const ROUTE_AGENT: &str = "agent";
+pub const ROUTE_ROUTING: &str = "routing";
+pub const ROUTE_LLM: &str = "llm";
+
+// Token kind for brightstaff_llm_tokens_total.
+pub const TOKEN_KIND_PROMPT: &str = "prompt";
+pub const TOKEN_KIND_COMPLETION: &str = "completion";
+
+// LLM error_class values (match docstring in metrics/mod.rs).
+pub const LLM_ERR_NONE: &str = "none";
+pub const LLM_ERR_TIMEOUT: &str = "timeout";
+pub const LLM_ERR_CONNECT: &str = "connect";
+pub const LLM_ERR_PARSE: &str = "parse";
+pub const LLM_ERR_OTHER: &str = "other";
+pub const LLM_ERR_STREAM: &str = "stream";
+
+// Routing service outcome values.
+pub const ROUTING_SVC_DECISION_SERVED: &str = "decision_served";
+pub const ROUTING_SVC_NO_CANDIDATES: &str = "no_candidates";
+pub const ROUTING_SVC_POLICY_ERROR: &str = "policy_error";
+
+// Session cache outcome values.
+pub const SESSION_CACHE_HIT: &str = "hit";
+pub const SESSION_CACHE_MISS: &str = "miss";
+pub const SESSION_CACHE_STORE: &str = "store";
diff --git a/crates/brightstaff/src/metrics/mod.rs b/crates/brightstaff/src/metrics/mod.rs
new file mode 100644
index 00000000..34679cca
--- /dev/null
+++ b/crates/brightstaff/src/metrics/mod.rs
@@ -0,0 +1,377 @@
+//! Prometheus metrics for brightstaff.
+//!
+//! Installs the `metrics` global recorder backed by
+//! `metrics-exporter-prometheus` and exposes a `/metrics` HTTP endpoint on a
+//! dedicated admin port (default `0.0.0.0:9092`, overridable via
+//! `METRICS_BIND_ADDRESS`).
+//!
+//! Emitted metric families (see `describe_all` for full list):
+//! - HTTP RED: `brightstaff_http_requests_total`,
+//! `brightstaff_http_request_duration_seconds`,
+//! `brightstaff_http_in_flight_requests`.
+//! - LLM upstream: `brightstaff_llm_upstream_requests_total`,
+//! `brightstaff_llm_upstream_duration_seconds`,
+//! `brightstaff_llm_time_to_first_token_seconds`,
+//! `brightstaff_llm_tokens_total`,
+//! `brightstaff_llm_tokens_usage_missing_total`.
+//! - Routing: `brightstaff_router_decisions_total`,
+//! `brightstaff_router_decision_duration_seconds`,
+//! `brightstaff_routing_service_requests_total`,
+//! `brightstaff_session_cache_events_total`.
+//! - Process: via `metrics-process`.
+//! - Build: `brightstaff_build_info`.
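+//!
+//! Example PromQL against these families (illustrative):
+//!
+//! ```text
+//! sum by (handler) (rate(brightstaff_http_requests_total[5m]))
+//! histogram_quantile(0.95, sum by (le) (rate(brightstaff_http_request_duration_seconds_bucket[5m])))
+//! ```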
+
+use std::net::SocketAddr;
+use std::sync::OnceLock;
+use std::time::{Duration, Instant};
+
+use metrics::{counter, describe_counter, describe_gauge, describe_histogram, gauge, histogram};
+use metrics_exporter_prometheus::{Matcher, PrometheusBuilder};
+use tracing::{info, warn};
+
+pub mod labels;
+
+/// Guard flag so tests don't re-install the global recorder.
+static INIT: OnceLock<()> = OnceLock::new();
+
+const DEFAULT_METRICS_BIND: &str = "0.0.0.0:9092";
+
+/// HTTP request duration buckets (seconds). Capped at 60s.
+const HTTP_BUCKETS: &[f64] = &[
+ 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0,
+];
+
+/// LLM upstream / TTFT buckets (seconds). Capped at 120s because provider
+/// completions routinely run that long.
+const LLM_BUCKETS: &[f64] = &[0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0, 120.0];
+
+/// Router decision buckets (seconds). The orchestrator call itself is usually
+/// sub-second but bucketed generously in case of upstream slowness.
+const ROUTER_BUCKETS: &[f64] = &[
+ 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0,
+];
+
+/// Install the global recorder and spawn the `/metrics` HTTP listener.
+///
+/// Safe to call more than once; subsequent calls are no-ops so tests that
+/// construct their own recorder still work.
+pub fn init() {
+ if INIT.get().is_some() {
+ return;
+ }
+
+ let bind: SocketAddr = std::env::var("METRICS_BIND_ADDRESS")
+ .unwrap_or_else(|_| DEFAULT_METRICS_BIND.to_string())
+ .parse()
+ .unwrap_or_else(|err| {
+ warn!(error = %err, default = DEFAULT_METRICS_BIND, "invalid METRICS_BIND_ADDRESS, falling back to default");
+ DEFAULT_METRICS_BIND.parse().expect("default bind parses")
+ });
+
+ let builder = PrometheusBuilder::new()
+ .with_http_listener(bind)
+ .set_buckets_for_metric(
+ Matcher::Full("brightstaff_http_request_duration_seconds".to_string()),
+ HTTP_BUCKETS,
+ )
+ .and_then(|b| {
+ b.set_buckets_for_metric(Matcher::Prefix("brightstaff_llm_".to_string()), LLM_BUCKETS)
+ })
+ .and_then(|b| {
+ b.set_buckets_for_metric(
+ Matcher::Full("brightstaff_router_decision_duration_seconds".to_string()),
+ ROUTER_BUCKETS,
+ )
+ });
+
+ let builder = match builder {
+ Ok(b) => b,
+ Err(err) => {
+ warn!(error = %err, "failed to configure metrics buckets, using defaults");
+ PrometheusBuilder::new().with_http_listener(bind)
+ }
+ };
+
+ if let Err(err) = builder.install() {
+ warn!(error = %err, "failed to install Prometheus recorder; metrics disabled");
+ return;
+ }
+
+ let _ = INIT.set(());
+
+ describe_all();
+ emit_build_info();
+
+ // Register process-level collector (RSS, CPU, FDs).
+ let collector = metrics_process::Collector::default();
+ collector.describe();
+    // Prime once at startup, then refresh on a short interval so the process
+    // gauges (RSS, CPU, FDs) stay current between scrapes; the exporter
+    // renders whatever was collected most recently.
+ collector.collect();
+ tokio::spawn(async move {
+ let mut tick = tokio::time::interval(Duration::from_secs(10));
+ tick.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
+ loop {
+ tick.tick().await;
+ collector.collect();
+ }
+ });
+
+ info!(address = %bind, "metrics listener started");
+}
+
+fn describe_all() {
+ describe_counter!(
+ "brightstaff_http_requests_total",
+ "Total HTTP requests served by brightstaff, by handler and status class."
+ );
+ describe_histogram!(
+ "brightstaff_http_request_duration_seconds",
+ "Wall-clock duration of HTTP requests served by brightstaff, by handler."
+ );
+ describe_gauge!(
+ "brightstaff_http_in_flight_requests",
+ "Number of HTTP requests currently being served by brightstaff, by handler."
+ );
+
+ describe_counter!(
+ "brightstaff_llm_upstream_requests_total",
+ "LLM upstream request outcomes, by provider, model, status class and error class."
+ );
+ describe_histogram!(
+ "brightstaff_llm_upstream_duration_seconds",
+ "Wall-clock duration of LLM upstream calls (stream close for streaming), by provider and model."
+ );
+ describe_histogram!(
+ "brightstaff_llm_time_to_first_token_seconds",
+ "Time from request start to first streamed byte, by provider and model (streaming only)."
+ );
+ describe_counter!(
+ "brightstaff_llm_tokens_total",
+ "Tokens reported in the provider `usage` field, by provider, model and kind (prompt/completion)."
+ );
+ describe_counter!(
+ "brightstaff_llm_tokens_usage_missing_total",
+ "LLM responses that completed without a usable `usage` block (so token counts are unknown)."
+ );
+
+ describe_counter!(
+ "brightstaff_router_decisions_total",
+ "Routing decisions made by the orchestrator, by route, selected model, and whether a fallback was used."
+ );
+ describe_histogram!(
+ "brightstaff_router_decision_duration_seconds",
+ "Time spent in the orchestrator deciding a route, by route."
+ );
+ describe_counter!(
+ "brightstaff_routing_service_requests_total",
+ "Outcomes of /routing/* decision requests: decision_served, no_candidates, policy_error."
+ );
+ describe_counter!(
+ "brightstaff_session_cache_events_total",
+ "Session affinity cache lookups and stores, by outcome."
+ );
+
+ describe_gauge!(
+ "brightstaff_build_info",
+ "Build metadata. Always 1; labels carry version and git SHA."
+ );
+}
+
+fn emit_build_info() {
+ let version = env!("CARGO_PKG_VERSION");
+ let git_sha = option_env!("GIT_SHA").unwrap_or("unknown");
+ gauge!(
+ "brightstaff_build_info",
+ "version" => version.to_string(),
+ "git_sha" => git_sha.to_string(),
+ )
+ .set(1.0);
+}
+
+/// Split a provider-qualified model id like `"openai/gpt-4o"` into
+/// `(provider, model)`. Returns `("unknown", raw)` when there is no `/`.
+pub fn split_provider_model(full: &str) -> (&str, &str) {
+ match full.split_once('/') {
+ Some((p, m)) => (p, m),
+ None => ("unknown", full),
+ }
+}
+
+/// Bucket an HTTP status code into its class: `"1xx"` through `"5xx"`, else `"other"`.
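+///
+/// A quick check of the mapping:
+///
+/// ```
+/// use brightstaff::metrics::status_class;
+/// assert_eq!(status_class(204), "2xx");
+/// assert_eq!(status_class(503), "5xx");
+/// ```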
+pub fn status_class(status: u16) -> &'static str {
+ match status {
+ 100..=199 => "1xx",
+ 200..=299 => "2xx",
+ 300..=399 => "3xx",
+ 400..=499 => "4xx",
+ 500..=599 => "5xx",
+ _ => "other",
+ }
+}
+
+// ---------------------------------------------------------------------------
+// HTTP RED helpers
+// ---------------------------------------------------------------------------
+
+/// RAII guard that increments the in-flight gauge on construction and
+/// decrements it on drop. Pair with [`record_http`] in the `route()` wrapper
+/// so the gauge drops even on early-return and error paths.
+pub struct InFlightGuard {
+ handler: &'static str,
+}
+
+impl InFlightGuard {
+ pub fn new(handler: &'static str) -> Self {
+ gauge!(
+ "brightstaff_http_in_flight_requests",
+ "handler" => handler,
+ )
+ .increment(1.0);
+ Self { handler }
+ }
+}
+
+impl Drop for InFlightGuard {
+ fn drop(&mut self) {
+ gauge!(
+ "brightstaff_http_in_flight_requests",
+ "handler" => self.handler,
+ )
+ .decrement(1.0);
+ }
+}
+
+/// Record the HTTP request counter + duration histogram.
+pub fn record_http(handler: &'static str, method: &'static str, status: u16, started: Instant) {
+ let class = status_class(status);
+ counter!(
+ "brightstaff_http_requests_total",
+ "handler" => handler,
+ "method" => method,
+ "status_class" => class,
+ )
+ .increment(1);
+ histogram!(
+ "brightstaff_http_request_duration_seconds",
+ "handler" => handler,
+ )
+ .record(started.elapsed().as_secs_f64());
+}
+
+// ---------------------------------------------------------------------------
+// LLM upstream helpers
+// ---------------------------------------------------------------------------
+
+/// Classify an outcome of an LLM upstream call for the `error_class` label.
+pub fn llm_error_class_from_reqwest(err: &reqwest::Error) -> &'static str {
+ if err.is_timeout() {
+ "timeout"
+ } else if err.is_connect() {
+ "connect"
+ } else if err.is_decode() {
+ "parse"
+ } else {
+ "other"
+ }
+}
+
+/// Record the outcome of an LLM upstream call. `status` is the HTTP status
+/// the upstream returned (0 if the call never produced one, e.g. send failure).
+/// `error_class` is `"none"` on success, or a discriminated error label.
+pub fn record_llm_upstream(
+ provider: &str,
+ model: &str,
+ status: u16,
+ error_class: &str,
+ duration: Duration,
+) {
+ let class = if status == 0 {
+ "error"
+ } else {
+ status_class(status)
+ };
+ counter!(
+ "brightstaff_llm_upstream_requests_total",
+ "provider" => provider.to_string(),
+ "model" => model.to_string(),
+ "status_class" => class,
+ "error_class" => error_class.to_string(),
+ )
+ .increment(1);
+ histogram!(
+ "brightstaff_llm_upstream_duration_seconds",
+ "provider" => provider.to_string(),
+ "model" => model.to_string(),
+ )
+ .record(duration.as_secs_f64());
+}
+
+pub fn record_llm_ttft(provider: &str, model: &str, ttft: Duration) {
+ histogram!(
+ "brightstaff_llm_time_to_first_token_seconds",
+ "provider" => provider.to_string(),
+ "model" => model.to_string(),
+ )
+ .record(ttft.as_secs_f64());
+}
+
+pub fn record_llm_tokens(provider: &str, model: &str, kind: &'static str, count: u64) {
+ counter!(
+ "brightstaff_llm_tokens_total",
+ "provider" => provider.to_string(),
+ "model" => model.to_string(),
+ "kind" => kind,
+ )
+ .increment(count);
+}
+
+pub fn record_llm_tokens_usage_missing(provider: &str, model: &str) {
+ counter!(
+ "brightstaff_llm_tokens_usage_missing_total",
+ "provider" => provider.to_string(),
+ "model" => model.to_string(),
+ )
+ .increment(1);
+}
+
+// ---------------------------------------------------------------------------
+// Router helpers
+// ---------------------------------------------------------------------------
+
+pub fn record_router_decision(
+ route: &'static str,
+ selected_model: &str,
+ fallback: bool,
+ duration: Duration,
+) {
+ counter!(
+ "brightstaff_router_decisions_total",
+ "route" => route,
+ "selected_model" => selected_model.to_string(),
+ "fallback" => if fallback { "true" } else { "false" },
+ )
+ .increment(1);
+ histogram!(
+ "brightstaff_router_decision_duration_seconds",
+ "route" => route,
+ )
+ .record(duration.as_secs_f64());
+}
+
+pub fn record_routing_service_outcome(outcome: &'static str) {
+ counter!(
+ "brightstaff_routing_service_requests_total",
+ "outcome" => outcome,
+ )
+ .increment(1);
+}
+
+pub fn record_session_cache_event(outcome: &'static str) {
+ counter!(
+ "brightstaff_session_cache_events_total",
+ "outcome" => outcome,
+ )
+ .increment(1);
+}
diff --git a/crates/brightstaff/src/router/http.rs b/crates/brightstaff/src/router/http.rs
index ad1b711c..e1f2be1e 100644
--- a/crates/brightstaff/src/router/http.rs
+++ b/crates/brightstaff/src/router/http.rs
@@ -1,8 +1,14 @@
use hermesllm::apis::openai::ChatCompletionsResponse;
use hyper::header;
+use serde::Deserialize;
use thiserror::Error;
use tracing::warn;
+/// Max bytes of raw upstream body we include in a log message or error text
+/// when the body is not a recognizable error envelope. Keeps logs from being
+/// flooded by huge HTML error pages.
+const RAW_BODY_LOG_LIMIT: usize = 512;
+
#[derive(Debug, Error)]
pub enum HttpError {
#[error("Failed to send request: {0}")]
@@ -10,13 +16,64 @@ pub enum HttpError {
#[error("Failed to parse JSON response: {0}")]
Json(serde_json::Error, String),
+
+ #[error("Upstream returned {status}: {message}")]
+ Upstream { status: u16, message: String },
+}
+
+/// Shape of an OpenAI-style error response body, e.g.
+/// `{"error": {"message": "...", "type": "...", "param": "...", "code": ...}}`.
+#[derive(Debug, Deserialize)]
+struct UpstreamErrorEnvelope {
+ error: UpstreamErrorBody,
+}
+
+#[derive(Debug, Deserialize)]
+struct UpstreamErrorBody {
+ message: String,
+ #[serde(default, rename = "type")]
+ err_type: Option<String>,
+ #[serde(default)]
+ param: Option<String>,
+}
+
+/// Extract a human-readable error message from an upstream response body.
+/// Tries to parse an OpenAI-style `{"error": {"message": ...}}` envelope; if
+/// that fails, falls back to the first `RAW_BODY_LOG_LIMIT` bytes of the raw
+/// body (UTF-8 safe).
+fn extract_upstream_error_message(body: &str) -> String {
+ if let Ok(env) = serde_json::from_str::<UpstreamErrorEnvelope>(body) {
+ let mut msg = env.error.message;
+ if let Some(param) = env.error.param {
+ msg.push_str(&format!(" (param={param})"));
+ }
+ if let Some(err_type) = env.error.err_type {
+ msg.push_str(&format!(" [type={err_type}]"));
+ }
+ return msg;
+ }
+ truncate_for_log(body).to_string()
+}
+
+fn truncate_for_log(s: &str) -> &str {
+ if s.len() <= RAW_BODY_LOG_LIMIT {
+ return s;
+ }
+ let mut end = RAW_BODY_LOG_LIMIT;
+ while end > 0 && !s.is_char_boundary(end) {
+ end -= 1;
+ }
+ &s[..end]
+}
/// Sends a POST request to the given URL and extracts the text content
/// from the first choice of the `ChatCompletionsResponse`.
///
-/// Returns `Some((content, elapsed))` on success, or `None` if the response
-/// had no choices or the first choice had no content.
+/// Returns `Some((content, elapsed))` on success, `None` if the response
+/// had no choices or the first choice had no content. Returns
+/// `HttpError::Upstream` for any non-2xx status, carrying a message
+/// extracted from the OpenAI-style error envelope (or a truncated raw body
+/// if the body is not in that shape).
pub async fn post_and_extract_content(
client: &reqwest::Client,
url: &str,
@@ -26,17 +83,36 @@ pub async fn post_and_extract_content(
let start_time = std::time::Instant::now();
let res = client.post(url).headers(headers).body(body).send().await?;
+ let status = res.status();
let body = res.text().await?;
let elapsed = start_time.elapsed();
+ if !status.is_success() {
+ let message = extract_upstream_error_message(&body);
+ warn!(
+ status = status.as_u16(),
+ message = %message,
+ body_size = body.len(),
+ "upstream returned error response"
+ );
+ return Err(HttpError::Upstream {
+ status: status.as_u16(),
+ message,
+ });
+ }
+
let response: ChatCompletionsResponse = serde_json::from_str(&body).map_err(|err| {
- warn!(error = %err, body = %body, "failed to parse json response");
+ warn!(
+ error = %err,
+ body = %truncate_for_log(&body),
+ "failed to parse json response",
+ );
HttpError::Json(err, format!("Failed to parse JSON: {}", body))
})?;
if response.choices.is_empty() {
- warn!(body = %body, "no choices in response");
+ warn!(body = %truncate_for_log(&body), "no choices in response");
return Ok(None);
}
@@ -46,3 +122,52 @@ pub async fn post_and_extract_content(
.as_ref()
.map(|c| (c.clone(), elapsed)))
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn extracts_message_from_openai_style_error_envelope() {
+ let body = r#"{"error":{"code":400,"message":"This model's maximum context length is 32768 tokens. However, you requested 0 output tokens and your prompt contains at least 32769 input tokens, for a total of at least 32769 tokens.","param":"input_tokens","type":"BadRequestError"}}"#;
+ let msg = extract_upstream_error_message(body);
+ assert!(
+ msg.starts_with("This model's maximum context length is 32768 tokens."),
+ "unexpected message: {msg}"
+ );
+ assert!(msg.contains("(param=input_tokens)"));
+ assert!(msg.contains("[type=BadRequestError]"));
+ }
+
+ #[test]
+ fn extracts_message_without_optional_fields() {
+ let body = r#"{"error":{"message":"something broke"}}"#;
+ let msg = extract_upstream_error_message(body);
+ assert_eq!(msg, "something broke");
+ }
+
+ #[test]
+ fn falls_back_to_raw_body_when_not_error_envelope() {
+ let body = "502 Bad Gateway";
+ let msg = extract_upstream_error_message(body);
+ assert_eq!(msg, body);
+ }
+
+ #[test]
+ fn truncates_non_envelope_bodies_in_logs() {
+ let body = "x".repeat(RAW_BODY_LOG_LIMIT * 3);
+ let msg = extract_upstream_error_message(&body);
+ assert_eq!(msg.len(), RAW_BODY_LOG_LIMIT);
+ }
+
+ #[test]
+ fn truncate_for_log_respects_utf8_boundaries() {
+ // 2-byte characters; picking a length that would split mid-char.
+ let body = "é".repeat(RAW_BODY_LOG_LIMIT);
+ let out = truncate_for_log(&body);
+ // Should be a valid &str (implicit — would panic if we returned
+ // a non-boundary slice) and at most RAW_BODY_LOG_LIMIT bytes.
+ assert!(out.len() <= RAW_BODY_LOG_LIMIT);
+ assert!(out.chars().all(|c| c == 'é'));
+ }
+}
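// Illustrative call-site sketch — not part of the patch. With the new
// HttpError::Upstream variant, a caller can separate transport failures from
// upstream application errors and log the message extracted from the error
// envelope. The wrapper function and its logging policy are hypothetical;
// the `headers` parameter type is assumed to be reqwest's HeaderMap.
//
// async fn route_once(
//     client: &reqwest::Client,
//     url: &str,
//     headers: reqwest::header::HeaderMap,
//     body: String,
// ) -> Option<String> {
//     match post_and_extract_content(client, url, headers, body).await {
//         Ok(Some((content, _elapsed))) => Some(content),
//         // Response parsed but had no usable choice content.
//         Ok(None) => None,
//         // Non-2xx from upstream: status and extracted message are available.
//         Err(HttpError::Upstream { status, message }) => {
//             tracing::warn!(status, %message, "upstream rejected routing request");
//             None
//         }
//         // Send or JSON-parse failures.
//         Err(err) => {
//             tracing::warn!(error = %err, "routing call failed");
//             None
//         }
//     }
// }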
diff --git a/crates/brightstaff/src/router/mod.rs b/crates/brightstaff/src/router/mod.rs
index 2ef0d11a..0f48c090 100644
--- a/crates/brightstaff/src/router/mod.rs
+++ b/crates/brightstaff/src/router/mod.rs
@@ -3,3 +3,5 @@ pub mod model_metrics;
pub mod orchestrator;
pub mod orchestrator_model;
pub mod orchestrator_model_v1;
+#[cfg(test)]
+mod stress_tests;
diff --git a/crates/brightstaff/src/router/orchestrator.rs b/crates/brightstaff/src/router/orchestrator.rs
index 7aaf70a2..2d7b25de 100644
--- a/crates/brightstaff/src/router/orchestrator.rs
+++ b/crates/brightstaff/src/router/orchestrator.rs
@@ -15,6 +15,8 @@ use super::http::{self, post_and_extract_content};
use super::model_metrics::ModelMetricsService;
use super::orchestrator_model::OrchestratorModel;
+use crate::metrics as bs_metrics;
+use crate::metrics::labels as metric_labels;
use crate::router::orchestrator_model_v1;
use crate::session_cache::SessionCache;
@@ -130,7 +132,13 @@ impl OrchestratorService {
tenant_id: Option<&str>,
) -> Option<String> {
let cache = self.session_cache.as_ref()?;
- cache.get(&Self::session_key(tenant_id, session_id)).await
+ let result = cache.get(&Self::session_key(tenant_id, session_id)).await;
+ bs_metrics::record_session_cache_event(if result.is_some() {
+ metric_labels::SESSION_CACHE_HIT
+ } else {
+ metric_labels::SESSION_CACHE_MISS
+ });
+ result
}
pub async fn cache_route(
@@ -151,6 +159,7 @@ impl OrchestratorService {
self.session_ttl,
)
.await;
+ bs_metrics::record_session_cache_event(metric_labels::SESSION_CACHE_STORE);
}
}
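// Sketch of the label constants assumed by the calls above — not part of the
// patch. The actual crate::metrics::labels module is not shown in this diff,
// so these names and values are inferred from how they are used (hit / miss /
// store outcomes on brightstaff_session_cache_events_total), not copied from
// the repository.
//
// pub mod labels {
//     pub const SESSION_CACHE_HIT: &str = "hit";
//     pub const SESSION_CACHE_MISS: &str = "miss";
//     pub const SESSION_CACHE_STORE: &str = "store";
// }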
diff --git a/crates/brightstaff/src/router/orchestrator_model_v1.rs b/crates/brightstaff/src/router/orchestrator_model_v1.rs
index 75e5c586..693aacc2 100644
--- a/crates/brightstaff/src/router/orchestrator_model_v1.rs
+++ b/crates/brightstaff/src/router/orchestrator_model_v1.rs
@@ -10,6 +10,18 @@ use super::orchestrator_model::{OrchestratorModel, OrchestratorModelError};
pub const MAX_TOKEN_LEN: usize = 8192; // Default max token length for the orchestration model
+/// Hard cap on the number of recent messages considered when building the
+/// routing prompt. Bounds prompt growth for long-running conversations and
+/// acts as an outer guardrail before the token-budget loop runs. The most
+/// recent `MAX_ROUTING_TURNS` filtered messages are kept; older turns are
+/// dropped entirely.
+pub const MAX_ROUTING_TURNS: usize = 16;
+
+/// Unicode ellipsis used to mark where content was trimmed out of a long
+/// message. Helps signal to the downstream router model that the message was
+/// truncated.
+const TRIM_MARKER: &str = "…";
+
/// Custom JSON formatter that produces spaced JSON (space after colons and commas), same as JSON in python
struct SpacedJsonFormatter;
@@ -176,10 +188,9 @@ impl OrchestratorModel for OrchestratorModelV1 {
messages: &[Message],
usage_preferences_from_request: &Option<Vec<RoutingPreference>>,
) -> ChatCompletionsRequest {
- // remove system prompt, tool calls, tool call response and messages without content
- // if content is empty its likely a tool call
- // when role == tool its tool call response
- let messages_vec = messages
+ // Remove system/developer/tool messages and messages without extractable
+ // text (tool calls have no text content we can classify against).
+ let filtered: Vec<&Message> = messages
.iter()
.filter(|m| {
m.role != Role::System
@@ -187,37 +198,72 @@ impl OrchestratorModel for OrchestratorModelV1 {
&& m.role != Role::Tool
&& !m.content.extract_text().is_empty()
})
- .collect::<Vec<&Message>>();
+ .collect();
- // Following code is to ensure that the conversation does not exceed max token length
- // Note: we use a simple heuristic to estimate token count based on character length to optimize for performance
+ // Outer guardrail: only consider the last `MAX_ROUTING_TURNS` filtered
+ // messages when building the routing prompt. Keeps prompt growth
+ // predictable for long conversations regardless of per-message size.
+ let start = filtered.len().saturating_sub(MAX_ROUTING_TURNS);
+ let messages_vec: &[&Message] = &filtered[start..];
+
+ // Ensure the conversation does not exceed the configured token budget.
+ // We use `len() / TOKEN_LENGTH_DIVISOR` as a cheap token estimate to
+ // avoid running a real tokenizer on the hot path.
let mut token_count = ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT.len() / TOKEN_LENGTH_DIVISOR;
- let mut selected_messages_list_reversed: Vec<&Message> = vec![];
+ let mut selected_messages_list_reversed: Vec<Message> = vec![];
for (selected_messsage_count, message) in messages_vec.iter().rev().enumerate() {
- let message_token_count = message.content.extract_text().len() / TOKEN_LENGTH_DIVISOR;
- token_count += message_token_count;
- if token_count > self.max_token_length {
+ let message_text = message.content.extract_text();
+ let message_token_count = message_text.len() / TOKEN_LENGTH_DIVISOR;
+ if token_count + message_token_count > self.max_token_length {
+ let remaining_tokens = self.max_token_length.saturating_sub(token_count);
debug!(
- token_count = token_count,
+ attempted_total_tokens = token_count + message_token_count,
max_tokens = self.max_token_length,
+ remaining_tokens,
selected = selected_messsage_count,
total = messages_vec.len(),
"token count exceeds max, truncating conversation"
);
- if message.role == Role::User {
- // If message that exceeds max token length is from user, we need to keep it
- selected_messages_list_reversed.push(message);
+ // If the overflow message is from the user we need to keep
+ // some of it so the orchestrator still sees the latest user
+ // intent. Use a middle-trim (head + ellipsis + tail): users
+ // often frame the task at the start AND put the actual ask
+ // at the end of a long pasted block, so preserving both is
+ // better than a head-only cut. The ellipsis also signals to
+ // the router model that content was dropped.
+ if message.role == Role::User && remaining_tokens > 0 {
+ let max_bytes = remaining_tokens.saturating_mul(TOKEN_LENGTH_DIVISOR);
+ let truncated = trim_middle_utf8(&message_text, max_bytes);
+ selected_messages_list_reversed.push(Message {
+ role: Role::User,
+ content: Some(MessageContent::Text(truncated)),
+ name: None,
+ tool_calls: None,
+ tool_call_id: None,
+ });
}
break;
}
- // If we are here, it means that the message is within the max token length
- selected_messages_list_reversed.push(message);
+ token_count += message_token_count;
+ selected_messages_list_reversed.push(Message {
+ role: message.role.clone(),
+ content: Some(MessageContent::Text(message_text)),
+ name: None,
+ tool_calls: None,
+ tool_call_id: None,
+ });
}
if selected_messages_list_reversed.is_empty() {
debug!("no messages selected, using last message");
if let Some(last_message) = messages_vec.last() {
- selected_messages_list_reversed.push(last_message);
+ selected_messages_list_reversed.push(Message {
+ role: last_message.role.clone(),
+ content: Some(MessageContent::Text(last_message.content.extract_text())),
+ name: None,
+ tool_calls: None,
+ tool_call_id: None,
+ });
}
}
@@ -237,22 +283,8 @@ impl OrchestratorModel for OrchestratorModelV1 {
}
// Reverse the selected messages to maintain the conversation order
- let selected_conversation_list = selected_messages_list_reversed
- .iter()
- .rev()
- .map(|message| Message {
- role: message.role.clone(),
- content: Some(MessageContent::Text(
- message
- .content
- .as_ref()
- .map_or(String::new(), |c| c.to_string()),
- )),
- name: None,
- tool_calls: None,
- tool_call_id: None,
- })
- .collect::>();
+ let selected_conversation_list: Vec<Message> =
+ selected_messages_list_reversed.into_iter().rev().collect();
// Generate the orchestrator request message based on the usage preferences.
// If preferences are passed in request then we use them;
@@ -405,6 +437,45 @@ fn fix_json_response(body: &str) -> String {
body.replace("'", "\"").replace("\\n", "")
}
+/// Truncate `s` so the result is at most `max_bytes` bytes long, keeping
+/// roughly 60% from the start and 40% from the end, with a Unicode ellipsis
+/// separating the two. All splits respect UTF-8 character boundaries. When
+/// `max_bytes` is too small to fit the marker at all, falls back to a
+/// head-only truncation.
+fn trim_middle_utf8(s: &str, max_bytes: usize) -> String {
+ if s.len() <= max_bytes {
+ return s.to_string();
+ }
+ if max_bytes <= TRIM_MARKER.len() {
+ // Not enough room even for the marker — just keep the start.
+ let mut end = max_bytes;
+ while end > 0 && !s.is_char_boundary(end) {
+ end -= 1;
+ }
+ return s[..end].to_string();
+ }
+
+ let available = max_bytes - TRIM_MARKER.len();
+ // Bias toward the start (60%) where task framing typically lives, while
+ // still preserving ~40% of the tail where the user's actual ask often
+ // appears after a long paste.
+ let mut start_len = available * 3 / 5;
+ while start_len > 0 && !s.is_char_boundary(start_len) {
+ start_len -= 1;
+ }
+ let end_len = available - start_len;
+ let mut end_start = s.len().saturating_sub(end_len);
+ while end_start < s.len() && !s.is_char_boundary(end_start) {
+ end_start += 1;
+ }
+
+ let mut out = String::with_capacity(start_len + TRIM_MARKER.len() + (s.len() - end_start));
+ out.push_str(&s[..start_len]);
+ out.push_str(TRIM_MARKER);
+ out.push_str(&s[end_start..]);
+ out
+}
+
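// Worked example — illustrative, not part of the patch. With max_bytes = 10
// and the 3-byte UTF-8 ellipsis marker: available = 10 - 3 = 7 bytes, the
// head keeps 7 * 3 / 5 = 4 bytes and the tail the remaining 3 bytes.
//
// #[test]
// fn trim_middle_worked_example() {
//     let out = trim_middle_utf8("abcdefghijklmnop", 10);
//     assert_eq!(out, "abcd…nop"); // 4-byte head + "…" + 3-byte tail = 10 bytes
// }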
impl std::fmt::Debug for dyn OrchestratorModel {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "OrchestratorModel")
@@ -777,6 +848,10 @@ If no routes are needed, return an empty list for `route`.
#[test]
fn test_conversation_trim_upto_user_message() {
+ // With max_token_length=230, the older user message "given the image
+ // In style of Andy Warhol" overflows the remaining budget and gets
+ // middle-trimmed (head + ellipsis + tail) until it fits. Newer turns
+ // are kept in full.
let expected_prompt = r#"
You are a helpful assistant that selects the most suitable routes based on user intent.
You are provided with a list of available routes enclosed within <routes></routes> XML tags:
@@ -789,7 +864,7 @@ You are also given the conversation context enclosed within <conversation>
+ let agent_orchestrations = serde_json::from_str::<
+ HashMap<String, Vec<RoutingPreference>>,
+ >(orchestrations_str)
+ .unwrap();
+
+ let max_token_length = 2048;
+ let orchestrator = OrchestratorModelV1::new(
+ agent_orchestrations,
+ "test-model".to_string(),
+ max_token_length,
+ );
+
+ // ~500KB of content — same scale as the real payload that triggered
+ // the production upstream 400.
+ let head = "HEAD_MARKER_START ";
+ let tail = " TAIL_MARKER_END";
+ let filler = "A".repeat(500_000);
+ let huge_user_content = format!("{head}{filler}{tail}");
+
+ let conversation = vec![Message {
+ role: Role::User,
+ content: Some(MessageContent::Text(huge_user_content.clone())),
+ name: None,
+ tool_calls: None,
+ tool_call_id: None,
+ }];
+
+ let req = orchestrator.generate_request(&conversation, &None);
+ let prompt = req.messages[0].content.extract_text();
+
+ // Prompt must stay bounded. Generous ceiling = budget-in-bytes +
+ // scaffolding + slack. Real result should be well under this.
+ let byte_ceiling = max_token_length * TOKEN_LENGTH_DIVISOR
+ + ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT.len()
+ + 1024;
+ assert!(
+ prompt.len() < byte_ceiling,
+ "prompt length {} exceeded ceiling {} — truncation did not apply",
+ prompt.len(),
+ byte_ceiling,
+ );
+
+ // Not all 500k filler chars survive.
+ let a_count = prompt.chars().filter(|c| *c == 'A').count();
+ assert!(
+ a_count < filler.len(),
+ "expected user message to be truncated; all {} 'A's survived",
+ a_count
+ );
+ assert!(
+ a_count > 0,
+ "expected some of the user message to survive truncation"
+ );
+
+ // Head and tail of the message must both be preserved (that's the
+ // whole point of middle-trim over head-only).
+ assert!(
+ prompt.contains(head),
+ "head marker missing — head was not preserved"
+ );
+ assert!(
+ prompt.contains(tail),
+ "tail marker missing — tail was not preserved"
+ );
+
+ // Trim marker must be present so the router model can see that
+ // content was omitted.
+ assert!(
+ prompt.contains(TRIM_MARKER),
+ "ellipsis trim marker missing from truncated prompt"
+ );
+
+ // Routing prompt scaffolding remains intact.
+ assert!(prompt.contains(""));
+ assert!(prompt.contains(""));
+ }
+
+ #[test]
+ fn test_turn_cap_limits_routing_history() {
+ // The outer turn-cap guardrail should keep only the last
+ // `MAX_ROUTING_TURNS` filtered messages regardless of how long the
+ // conversation is. We build a conversation with alternating
+ // user/assistant turns tagged with their index and verify that only
+ // the tail of the conversation makes it into the prompt.
+ let orchestrations_str = r#"
+ {
+ "gpt-4o": [
+ {"name": "Image generation", "description": "generating image"}
+ ]
+ }
+ "#;
+ let agent_orchestrations = serde_json::from_str::<
+ HashMap<String, Vec<RoutingPreference>>,
+ >(orchestrations_str)
+ .unwrap();
+
+ let orchestrator =
+ OrchestratorModelV1::new(agent_orchestrations, "test-model".to_string(), usize::MAX);
+
+ let mut conversation: Vec<Message> = Vec::new();
+ let total_turns = MAX_ROUTING_TURNS * 2; // well past the cap
+ for i in 0..total_turns {
+ let role = if i % 2 == 0 {
+ Role::User
+ } else {
+ Role::Assistant
+ };
+ conversation.push(Message {
+ role,
+ content: Some(MessageContent::Text(format!("turn-{i:03}"))),
+ name: None,
+ tool_calls: None,
+ tool_call_id: None,
+ });
+ }
+
+ let req = orchestrator.generate_request(&conversation, &None);
+ let prompt = req.messages[0].content.extract_text();
+
+ // The last MAX_ROUTING_TURNS messages (indexes total-cap..total)
+ // must all appear.
+ for i in (total_turns - MAX_ROUTING_TURNS)..total_turns {
+ let tag = format!("turn-{i:03}");
+ assert!(
+ prompt.contains(&tag),
+ "expected recent turn tag {tag} to be present"
+ );
+ }
+
+ // And earlier turns (indexes 0..total-cap) must all be dropped.
+ for i in 0..(total_turns - MAX_ROUTING_TURNS) {
+ let tag = format!("turn-{i:03}");
+ assert!(
+ !prompt.contains(&tag),
+ "old turn tag {tag} leaked past turn cap into the prompt"
+ );
+ }
+ }
+
+ #[test]
+ fn test_trim_middle_utf8_helper() {
+ // No-op when already small enough.
+ assert_eq!(trim_middle_utf8("hello", 100), "hello");
+ assert_eq!(trim_middle_utf8("hello", 5), "hello");
+
+ // 60/40 split with ellipsis when too long.
+ let long = "a".repeat(20);
+ let out = trim_middle_utf8(&long, 10);
+ assert!(out.len() <= 10);
+ assert!(out.contains(TRIM_MARKER));
+ // Exactly one ellipsis, rest are 'a's.
+ assert_eq!(out.matches(TRIM_MARKER).count(), 1);
+ assert!(out.chars().filter(|c| *c == 'a').count() > 0);
+
+ // When max_bytes is smaller than the marker, falls back to
+ // head-only truncation (no marker).
+ let out = trim_middle_utf8("abcdefgh", 2);
+ assert_eq!(out, "ab");
+
+ // UTF-8 boundary safety: 2-byte chars.
+ let s = "é".repeat(50); // 100 bytes
+ let out = trim_middle_utf8(&s, 25);
+ assert!(out.len() <= 25);
+ // Must still be valid UTF-8 that only contains 'é' and the marker.
+ let ok = out.chars().all(|c| c == 'é' || c == '…');
+ assert!(ok, "unexpected char in trimmed output: {out:?}");
+ }
+
#[test]
fn test_non_text_input() {
let expected_prompt = r#"
diff --git a/crates/brightstaff/src/router/stress_tests.rs b/crates/brightstaff/src/router/stress_tests.rs
new file mode 100644
index 00000000..6c3ffefd
--- /dev/null
+++ b/crates/brightstaff/src/router/stress_tests.rs
@@ -0,0 +1,260 @@
+#[cfg(test)]
+mod tests {
+ use crate::router::orchestrator::OrchestratorService;
+ use crate::session_cache::memory::MemorySessionCache;
+ use common::configuration::{SelectionPolicy, SelectionPreference, TopLevelRoutingPreference};
+ use hermesllm::apis::openai::{Message, MessageContent, Role};
+ use std::sync::Arc;
+
+ fn make_messages(n: usize) -> Vec<Message> {
+ (0..n)
+ .map(|i| Message {
+ role: if i % 2 == 0 {
+ Role::User
+ } else {
+ Role::Assistant
+ },
+ content: Some(MessageContent::Text(format!(
+ "This is message number {i} with some padding text to make it realistic."
+ ))),
+ name: None,
+ tool_calls: None,
+ tool_call_id: None,
+ })
+ .collect()
+ }
+
+ fn make_routing_prefs() -> Vec<TopLevelRoutingPreference> {
+ vec![
+ TopLevelRoutingPreference {
+ name: "code_generation".to_string(),
+ description: "Code generation and debugging tasks".to_string(),
+ models: vec![
+ "openai/gpt-4o".to_string(),
+ "openai/gpt-4o-mini".to_string(),
+ ],
+ selection_policy: SelectionPolicy {
+ prefer: SelectionPreference::None,
+ },
+ },
+ TopLevelRoutingPreference {
+ name: "summarization".to_string(),
+ description: "Summarizing documents and text".to_string(),
+ models: vec![
+ "anthropic/claude-3-sonnet".to_string(),
+ "openai/gpt-4o-mini".to_string(),
+ ],
+ selection_policy: SelectionPolicy {
+ prefer: SelectionPreference::None,
+ },
+ },
+ ]
+ }
+
+ /// Stress test: exercise the full routing code path N times using a mock
+ /// HTTP server and measure jemalloc allocated bytes before/after.
+ ///
+ /// This catches:
+ /// - Memory leaks in generate_request / parse_response
+ /// - Leaks in reqwest connection handling
+ /// - String accumulation in the orchestrator model
+ /// - Fragmentation (jemalloc allocated vs resident)
+ #[tokio::test]
+ async fn stress_test_routing_determine_route() {
+ let mut server = mockito::Server::new_async().await;
+ let router_url = format!("{}/v1/chat/completions", server.url());
+
+ let mock_response = serde_json::json!({
+ "id": "chatcmpl-mock",
+ "object": "chat.completion",
+ "created": 1234567890,
+ "model": "plano-orchestrator",
+ "choices": [{
+ "index": 0,
+ "message": {
+ "role": "assistant",
+ "content": "{\"route\": \"code_generation\"}"
+ },
+ "finish_reason": "stop"
+ }],
+ "usage": {"prompt_tokens": 100, "completion_tokens": 10, "total_tokens": 110}
+ });
+
+ let _mock = server
+ .mock("POST", "/v1/chat/completions")
+ .with_status(200)
+ .with_header("content-type", "application/json")
+ .with_body(mock_response.to_string())
+ .expect_at_least(1)
+ .create_async()
+ .await;
+
+ let prefs = make_routing_prefs();
+ let session_cache = Arc::new(MemorySessionCache::new(1000));
+ let orchestrator_service = Arc::new(OrchestratorService::with_routing(
+ router_url,
+ "Plano-Orchestrator".to_string(),
+ "plano-orchestrator".to_string(),
+ Some(prefs.clone()),
+ None,
+ None,
+ session_cache,
+ None,
+ 2048,
+ ));
+
+ // Warm up: a few requests to stabilize allocator state
+ for _ in 0..10 {
+ let msgs = make_messages(5);
+ let _ = orchestrator_service
+ .determine_route(&msgs, None, "warmup")
+ .await;
+ }
+
+ // Snapshot memory after warmup
+ let baseline = get_allocated();
+
+ let num_iterations = 2000;
+
+ for i in 0..num_iterations {
+ let msgs = make_messages(5 + (i % 10));
+ let inline = if i % 3 == 0 {
+ Some(make_routing_prefs())
+ } else {
+ None
+ };
+ let _ = orchestrator_service
+ .determine_route(&msgs, inline, &format!("req-{i}"))
+ .await;
+ }
+
+ let after = get_allocated();
+
+ let growth = after.saturating_sub(baseline);
+ let growth_mb = growth as f64 / (1024.0 * 1024.0);
+ let per_request = growth.checked_div(num_iterations).unwrap_or(0);
+
+ eprintln!("=== Routing Stress Test Results ===");
+ eprintln!(" Iterations: {num_iterations}");
+ eprintln!(" Baseline alloc: {} bytes", baseline);
+ eprintln!(" Final alloc: {} bytes", after);
+ eprintln!(" Growth: {} bytes ({growth_mb:.2} MB)", growth);
+ eprintln!(" Per-request: {} bytes", per_request);
+
+ // Allow up to 256 bytes per request of retained growth (connection pool, etc.)
+ // A true leak would show thousands of bytes per request.
+ assert!(
+ per_request < 256,
+ "Possible memory leak: {per_request} bytes/request retained after {num_iterations} iterations"
+ );
+ }
+
+ /// Stress test with high concurrency: many parallel determine_route calls.
+ #[tokio::test]
+ async fn stress_test_routing_concurrent() {
+ let mut server = mockito::Server::new_async().await;
+ let router_url = format!("{}/v1/chat/completions", server.url());
+
+ let mock_response = serde_json::json!({
+ "id": "chatcmpl-mock",
+ "object": "chat.completion",
+ "created": 1234567890,
+ "model": "plano-orchestrator",
+ "choices": [{
+ "index": 0,
+ "message": {
+ "role": "assistant",
+ "content": "{\"route\": \"summarization\"}"
+ },
+ "finish_reason": "stop"
+ }],
+ "usage": {"prompt_tokens": 100, "completion_tokens": 10, "total_tokens": 110}
+ });
+
+ let _mock = server
+ .mock("POST", "/v1/chat/completions")
+ .with_status(200)
+ .with_header("content-type", "application/json")
+ .with_body(mock_response.to_string())
+ .expect_at_least(1)
+ .create_async()
+ .await;
+
+ let prefs = make_routing_prefs();
+ let session_cache = Arc::new(MemorySessionCache::new(1000));
+ let orchestrator_service = Arc::new(OrchestratorService::with_routing(
+ router_url,
+ "Plano-Orchestrator".to_string(),
+ "plano-orchestrator".to_string(),
+ Some(prefs),
+ None,
+ None,
+ session_cache,
+ None,
+ 2048,
+ ));
+
+ // Warm up
+ for _ in 0..20 {
+ let msgs = make_messages(3);
+ let _ = orchestrator_service
+ .determine_route(&msgs, None, "warmup")
+ .await;
+ }
+
+ let baseline = get_allocated();
+
+ let concurrency = 50;
+ let requests_per_task = 100;
+ let total = concurrency * requests_per_task;
+
+ let mut handles = vec![];
+ for t in 0..concurrency {
+ let svc = Arc::clone(&orchestrator_service);
+ let handle = tokio::spawn(async move {
+ for r in 0..requests_per_task {
+ let msgs = make_messages(3 + (r % 8));
+ let _ = svc
+ .determine_route(&msgs, None, &format!("req-{t}-{r}"))
+ .await;
+ }
+ });
+ handles.push(handle);
+ }
+
+ for h in handles {
+ h.await.unwrap();
+ }
+
+ let after = get_allocated();
+ let growth = after.saturating_sub(baseline);
+ let per_request = growth / total;
+
+ eprintln!("=== Concurrent Routing Stress Test Results ===");
+ eprintln!(" Tasks: {concurrency} x {requests_per_task} = {total}");
+ eprintln!(" Baseline: {} bytes", baseline);
+ eprintln!(" Final: {} bytes", after);
+ eprintln!(
+ " Growth: {} bytes ({:.2} MB)",
+ growth,
+ growth as f64 / 1_048_576.0
+ );
+ eprintln!(" Per-request: {} bytes", per_request);
+
+ assert!(
+ per_request < 512,
+ "Possible memory leak under concurrency: {per_request} bytes/request retained after {total} requests"
+ );
+ }
+
+ #[cfg(feature = "jemalloc")]
+ fn get_allocated() -> usize {
+ tikv_jemalloc_ctl::epoch::advance().unwrap();
+ tikv_jemalloc_ctl::stats::allocated::read().unwrap_or(0)
+ }
+
+ #[cfg(not(feature = "jemalloc"))]
+ fn get_allocated() -> usize {
+ 0
+ }
+}
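// Note — illustrative, not part of the patch. The jemalloc counters read by
// get_allocated() are only meaningful when jemalloc is actually the process
// allocator. A typical wiring (assumed here, not shown in this diff) gates a
// global-allocator declaration on the same `jemalloc` feature:
//
// #[cfg(feature = "jemalloc")]
// #[global_allocator]
// static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;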
diff --git a/crates/brightstaff/src/signals/analyzer.rs b/crates/brightstaff/src/signals/analyzer.rs
index 5ee3c7d9..35e342eb 100644
--- a/crates/brightstaff/src/signals/analyzer.rs
+++ b/crates/brightstaff/src/signals/analyzer.rs
@@ -1,3190 +1,572 @@
-//! Agentic Signals - Behavioral quality indicators for agent interactions
+//! Top-level signal analyzer.
//!
-//! This module implements various signals that serve as early warning indicators
-//! of brilliant successes or failures in agentic interactions. These signals are
-//! derived from conversation patterns and can be computed algorithmically from
-//! message arrays.
-
-use serde::{Deserialize, Serialize};
-use std::collections::{HashMap, HashSet};
-use std::sync::LazyLock;
+//! Direct port of `signals/analyzer.py`. Orchestrates all detectors across
+//! the three layers (interaction / execution / environment) and produces a
+//! `SignalReport`.
use hermesllm::apis::openai::{Message, Role};
+use hermesllm::transforms::ExtractText;
-// ============================================================================
-// Constants
-// ============================================================================
+use super::environment::exhaustion::analyze_exhaustion;
+use super::execution::failure::analyze_failure;
+use super::execution::loops::analyze_loops;
+use super::interaction::disengagement::analyze_disengagement;
+use super::interaction::misalignment::analyze_misalignment;
+use super::interaction::satisfaction::analyze_satisfaction;
+use super::interaction::stagnation::{analyze_stagnation, ShareGptMsg};
+use super::schemas::{
+ EnvironmentSignals, ExecutionSignals, InteractionQuality, InteractionSignals, SignalReport,
+ SignalType, TurnMetrics,
+};
+use super::text_processing::NormalizedMessage;
-/// Flag emoji for marking spans/operations worth investigating
+/// Marker appended to the span operation name when concerning signals are
+/// detected. The 🚩 emoji (U+1F6A9) matches the pre-port implementation so
+/// downstream consumers that search for flagged traces by span-name emoji
+/// keep working.
pub const FLAG_MARKER: &str = "\u{1F6A9}";
-/// Size of character n-grams for similarity matching (3 = trigrams)
-const NGRAM_SIZE: usize = 3;
+/// ShareGPT-shaped row used as the canonical input to the analyzer's
+/// detectors. `from` is one of `"human"`, `"gpt"`, `"function_call"`,
+/// `"observation"`. `value` is the raw message body.
+#[derive(Debug, Clone, Copy)]
+pub struct ShareGptMessage<'a> {
+ pub from: &'a str,
+ pub value: &'a str,
+}
-// ============================================================================
-// Normalized Message Processing
-// ============================================================================
-
-/// Pre-processed message with normalized text and tokens for efficient matching
+/// Configuration knobs for the analyzer. Defaults match
+/// `signals/analyzer.py:SignalAnalyzer.__init__`.
#[derive(Debug, Clone)]
-struct NormalizedMessage {
- /// Original raw text
- raw: String,
- /// Tokens (words) extracted from the message
- tokens: Vec<String>,
- /// Token set for fast lookup
- token_set: HashSet<String>,
- /// Bigram set for fast similarity computation
- bigram_set: HashSet<String>,
- /// Character ngram set for robust similarity matching
- char_ngram_set: HashSet<String>,
- /// Token frequency map for multiset cosine similarity
- token_frequency: HashMap<String, usize>,
+pub struct SignalAnalyzerConfig {
+ pub baseline_turns: usize,
+ pub char_ngram_threshold: f32,
+ pub token_cosine_threshold: f32,
+ pub max_message_length: usize,
+ pub max_messages: usize,
}
-impl NormalizedMessage {
- #[allow(dead_code)] // Used in tests for algorithm validation
- fn from_text(text: &str) -> Self {
- Self::from_text_with_limit(text, usize::MAX)
- }
-
- fn from_text_with_limit(text: &str, max_length: usize) -> Self {
- // Truncate to max_length characters to prevent unbounded computation
- // Keep head (20%) + tail (80%) to preserve both context and intent
-
- let char_count = text.chars().count();
-
- let raw = if char_count <= max_length {
- text.to_string()
- } else {
- // Split: 20% head, 79% tail, 1 char space delimiter
- let head_len = max_length / 5;
- let tail_len = max_length - head_len - 1;
-
- let head: String = text.chars().take(head_len).collect();
- let tail: String = text.chars().skip(char_count - tail_len).collect();
-
- format!("{} {}", head, tail)
- };
-
- // Normalize unicode punctuation to ASCII equivalents
- let normalized_unicode = raw
- .replace(['\u{2019}', '\u{2018}'], "'") // U+2019/U+2018 SINGLE QUOTATION MARKs
- .replace(['\u{201C}', '\u{201D}'], "\"") // U+201C/U+201D DOUBLE QUOTATION MARKs
- .replace(['\u{2013}', '\u{2014}'], "-"); // U+2013/U+2014 EN/EM DASHes
-
- // Normalize: lowercase, collapse whitespace
- let normalized = normalized_unicode
- .to_lowercase()
- .split_whitespace()
- .collect::<Vec<_>>()
- .join(" ");
-
- // Tokenize: split on whitespace and strip punctuation from boundaries
- let tokens: Vec<String> = normalized
- .split_whitespace()
- .map(|word| {
- // Strip leading/trailing punctuation but keep internal punctuation
- word.trim_matches(|c: char| c.is_ascii_punctuation())
- .to_string()
- })
- .filter(|w| !w.is_empty())
- .collect();
-
- let token_set: HashSet<String> = tokens.iter().cloned().collect();
-
- // Generate bigram set directly for similarity matching
- let bigram_set: HashSet<String> = tokens
- .windows(2)
- .map(|w| format!("{} {}", w[0], w[1]))
- .collect();
-
- // Generate character ngram set for robust similarity matching
- // Uses tokens (with punctuation stripped) for consistency with pattern matching
- let tokens_text = tokens.join(" ");
- let char_ngram_set: HashSet<String> = tokens_text
- .chars()
- .collect::<Vec<char>>()
- .windows(NGRAM_SIZE)
- .map(|w| w.iter().collect::<String>())
- .collect();
-
- // Compute token frequency map for cosine similarity
- let mut token_frequency: HashMap<String, usize> = HashMap::new();
- for token in &tokens {
- *token_frequency.entry(token.clone()).or_insert(0) += 1;
- }
-
- Self {
- raw,
- tokens,
- token_set,
- bigram_set,
- char_ngram_set,
- token_frequency,
- }
- }
-
- /// Check if a single token exists in the message (word boundary aware)
- fn contains_token(&self, token: &str) -> bool {
- self.token_set.contains(token)
- }
-
- /// Check if a phrase (sequence of tokens) exists in the message
- fn contains_phrase(&self, phrase: &str) -> bool {
- let phrase_tokens: Vec<&str> = phrase.split_whitespace().collect();
- if phrase_tokens.is_empty() {
- return false;
- }
-
- if phrase_tokens.len() == 1 {
- return self.contains_token(phrase_tokens[0]);
- }
-
- // Multi-word phrase: check for sequence in tokens
- self.tokens.windows(phrase_tokens.len()).any(|window| {
- window
- .iter()
- .zip(phrase_tokens.iter())
- .all(|(token, phrase_token)| token == phrase_token)
- })
- }
-
- /// Calculate character ngram similarity between this message and a pattern
- /// Returns a similarity score between 0.0 and 1.0
- /// This is robust to typos, small edits, and word insertions
- #[allow(dead_code)] // Used in tests for algorithm validation
- fn char_ngram_similarity(&self, pattern: &str) -> f64 {
- // Normalize the pattern: lowercase and remove ALL punctuation
- // This makes "doesn't" → "doesnt" for robust typo matching
- let normalized_pattern = pattern
- .to_lowercase()
- .chars()
- .filter(|c| c.is_alphanumeric() || c.is_whitespace())
- .collect::<String>()
- .split_whitespace()
- .collect::<Vec<_>>()
- .join(" ");
-
- // Generate ngrams for the pattern
- let pattern_ngrams: HashSet<String> = normalized_pattern
- .chars()
- .collect::<Vec<char>>()
- .windows(NGRAM_SIZE)
- .map(|w| w.iter().collect::<String>())
- .collect();
-
- if self.char_ngram_set.is_empty() && pattern_ngrams.is_empty() {
- return 1.0; // Both empty = identical
- }
-
- if self.char_ngram_set.is_empty() || pattern_ngrams.is_empty() {
- return 0.0;
- }
-
- // Compute Jaccard similarity (intersection / union)
- let intersection = self.char_ngram_set.intersection(&pattern_ngrams).count();
- let union = self.char_ngram_set.union(&pattern_ngrams).count();
-
- if union == 0 {
- return 0.0;
- }
-
- intersection as f64 / union as f64
- }
-
- /// Calculate token-based cosine similarity using term frequencies
- /// Returns a similarity score between 0.0 and 1.0
- /// This handles word frequency and is stable for longer messages
- #[allow(dead_code)] // Used in tests for algorithm validation
- fn token_cosine_similarity(&self, pattern: &str) -> f64 {
- // Tokenize and compute frequencies for the pattern
- let pattern_tokens: Vec<String> = pattern
- .to_lowercase()
- .split_whitespace()
- .map(|word| {
- word.trim_matches(|c: char| c.is_ascii_punctuation())
- .to_string()
- })
- .filter(|w| !w.is_empty())
- .collect();
-
- let mut pattern_frequency: HashMap<String, usize> = HashMap::new();
- for token in &pattern_tokens {
- *pattern_frequency.entry(token.clone()).or_insert(0) += 1;
- }
-
- if self.token_frequency.is_empty() && pattern_frequency.is_empty() {
- return 1.0;
- }
-
- if self.token_frequency.is_empty() || pattern_frequency.is_empty() {
- return 0.0;
- }
-
- // Compute cosine similarity
- // cosine_sim = dot_product / (norm1 * norm2)
-
- let mut dot_product = 0.0;
- let mut norm1_squared = 0.0;
- let mut norm2_squared = 0.0;
-
- // Collect all unique tokens from both sets
- let all_tokens: HashSet<String> = self
- .token_frequency
- .keys()
- .chain(pattern_frequency.keys())
- .cloned()
- .collect();
-
- for token in all_tokens {
- let freq1 = *self.token_frequency.get(&token).unwrap_or(&0) as f64;
- let freq2 = *pattern_frequency.get(&token).unwrap_or(&0) as f64;
-
- dot_product += freq1 * freq2;
- norm1_squared += freq1 * freq1;
- norm2_squared += freq2 * freq2;
- }
-
- let norm1 = norm1_squared.sqrt();
- let norm2 = norm2_squared.sqrt();
-
- if norm1 == 0.0 || norm2 == 0.0 {
- return 0.0;
- }
-
- dot_product / (norm1 * norm2)
- }
-
- /// Layered phrase matching: exact → character ngram → token cosine
- /// Returns true if the pattern matches using any layer
- #[allow(dead_code)] // Kept for reference; production uses matches_normalized_pattern
- fn layered_contains_phrase(
- &self,
- pattern: &str,
- char_ngram_threshold: f64,
- token_cosine_threshold: f64,
- ) -> bool {
- // Layer 0: Exact phrase match (fastest)
- if self.contains_phrase(pattern) {
- return true;
- }
-
- // Layer 1: Character ngram similarity (typo/edit robustness)
- // Check whole message first (for short messages)
- if self.char_ngram_similarity(pattern) >= char_ngram_threshold {
- return true;
- }
-
- // ngram containment check for patterns buried in longer messages
- // If ALL of the pattern's ngrams exist in the message, the pattern must be
- // present (possibly with minor variations like missing apostrophes).
- // This is O(pattern_ngrams) lookups vs expensive window sliding.
- if self.char_ngram_containment(pattern) >= 1.0 {
- return true;
- }
-
- // Layer 2: Token cosine similarity (semantic stability for long messages)
- if self.token_cosine_similarity(pattern) >= token_cosine_threshold {
- return true;
- }
-
- false
- }
-
- fn char_ngram_containment(&self, pattern: &str) -> f64 {
- // Normalize the pattern the same way as char_ngram_similarity
- let normalized_pattern = pattern
- .to_lowercase()
- .chars()
- .filter(|c| c.is_alphanumeric() || c.is_whitespace())
- .collect::<String>()
- .split_whitespace()
- .collect::<Vec<_>>()
- .join(" ");
-
- // Generate ngrams for the pattern
- let pattern_ngrams: HashSet<String> = normalized_pattern
- .chars()
- .collect::<Vec<char>>()
- .windows(NGRAM_SIZE)
- .map(|w| w.iter().collect::<String>())
- .collect();
-
- if pattern_ngrams.is_empty() {
- return 0.0;
- }
-
- // Count how many pattern ngrams exist in the message
- let contained = pattern_ngrams
- .iter()
- .filter(|t| self.char_ngram_set.contains(*t))
- .count();
-
- contained as f64 / pattern_ngrams.len() as f64
- }
-
- /// Fast matching against a pre-normalized pattern
- /// This avoids re-normalizing and re-computing ngrams for each pattern
- fn matches_normalized_pattern(
- &self,
- pattern: &NormalizedPattern,
- char_ngram_threshold: f64,
- token_cosine_threshold: f64,
- ) -> bool {
- // Layer 0: Exact phrase match (fastest)
- if self.contains_phrase(&pattern.raw) {
- return true;
- }
-
- // Layer 1: Character ngram similarity using pre-computed ngrams
- if !self.char_ngram_set.is_empty() && !pattern.char_ngram_set.is_empty() {
- let intersection = self
- .char_ngram_set
- .intersection(&pattern.char_ngram_set)
- .count();
- let union = self.char_ngram_set.union(&pattern.char_ngram_set).count();
- if union > 0 {
- let similarity = intersection as f64 / union as f64;
- if similarity >= char_ngram_threshold {
- return true;
- }
- }
- }
-
- // Ngram containment check using pre-computed ngrams
- if !pattern.char_ngram_set.is_empty() {
- let contained = pattern
- .char_ngram_set
- .iter()
- .filter(|t| self.char_ngram_set.contains(*t))
- .count();
- let containment = contained as f64 / pattern.char_ngram_set.len() as f64;
- if containment >= 1.0 {
- return true;
- }
- }
-
- // Layer 2: Token cosine similarity using pre-computed frequencies
- if !self.token_frequency.is_empty() && !pattern.token_frequency.is_empty() {
- let mut dot_product = 0.0;
- let mut norm1_squared = 0.0;
- let mut norm2_squared = 0.0;
-
- // Iterate over pattern tokens (usually smaller set)
- for (token, &freq2) in &pattern.token_frequency {
- let freq1 = *self.token_frequency.get(token).unwrap_or(&0) as f64;
- let freq2 = freq2 as f64;
- dot_product += freq1 * freq2;
- norm2_squared += freq2 * freq2;
- }
-
- // Add self tokens not in pattern for norm1
- for &freq1 in self.token_frequency.values() {
- norm1_squared += (freq1 as f64) * (freq1 as f64);
- }
-
- let norm1 = norm1_squared.sqrt();
- let norm2 = norm2_squared.sqrt();
-
- if norm1 > 0.0 && norm2 > 0.0 {
- let similarity = dot_product / (norm1 * norm2);
- if similarity >= token_cosine_threshold {
- return true;
- }
- }
- }
-
- false
- }
-}
-
-// ============================================================================
-// Normalized Pattern (pre-computed for performance)
-// ============================================================================
-
-/// Pre-processed pattern with normalized text and pre-computed ngrams/tokens
-/// This avoids redundant computation when matching against many messages
-#[derive(Debug, Clone)]
-struct NormalizedPattern {
- /// Original raw pattern text
- raw: String,
- /// Character ngram set for similarity matching
- char_ngram_set: HashSet<String>,
- /// Token frequency map for cosine similarity
- token_frequency: HashMap<String, usize>,
-}
-
-impl NormalizedPattern {
- fn new(pattern: &str) -> Self {
- // Normalize: lowercase and remove ALL punctuation
- let normalized = pattern
- .to_lowercase()
- .chars()
- .filter(|c| c.is_alphanumeric() || c.is_whitespace())
- .collect::<String>()
- .split_whitespace()
- .collect::<Vec<_>>()
- .join(" ");
-
- // Generate ngrams
- let char_ngram_set: HashSet<String> = normalized
- .chars()
- .collect::<Vec<char>>()
- .windows(NGRAM_SIZE)
- .map(|w| w.iter().collect::<String>())
- .collect();
-
- // Compute token frequency map
- let tokens: Vec<String> = normalized
- .split_whitespace()
- .map(|s| s.to_string())
- .collect();
- let mut token_frequency: HashMap<String, usize> = HashMap::new();
- for token in tokens {
- *token_frequency.entry(token).or_insert(0) += 1;
- }
-
- Self {
- raw: pattern.to_string(),
- char_ngram_set,
- token_frequency,
- }
- }
-}
-
-/// Helper to create a static slice of normalized patterns
-fn normalize_patterns(patterns: &[&str]) -> Vec<NormalizedPattern> {
- patterns.iter().map(|p| NormalizedPattern::new(p)).collect()
-}
-
-// ============================================================================
-// Pre-computed Pattern Caches (initialized once at startup)
-// ============================================================================
-
-static REPAIR_PATTERNS: LazyLock<Vec<NormalizedPattern>> = LazyLock::new(|| {
- normalize_patterns(&[
- // Explicit corrections
- "i meant",
- "i mean",
- "sorry, i meant",
- "what i meant was",
- "what i actually meant",
- "i was trying to say",
- "let me correct that",
- "correction",
- "i misspoke",
- // Negations and disagreements
- "no, i",
- "no i",
- "nah i",
- "nope i",
- "not what i",
- "that's not",
- "that's not what",
- "that isn't what",
- "not quite",
- "not exactly",
- // Rephrasing indicators
- "let me rephrase",
- "let me try again",
- "let me clarify",
- "to clarify",
- "to be clear",
- "let me explain",
- "what i'm trying to",
- "what i'm saying",
- "in other words",
- // Actual/really emphasis
- "actually i",
- "actually no",
- "what i actually",
- "i actually",
- "i really meant",
- // Mistake acknowledgment
- "i was wrong",
- "my mistake",
- "my bad",
- "i should have said",
- "i should clarify",
- // Wait/hold indicators
- "wait, i",
- "wait no",
- "hold on",
- "hang on",
- ])
-});
-
-static COMPLAINT_PATTERNS: LazyLock<Vec<NormalizedPattern>> = LazyLock::new(|| {
- normalize_patterns(&[
- // Useless/unhelpful (multi-word only)
- "this is useless",
- "not helpful",
- "doesn't help",
- "not helping",
- "you're not helping",
- "no help",
- "unhelpful",
- // Not working
- "this doesn't work",
- "doesn't work",
- "not working",
- "isn't working",
- "won't work",
- "still doesn't work",
- "still not working",
- // Not fixing/solving
- "doesn't fix",
- "not fixing",
- "doesn't solve",
- "doesn't seem to work",
- "doesn't seem to fix",
- "not resolving",
- // Waste/pointless
- "waste of time",
- "wasting my time",
- // Ridiculous/absurd
- "this is ridiculous",
- "ridiculous",
- "this is absurd",
- "absurd",
- "this is insane",
- "insane",
- // Stupid/dumb (as adjectives, not as standalone tokens)
- "this is stupid",
- "this is dumb",
- // Quality complaints (multi-word)
- "this sucks",
- "not good enough",
- // Capability questions
- "why can't you",
- "can't you",
- // Frustration
- "this is frustrating",
- "frustrated",
- "incomplete",
- "overwhelm",
- "overwhelmed",
- "overwhelming",
- "exhausted",
- "struggled",
- // same issue
- "same issue",
- // polite dissatisfaction
- "i'm disappointed",
- "thanks, but",
- "appreciate it, but",
- "good, but",
- // Fed up/done
- "i give up",
- "give up",
- "fed up",
- "had enough",
- "can't take",
- // Bot-specific complaints
- "useless bot",
- "dumb bot",
- "stupid bot",
- ])
-});
-
-static CONFUSION_PATTERNS: LazyLock<Vec<NormalizedPattern>> = LazyLock::new(|| {
- normalize_patterns(&[
- // Don't understand
- "i don't understand",
- "don't understand",
- "not understanding",
- "can't understand",
- "don't get it",
- "don't follow",
- // Confused state
- "i'm confused",
- "so confused",
- // Makes no sense
- "makes no sense",
- "doesn't make sense",
- "not making sense",
- // What do you mean (keep multi-word)
- "what do you mean",
- "what does that mean",
- "what are you saying",
- // Lost/unclear
- "i'm lost",
- "totally lost",
- "lost me",
- // No clue
- "no clue",
- "no idea",
- // Come again
- "come again",
- "say that again",
- "repeat that",
- ])
-});
-
-static GRATITUDE_PATTERNS: LazyLock<Vec<NormalizedPattern>> = LazyLock::new(|| {
- normalize_patterns(&[
- // Standard gratitude
- "thank you",
- "thanks",
- "thank u",
- "thankyou",
- "thx",
- "ty",
- "tyvm",
- "tysm",
- "thnx",
- "thnks",
- // Strong gratitude
- "thanks so much",
- "thank you so much",
- "thanks a lot",
- "thanks a bunch",
- "much appreciated",
- "really appreciate",
- "greatly appreciate",
- "appreciate it",
- "appreciate that",
- "i appreciate",
- "grateful",
- "so grateful",
- // Helpfulness acknowledgment
- "that's helpful",
- "very helpful",
- "super helpful",
- "really helpful",
- "that helps",
- "this helps",
- "helpful",
- // Perfection expressions
- "perfect",
- "that's perfect",
- "just perfect",
- "exactly what i needed",
- "exactly right",
- "just what i needed",
- "that's exactly",
- // Informal positive
- "you're the best",
- "you rock",
- "you're awesome",
- "awesome sauce",
- "legend",
- ])
-});
-
-static SATISFACTION_PATTERNS: LazyLock<Vec<NormalizedPattern>> = LazyLock::new(|| {
- normalize_patterns(&[
- // Works/functions
- "that works",
- "this works",
- "works great",
- "works perfectly",
- "works for me",
- // Great variations
- "that's great",
- "that's amazing",
- "this is great",
- "sounds great",
- "looks great",
- "great job",
- // Excellent/perfect
- "excellent",
- "outstanding",
- "superb",
- "spectacular",
- // Awesome/amazing
- "awesome",
- "that's awesome",
- "amazing",
- "incredible",
- // Love expressions
- "love it",
- "love this",
- "i love",
- "loving it",
- "love that",
- // Brilliant/wonderful
- "brilliant",
- "wonderful",
- "fantastic",
- "fabulous",
- "marvelous",
- ])
-});
-
-static SUCCESS_PATTERNS: LazyLock<Vec<NormalizedPattern>> = LazyLock::new(|| {
- normalize_patterns(&[
- // Understanding confirmation
- "got it",
- "i got it",
- "understand",
- "understood",
- "i understand",
- "makes sense",
- "clear now",
- "i see",
- // Success/completion
- "success",
- "successful",
- "it worked",
- "that worked",
- "this worked",
- "worked",
- // Problem resolution
- "solved",
- "resolved",
- "fixed",
- "fixed it",
- "issue resolved",
- "problem solved",
- // Working state
- "working now",
- "it's working",
- "works now",
- "working fine",
- "working great",
- // Completion
- "all set",
- "all good",
- "we're good",
- "i'm good",
- "all done",
- "done",
- "complete",
- "finished",
- // Perfect fit
- "spot on",
- "nailed it",
- "bingo",
- "exactly",
- "just right",
- ])
-});
-
-static HUMAN_AGENT_PATTERNS: LazyLock<Vec<NormalizedPattern>> = LazyLock::new(|| {
- normalize_patterns(&[
- // Speak to human
- "speak to a human",
- "speak to human",
- "speak with a human",
- "speak with human",
- "talk to a human",
- "talk to human",
- "talk to a person",
- "talk to person",
- "talk to someone",
- // Human/real agent
- "human agent",
- "real agent",
- "actual agent",
- "live agent",
- "human support",
- // Real/actual person
- "real person",
- "actual person",
- "real human",
- "actual human",
- "someone real",
- // Need/want human
- "need a human",
- "need human",
- "want a human",
- "want human",
- "get me a human",
- "get me human",
- "get me someone",
- // Transfer/connect
- "transfer me",
- "connect me",
- "escalate this",
- // Representative (removed standalone "rep" - too many false positives)
- "representative",
- "customer service rep",
- "customer service representative",
- // Not a bot
- "not a bot",
- "not talking to a bot",
- "tired of bots",
- ])
-});
-
-static SUPPORT_PATTERNS: LazyLock<Vec<NormalizedPattern>> = LazyLock::new(|| {
- normalize_patterns(&[
- // Contact support
- "contact support",
- "call support",
- "reach support",
- "get support",
- // Customer support
- "customer support",
- "customer service",
- "tech support",
- "technical support",
- // Help desk
- "help desk",
- "helpdesk",
- "support desk",
- // Talk to support
- "talk to support",
- "speak to support",
- "speak with support",
- "chat with support",
- // Need help
- "need real help",
- "need actual help",
- "help me now",
- ])
-});
-
-static QUIT_PATTERNS: LazyLock<Vec<NormalizedPattern>> = LazyLock::new(|| {
- normalize_patterns(&[
- // Give up
- "i give up",
- "give up",
- "giving up",
- // Quit/leaving
- "i'm going to quit",
- "i quit",
- "quitting",
- "i'm leaving",
- "i'm done",
- "i'm out",
- // Forget it
- "forget it",
- "forget this",
- "screw it",
- "screw this",
- // Never mind
- "never mind",
- "nevermind",
- "don't bother",
- "not worth it",
- // Hopeless
- "this is hopeless",
- // Going elsewhere
- "going elsewhere",
- "try somewhere else",
- "look elsewhere",
- "find another",
- ])
-});
-
-// ============================================================================
-// Core Signal Types
-// ============================================================================
-
-/// Overall quality assessment for an agent interaction session
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
-pub enum InteractionQuality {
- /// Excellent interaction with strong positive signals
- Excellent,
- /// Good interaction with mostly positive signals
- Good,
- /// Neutral interaction with mixed signals
- Neutral,
- /// Poor interaction with concerning signals
- Poor,
- /// Critical interaction with severe negative signals
- Severe,
-}
-
-/// Container for all computed signals for a conversation
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct SignalReport {
- /// Turn count and efficiency metrics
- pub turn_count: TurnCountSignal,
- /// Follow-up and repair frequency
- pub follow_up: FollowUpSignal,
- /// User frustration indicators
- pub frustration: FrustrationSignal,
- /// Repetition and looping behavior
- pub repetition: RepetitionSignal,
- /// Positive feedback indicators
- pub positive_feedback: PositiveFeedbackSignal,
- /// User escalation requests
- pub escalation: EscalationSignal,
- /// Overall quality assessment
- pub overall_quality: InteractionQuality,
- /// Human-readable summary
- pub summary: String,
-}
-
-// ============================================================================
-// Individual Signal Types
-// ============================================================================
-
-/// Turn count and efficiency metrics
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct TurnCountSignal {
- /// Total number of turns (user-agent exchanges)
- pub total_turns: usize,
- /// Number of user messages
- pub user_turns: usize,
- /// Number of assistant messages
- pub assistant_turns: usize,
- /// Whether the turn count is concerning (> 7)
- pub is_concerning: bool,
- /// Whether the turn count is excessive (> 12)
- pub is_excessive: bool,
- /// Efficiency score (0.0-1.0, lower turns = higher score)
- pub efficiency_score: f64,
-}
-
-/// Follow-up and repair frequency signal
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct FollowUpSignal {
- /// Number of detected repair attempts
- pub repair_count: usize,
- /// Ratio of repairs to total user turns
- pub repair_ratio: f64,
- /// Whether repair ratio is concerning (> 0.3)
- pub is_concerning: bool,
- /// List of detected repair phrases
-    pub repair_phrases: Vec<String>,
-}
-
-/// User frustration indicators
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct FrustrationSignal {
- /// Number of frustration indicators detected
- pub frustration_count: usize,
- /// Whether frustration is detected
- pub has_frustration: bool,
- /// Severity level (0-3: none, mild, moderate, severe)
- pub severity: u8,
- /// List of detected frustration indicators
-    pub indicators: Vec<FrustrationIndicator>,
-}
-
-/// Individual frustration indicator
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct FrustrationIndicator {
- /// Type of frustration detected
- pub indicator_type: FrustrationType,
- /// Message index where detected
- pub message_index: usize,
- /// Relevant text snippet
- pub snippet: String,
-}
-
-/// Types of frustration indicators
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
-pub enum FrustrationType {
- /// Negative sentiment detected
- NegativeSentiment,
- /// All caps typing
- AllCaps,
- /// Excessive punctuation
- ExcessivePunctuation,
- /// Profanity detected
- Profanity,
- /// Direct complaint
- DirectComplaint,
- /// Expression of confusion
- Confusion,
-}
-
-/// Repetition and looping behavior signal
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct RepetitionSignal {
- /// Number of repetitions detected
- pub repetition_count: usize,
- /// Whether significant looping detected (> 2 repetitions)
- pub has_looping: bool,
- /// Severity level (0-3: none, mild, moderate, severe)
- pub severity: u8,
- /// List of detected repetitions
-    pub repetitions: Vec<RepetitionInstance>,
-}
-
-/// Individual repetition instance
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct RepetitionInstance {
- /// Message indices involved in repetition
-    pub message_indices: Vec<usize>,
- /// Similarity score (0.0-1.0)
- pub similarity: f64,
- /// Type of repetition
- pub repetition_type: RepetitionType,
-}
-
-/// Types of repetition
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
-pub enum RepetitionType {
- /// Exact repetition
- Exact,
- /// Near-duplicate (high similarity)
- NearDuplicate,
- /// Semantic repetition (similar meaning)
- Semantic,
-}
-
-/// Positive feedback indicators
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct PositiveFeedbackSignal {
- /// Number of positive indicators detected
- pub positive_count: usize,
- /// Whether positive feedback is present
- pub has_positive_feedback: bool,
- /// Confidence score (0.0-1.0)
- pub confidence: f64,
- /// List of detected positive indicators
-    pub indicators: Vec<PositiveIndicator>,
-}
-
-/// Individual positive indicator
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct PositiveIndicator {
- /// Type of positive feedback
- pub indicator_type: PositiveType,
- /// Message index where detected
- pub message_index: usize,
- /// Relevant text snippet
- pub snippet: String,
-}
-
-/// Types of positive indicators
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
-pub enum PositiveType {
- /// Expression of gratitude
- Gratitude,
- /// Explicit satisfaction
- Satisfaction,
- /// Confirmation of success
- Success,
- /// Positive sentiment
- PositiveSentiment,
- /// Natural topic transition
- TopicTransition,
-}
-
-/// User escalation signal
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct EscalationSignal {
- /// Whether escalation was requested
- pub escalation_requested: bool,
- /// Number of escalation requests
- pub escalation_count: usize,
- /// List of detected escalation requests
-    pub requests: Vec<EscalationRequest>,
-}
-
-/// Individual escalation request
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct EscalationRequest {
- /// Message index where detected
- pub message_index: usize,
- /// Relevant text snippet
- pub snippet: String,
- /// Type of escalation
- pub escalation_type: EscalationType,
-}
-
-/// Types of escalation
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
-pub enum EscalationType {
- /// Request for human agent
- HumanAgent,
- /// Request for support
- Support,
- /// Threat to quit/leave
- ThreatToQuit,
- /// General help request
- HelpRequest,
-}
-
-// ============================================================================
-// Signal Analyzer
-// ============================================================================
-
-/// Trait for analyzing conversation signals
-pub trait SignalAnalyzer {
- /// Analyze a conversation and generate a complete signal report
- fn analyze(&self, messages: &[Message]) -> SignalReport;
-}
-
-/// Text-based implementation of signal analyzer that computes all signals from a message array
-pub struct TextBasedSignalAnalyzer {
- /// Baseline expected turns for normal interactions
- baseline_turns: usize,
- /// Threshold for character ngram similarity (0.0-1.0)
- char_ngram_threshold: f64,
- /// Threshold for token cosine similarity (0.0-1.0)
- token_cosine_threshold: f64,
- /// Maximum message length in characters (prevents unbounded computation)
- max_message_length: usize,
- /// Maximum number of messages to process (prevents unbounded computation)
- max_messages: usize,
- /// Maximum window size for repetition detection (prevents O(n²) explosion)
- max_repetition_window: usize,
-}
-
-impl TextBasedSignalAnalyzer {
- /// Extract text content from MessageContent, skipping non-text content
-    fn extract_text(content: &Option<hermesllm::apis::openai::MessageContent>) -> Option<String> {
- match content {
- Some(hermesllm::apis::openai::MessageContent::Text(text)) => Some(text.clone()),
- // Tool calls and other structured content are skipped
- _ => None,
- }
- }
-
- /// Create a new signal analyzer with default settings
- pub fn new() -> Self {
+impl Default for SignalAnalyzerConfig {
+ fn default() -> Self {
Self {
baseline_turns: 5,
- char_ngram_threshold: 0.50, // Lowered to handle typos and small edits realistically
- token_cosine_threshold: 0.60, // Lowered for better semantic match in varied contexts
- max_message_length: 2000, // Prevent unbounded ngram generation
- max_messages: 100, // Prevent unbounded message processing
- max_repetition_window: 20, // Prevent O(n²) explosion in repetition detection
- }
- }
-
- /// Create a new signal analyzer with custom baseline
- pub fn with_baseline(baseline_turns: usize) -> Self {
- Self {
- baseline_turns,
- char_ngram_threshold: 0.50,
+ char_ngram_threshold: 0.65,
token_cosine_threshold: 0.60,
max_message_length: 2000,
max_messages: 100,
- max_repetition_window: 20,
}
}
-
- /// Create a new signal analyzer with custom settings
- ///
- /// # Arguments
- /// * `baseline_turns` - Expected baseline turns for normal interactions
- /// * `char_ngram_threshold` - Threshold for character ngram similarity (0.0-1.0)
- /// * `token_cosine_threshold` - Threshold for token cosine similarity (0.0-1.0)
- pub fn with_settings(
- baseline_turns: usize,
- char_ngram_threshold: f64,
- token_cosine_threshold: f64,
- ) -> Self {
- Self {
- baseline_turns,
- char_ngram_threshold,
- token_cosine_threshold,
- max_message_length: 2000,
- max_messages: 100,
- max_repetition_window: 20,
- }
- }
-
- /// Create a new signal analyzer with full custom settings including computation limits
- ///
- /// # Arguments
- /// * `baseline_turns` - Expected baseline turns for normal interactions
- /// * `char_ngram_threshold` - Threshold for character ngram similarity (0.0-1.0)
- /// * `token_cosine_threshold` - Threshold for token cosine similarity (0.0-1.0)
- /// * `max_message_length` - Maximum characters per message to process
- /// * `max_messages` - Maximum number of messages to process
- /// * `max_repetition_window` - Maximum messages to compare for repetition detection
- pub fn with_full_settings(
- baseline_turns: usize,
- char_ngram_threshold: f64,
- token_cosine_threshold: f64,
- max_message_length: usize,
- max_messages: usize,
- max_repetition_window: usize,
- ) -> Self {
- Self {
- baseline_turns,
- char_ngram_threshold,
- token_cosine_threshold,
- max_message_length,
- max_messages,
- max_repetition_window,
- }
- }
-
- // ========================================================================
- // Individual Signal Analyzers
- // ========================================================================
-
- /// Analyze turn count and efficiency
- fn analyze_turn_count(&self, messages: &[Message]) -> TurnCountSignal {
- let mut user_turns = 0;
- let mut assistant_turns = 0;
-
- for message in messages {
- match message.role {
- Role::User => user_turns += 1,
- Role::Assistant => assistant_turns += 1,
- _ => {}
- }
- }
-
- let total_turns = user_turns + assistant_turns;
- let is_concerning = total_turns > 7;
- let is_excessive = total_turns > 12;
-
- // Calculate efficiency score (exponential decay after baseline)
- let efficiency_score = if total_turns == 0 || total_turns <= self.baseline_turns {
- 1.0
- } else {
- let excess = total_turns - self.baseline_turns;
- 1.0 / (1.0 + (excess as f64 * 0.3))
- };
-
- TurnCountSignal {
- total_turns,
- user_turns,
- assistant_turns,
- is_concerning,
- is_excessive,
- efficiency_score,
- }
- }
-
- /// Analyze follow-up and repair frequency
- fn analyze_follow_up(
- &self,
- normalized_messages: &[(usize, Role, NormalizedMessage)],
- ) -> FollowUpSignal {
- let mut repair_count = 0;
- let mut repair_phrases = Vec::new();
- let mut user_turn_count = 0;
-
- for (i, role, norm_msg) in normalized_messages {
- if *role != Role::User {
- continue;
- }
-
- user_turn_count += 1;
-
- // Use per-turn boolean to prevent double-counting
- let mut found_in_turn = false;
-
- // Use pre-computed patterns for fast matching
- for pattern in REPAIR_PATTERNS.iter() {
- if norm_msg.matches_normalized_pattern(
- pattern,
- self.char_ngram_threshold,
- self.token_cosine_threshold,
- ) {
- repair_count += 1;
- repair_phrases.push(format!("Turn {}: '{}'", i + 1, pattern.raw));
- found_in_turn = true;
- break;
- }
- }
-
- // Only check for semantic similarity if no pattern matched
- if !found_in_turn && *i >= 2 {
- // Find previous user message
- for j in (0..*i).rev() {
- let (_, prev_role, prev_norm_msg) = &normalized_messages[j];
- if *prev_role == Role::User {
- if self.is_similar_rephrase(norm_msg, prev_norm_msg) {
- repair_count += 1;
- repair_phrases
- .push(format!("Turn {}: Similar rephrase detected", i + 1));
- }
- break;
- }
- }
- }
- }
-
- let repair_ratio = if user_turn_count == 0 {
- 0.0
- } else {
- repair_count as f64 / user_turn_count as f64
- };
-
- let is_concerning = repair_ratio > 0.3;
-
- FollowUpSignal {
- repair_count,
- repair_ratio,
- is_concerning,
- repair_phrases,
- }
- }
-
- /// Analyze user frustration indicators
- fn analyze_frustration(
- &self,
- normalized_messages: &[(usize, Role, NormalizedMessage)],
- ) -> FrustrationSignal {
- let mut indicators = Vec::new();
-
- // Profanity list - only as standalone tokens, not substrings
- let profanity_tokens = [
- "damn", "damnit", "crap", "wtf", "ffs", "bullshit", "shit", "fuck", "fucking",
- ];
-
- for (i, role, norm_msg) in normalized_messages {
- if *role != Role::User {
- continue;
- }
-
- let text = &norm_msg.raw;
-
- // Check for all caps (at least 10 chars and 80% uppercase)
- let alpha_chars: String = text.chars().filter(|c| c.is_alphabetic()).collect();
- if alpha_chars.len() >= 10 {
- let upper_count = alpha_chars.chars().filter(|c| c.is_uppercase()).count();
- let upper_ratio = upper_count as f64 / alpha_chars.len() as f64;
- if upper_ratio >= 0.8 {
- indicators.push(FrustrationIndicator {
- indicator_type: FrustrationType::AllCaps,
- message_index: *i,
- snippet: text.chars().take(50).collect(),
- });
- }
- }
-
- // Check for excessive punctuation
- let question_marks = text.matches('?').count();
- let exclamation_marks = text.matches('!').count();
- if question_marks >= 3 || exclamation_marks >= 3 {
- indicators.push(FrustrationIndicator {
- indicator_type: FrustrationType::ExcessivePunctuation,
- message_index: *i,
- snippet: text.chars().take(50).collect(),
- });
- }
-
- // Check for complaint patterns using pre-computed patterns
- for pattern in COMPLAINT_PATTERNS.iter() {
- if norm_msg.matches_normalized_pattern(
- pattern,
- self.char_ngram_threshold,
- self.token_cosine_threshold,
- ) {
- indicators.push(FrustrationIndicator {
- indicator_type: FrustrationType::DirectComplaint,
- message_index: *i,
- snippet: pattern.raw.clone(),
- });
- break;
- }
- }
-
- // Check for confusion patterns using pre-computed patterns
- for pattern in CONFUSION_PATTERNS.iter() {
- if norm_msg.matches_normalized_pattern(
- pattern,
- self.char_ngram_threshold,
- self.token_cosine_threshold,
- ) {
- indicators.push(FrustrationIndicator {
- indicator_type: FrustrationType::Confusion,
- message_index: *i,
- snippet: pattern.raw.clone(),
- });
- break;
- }
- }
-
- // Check for profanity (token-based, not substring)
- for token in &profanity_tokens {
- if norm_msg.contains_token(token) {
- indicators.push(FrustrationIndicator {
- indicator_type: FrustrationType::Profanity,
- message_index: *i,
- snippet: token.to_string(),
- });
- break;
- }
- }
- }
-
- let frustration_count = indicators.len();
- let has_frustration = frustration_count > 0;
-
- // Calculate severity
- let severity = if frustration_count == 0 {
- 0
- } else if frustration_count <= 2 {
- 1
- } else if frustration_count <= 4 {
- 2
- } else {
- 3
- };
-
- FrustrationSignal {
- frustration_count,
- has_frustration,
- severity,
- indicators,
- }
- }
-
- /// Analyze repetition and looping behavior
- fn analyze_repetition(
- &self,
- normalized_messages: &[(usize, Role, NormalizedMessage)],
- ) -> RepetitionSignal {
- let mut repetitions = Vec::new();
-
- // Collect assistant messages with normalized content
- let assistant_messages: Vec<(usize, &NormalizedMessage)> = normalized_messages
- .iter()
- .filter(|(_, role, _)| *role == Role::Assistant)
- .map(|(i, _, norm_msg)| (*i, norm_msg))
- .collect();
-
- // Limit the window size to prevent O(n²) explosion
- // Only compare messages within the max_repetition_window
- let window_size = self.max_repetition_window.min(assistant_messages.len());
-
- // Check for exact or near-duplicate responses using bigram similarity
- // Only compare within the sliding window
- for i in 0..assistant_messages.len() {
- let window_start = i + 1;
- let window_end = (i + 1 + window_size).min(assistant_messages.len());
-
- for j in window_start..window_end {
- let (idx_i, norm_msg_i) = &assistant_messages[i];
- let (idx_j, norm_msg_j) = &assistant_messages[j];
-
- // Skip if messages are too short
- if norm_msg_i.tokens.len() < 5 || norm_msg_j.tokens.len() < 5 {
- continue;
- }
-
- // Calculate bigram-based similarity (more accurate for near-duplicates)
- let similarity = self.calculate_bigram_similarity(norm_msg_i, norm_msg_j);
-
- // Exact match - lowered from 0.95 to 0.85 for bigram similarity
- if similarity >= 0.85 {
- repetitions.push(RepetitionInstance {
- message_indices: vec![*idx_i, *idx_j],
- similarity,
- repetition_type: RepetitionType::Exact,
- });
- }
- // Near duplicate - lowered from 0.75 to 0.50 to catch subtle repetitions
- else if similarity >= 0.50 {
- repetitions.push(RepetitionInstance {
- message_indices: vec![*idx_i, *idx_j],
- similarity,
- repetition_type: RepetitionType::NearDuplicate,
- });
- }
- }
- }
-
- let repetition_count = repetitions.len();
- let has_looping = repetition_count > 2;
-
- let severity = if repetition_count == 0 {
- 0
- } else if repetition_count <= 2 {
- 1
- } else if repetition_count <= 4 {
- 2
- } else {
- 3
- };
-
- RepetitionSignal {
- repetition_count,
- has_looping,
- severity,
- repetitions,
- }
- }
-
- /// Calculate bigram similarity using cached bigram sets
- fn calculate_bigram_similarity(
- &self,
- norm_msg1: &NormalizedMessage,
- norm_msg2: &NormalizedMessage,
- ) -> f64 {
- // Use pre-cached bigram sets for O(1) lookups
- let set1 = &norm_msg1.bigram_set;
- let set2 = &norm_msg2.bigram_set;
-
- if set1.is_empty() && set2.is_empty() {
- return 1.0; // Both empty = identical
- }
-
- if set1.is_empty() || set2.is_empty() {
- return 0.0;
- }
-
- let intersection = set1.intersection(set2).count();
- let union = set1.union(set2).count();
-
- if union == 0 {
- return 0.0;
- }
-
- intersection as f64 / union as f64
- }
-
- /// Analyze positive feedback indicators
- fn analyze_positive_feedback(
- &self,
- normalized_messages: &[(usize, Role, NormalizedMessage)],
- ) -> PositiveFeedbackSignal {
- let mut indicators = Vec::new();
-
- for (i, role, norm_msg) in normalized_messages {
- if *role != Role::User {
- continue;
- }
-
- // Use per-turn boolean to prevent double-counting
- let mut found_in_turn = false;
-
- // Check gratitude using pre-computed patterns
- for pattern in GRATITUDE_PATTERNS.iter() {
- if norm_msg.matches_normalized_pattern(
- pattern,
- self.char_ngram_threshold,
- self.token_cosine_threshold,
- ) {
- indicators.push(PositiveIndicator {
- indicator_type: PositiveType::Gratitude,
- message_index: *i,
- snippet: pattern.raw.clone(),
- });
- found_in_turn = true;
- break;
- }
- }
-
- if found_in_turn {
- continue;
- }
-
- // Check satisfaction using pre-computed patterns
- for pattern in SATISFACTION_PATTERNS.iter() {
- if norm_msg.matches_normalized_pattern(
- pattern,
- self.char_ngram_threshold,
- self.token_cosine_threshold,
- ) {
- indicators.push(PositiveIndicator {
- indicator_type: PositiveType::Satisfaction,
- message_index: *i,
- snippet: pattern.raw.clone(),
- });
- found_in_turn = true;
- break;
- }
- }
-
- if found_in_turn {
- continue;
- }
-
- // Check success confirmation using pre-computed patterns
- for pattern in SUCCESS_PATTERNS.iter() {
- if norm_msg.matches_normalized_pattern(
- pattern,
- self.char_ngram_threshold,
- self.token_cosine_threshold,
- ) {
- indicators.push(PositiveIndicator {
- indicator_type: PositiveType::Success,
- message_index: *i,
- snippet: pattern.raw.clone(),
- });
- break;
- }
- }
- }
-
- let positive_count = indicators.len();
- let has_positive_feedback = positive_count > 0;
-
- // Calculate confidence based on number and diversity of indicators
- let confidence = if positive_count == 0 {
- 0.0
- } else if positive_count == 1 {
- 0.6
- } else if positive_count == 2 {
- 0.8
- } else {
- 0.95
- };
-
- PositiveFeedbackSignal {
- positive_count,
- has_positive_feedback,
- confidence,
- indicators,
- }
- }
-
- /// Analyze user escalation requests
- fn analyze_escalation(
- &self,
- normalized_messages: &[(usize, Role, NormalizedMessage)],
- ) -> EscalationSignal {
- let mut requests = Vec::new();
-
- for (i, role, norm_msg) in normalized_messages {
- if *role != Role::User {
- continue;
- }
-
- let mut found_human_agent = false;
-
- // Check for human agent request using pre-computed patterns
- for pattern in HUMAN_AGENT_PATTERNS.iter() {
- if norm_msg.matches_normalized_pattern(
- pattern,
- self.char_ngram_threshold,
- self.token_cosine_threshold,
- ) {
- requests.push(EscalationRequest {
- message_index: *i,
- snippet: pattern.raw.clone(),
- escalation_type: EscalationType::HumanAgent,
- });
- found_human_agent = true;
- break;
- }
- }
-
- // Check for support request (only if no human agent request found)
- // HumanAgent and Support are too similar and often match the same phrase
- if !found_human_agent {
- for pattern in SUPPORT_PATTERNS.iter() {
- if norm_msg.matches_normalized_pattern(
- pattern,
- self.char_ngram_threshold,
- self.token_cosine_threshold,
- ) {
- requests.push(EscalationRequest {
- message_index: *i,
- snippet: pattern.raw.clone(),
- escalation_type: EscalationType::Support,
- });
- break;
- }
- }
- }
-
- // Check for quit threats (independent of HumanAgent/Support)
- // A message can contain both "give up" (quit) and "speak to human" (escalation)
- for pattern in QUIT_PATTERNS.iter() {
- if norm_msg.matches_normalized_pattern(
- pattern,
- self.char_ngram_threshold,
- self.token_cosine_threshold,
- ) {
- requests.push(EscalationRequest {
- message_index: *i,
- snippet: pattern.raw.clone(),
- escalation_type: EscalationType::ThreatToQuit,
- });
- break;
- }
- }
- }
-
- let escalation_count = requests.len();
- let escalation_requested = escalation_count > 0;
-
- EscalationSignal {
- escalation_requested,
- escalation_count,
- requests,
- }
- }
-
- // ========================================================================
- // Helper Methods
- // ========================================================================
-
- /// Check if two messages are similar rephrases
- fn is_similar_rephrase(
- &self,
- norm_msg1: &NormalizedMessage,
- norm_msg2: &NormalizedMessage,
- ) -> bool {
- // Skip if too short
- if norm_msg1.tokens.len() < 3 || norm_msg2.tokens.len() < 3 {
- return false;
- }
-
- // Common stopwords to downweight
- let stopwords: HashSet<&str> = [
- "i", "me", "my", "you", "the", "a", "an", "is", "are", "was", "were", "to", "with",
- "for", "of", "at", "by", "in", "on", "it", "this", "that", "can", "could", "do",
- "does", "did", "will", "would", "should", "be",
- ]
- .iter()
- .cloned()
- .collect();
-
- // Filter out stopwords for meaningful overlap
- let tokens1: HashSet<_> = norm_msg1
- .tokens
- .iter()
- .filter(|t| !stopwords.contains(t.as_str()))
- .collect();
- let tokens2: HashSet<_> = norm_msg2
- .tokens
- .iter()
- .filter(|t| !stopwords.contains(t.as_str()))
- .collect();
-
- // Need at least 2 non-stopword tokens
- if tokens1.len() < 2 || tokens2.len() < 2 {
- return false;
- }
-
- let intersection = tokens1.intersection(&tokens2).count();
- let min_size = tokens1.len().min(tokens2.len());
-
- // High overlap suggests rephrase
- let overlap_ratio = intersection as f64 / min_size as f64;
- overlap_ratio >= 0.6
- }
-
- /// Assess overall interaction quality based on all signals
- fn assess_overall_quality(
- &self,
- turn_count: &TurnCountSignal,
- follow_up: &FollowUpSignal,
- frustration: &FrustrationSignal,
- repetition: &RepetitionSignal,
- positive: &PositiveFeedbackSignal,
- escalation: &EscalationSignal,
- ) -> InteractionQuality {
- // Critical conditions - immediate fail
- if escalation.escalation_requested
- || frustration.severity >= 3
- || repetition.severity >= 3
- || turn_count.is_excessive
- {
- return InteractionQuality::Severe;
- }
-
- // Calculate quality score
- let mut score = 50.0; // Start at neutral
-
- // Positive factors
- if positive.has_positive_feedback {
- score += 20.0 * positive.confidence;
- }
- score += turn_count.efficiency_score * 10.0;
-
- // Negative factors
- if frustration.has_frustration {
- score -= frustration.severity as f64 * 10.00;
- }
- if follow_up.is_concerning {
- score -= 15.0;
- }
- if repetition.has_looping {
- score -= repetition.severity as f64 * 8.0;
- }
- if turn_count.is_concerning {
- score -= 10.0;
- }
-
- // Map score to quality level
- if score >= 75.0 {
- InteractionQuality::Excellent
- } else if score >= 60.0 {
- InteractionQuality::Good
- } else if score >= 40.0 {
- InteractionQuality::Neutral
- } else if score >= 25.0 {
- InteractionQuality::Poor
- } else {
- InteractionQuality::Severe
- }
- }
-
- /// Generate human-readable summary
- #[allow(clippy::too_many_arguments)]
- fn generate_summary(
- &self,
- turn_count: &TurnCountSignal,
- follow_up: &FollowUpSignal,
- frustration: &FrustrationSignal,
- repetition: &RepetitionSignal,
- positive: &PositiveFeedbackSignal,
- escalation: &EscalationSignal,
- quality: &InteractionQuality,
- ) -> String {
- let mut summary_parts = Vec::new();
-
- summary_parts.push(format!("Overall Quality: {:?}", quality));
-
- summary_parts.push(format!(
- "Turn Count: {} turns (efficiency: {:.1}%)",
- turn_count.total_turns,
- turn_count.efficiency_score * 100.0
- ));
-
- if follow_up.is_concerning {
- summary_parts.push(format!(
- "⚠️ High repair rate: {:.1}% of user turns",
- follow_up.repair_ratio * 100.0
- ));
- }
-
- if frustration.has_frustration {
- summary_parts.push(format!(
- "⚠️ Frustration detected: {} indicators (severity: {})",
- frustration.frustration_count, frustration.severity
- ));
- }
-
- if repetition.has_looping {
- summary_parts.push(format!(
- "⚠️ Looping detected: {} repetitions",
- repetition.repetition_count
- ));
- }
-
- if positive.has_positive_feedback {
- summary_parts.push(format!(
- "✓ Positive feedback: {} indicators",
- positive.positive_count
- ));
- }
-
- if escalation.escalation_requested {
- summary_parts.push(format!(
- "⚠️ Escalation requested: {} requests",
- escalation.escalation_count
- ));
- }
-
- summary_parts.join(" | ")
- }
}
-impl SignalAnalyzer for TextBasedSignalAnalyzer {
- fn analyze(&self, messages: &[Message]) -> SignalReport {
- // Limit the number of messages to process (take most recent messages)
- let messages_to_process = if messages.len() > self.max_messages {
- &messages[messages.len() - self.max_messages..]
+/// Top-level analyzer.
+pub struct SignalAnalyzer {
+ cfg: SignalAnalyzerConfig,
+}
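+
+// Minimal usage sketch (illustrative, not prescriptive): override one config
+// field and keep the rest of the defaults via struct-update syntax; the
+// threshold value below is arbitrary.
+//
+//     let analyzer = SignalAnalyzer::new(SignalAnalyzerConfig {
+//         char_ngram_threshold: 0.70,
+//         ..SignalAnalyzerConfig::default()
+//     });
+//     let report = analyzer.analyze_openai(&messages);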
+
+impl Default for SignalAnalyzer {
+ fn default() -> Self {
+ Self::new(SignalAnalyzerConfig::default())
+ }
+}
+
+impl SignalAnalyzer {
+ pub fn new(cfg: SignalAnalyzerConfig) -> Self {
+ Self { cfg }
+ }
+
+ /// Run the full multi-layer analysis on a ShareGPT-shaped conversation.
+ pub fn analyze_sharegpt(&self, messages: &[ShareGptMessage<'_>]) -> SignalReport {
+        // Keep only the last `max_messages` messages (last-N truncation, matching the Python reference implementation).
+ let slice: &[ShareGptMessage<'_>] = if messages.len() > self.cfg.max_messages {
+ &messages[messages.len() - self.cfg.max_messages..]
} else {
messages
};
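+        // Illustrative: with 120 incoming messages and max_messages = 100, the
+        // slice keeps the last 100, so offset = 20 and slice index 0 maps back
+        // to absolute message index 20.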
+ let offset = messages.len().saturating_sub(slice.len());
- // Preprocess all messages once, filtering out non-text content (tool calls, etc.)
- // and truncating long messages
- let normalized_messages: Vec<(usize, Role, NormalizedMessage)> = messages_to_process
+ // Preprocess to absolute-indexed normalized human/gpt messages.
+ let normalized_owned: Vec<(usize, &str, NormalizedMessage)> = slice
.iter()
.enumerate()
- .filter_map(|(i, msg)| {
- Self::extract_text(&msg.content).map(|text| {
- (
- i,
- msg.role.clone(),
- NormalizedMessage::from_text_with_limit(&text, self.max_message_length),
- )
- })
+ .filter_map(|(i, m)| {
+ if (m.from == "human" || m.from == "gpt") && !m.value.is_empty() {
+ Some((
+ offset + i,
+ m.from,
+ NormalizedMessage::from_text(m.value, self.cfg.max_message_length),
+ ))
+ } else {
+ None
+ }
})
.collect();
- let turn_count = self.analyze_turn_count(messages_to_process);
- let follow_up = self.analyze_follow_up(&normalized_messages);
- let frustration = self.analyze_frustration(&normalized_messages);
- let repetition = self.analyze_repetition(&normalized_messages);
- let positive_feedback = self.analyze_positive_feedback(&normalized_messages);
- let escalation = self.analyze_escalation(&normalized_messages);
-
- let overall_quality = self.assess_overall_quality(
- &turn_count,
- &follow_up,
- &frustration,
- &repetition,
- &positive_feedback,
- &escalation,
+ let misalignment = analyze_misalignment(
+ &normalized_owned,
+ self.cfg.char_ngram_threshold,
+ self.cfg.token_cosine_threshold,
);
- let summary = self.generate_summary(
- &turn_count,
- &follow_up,
- &frustration,
- &repetition,
- &positive_feedback,
- &escalation,
- &overall_quality,
+        let stagnation_input: Vec<ShareGptMsg<'_>> =
+ slice.iter().map(|m| ShareGptMsg { from: m.from }).collect();
+ let (mut stagnation, turn_metrics) = analyze_stagnation(
+ &stagnation_input,
+ &normalized_owned,
+ self.cfg.baseline_turns,
+ );
+
+ let disengagement = analyze_disengagement(
+ &normalized_owned,
+ self.cfg.char_ngram_threshold,
+ self.cfg.token_cosine_threshold,
+ );
+
+ let satisfaction = analyze_satisfaction(
+ &normalized_owned,
+ self.cfg.char_ngram_threshold,
+ self.cfg.token_cosine_threshold,
+ );
+
+ let failure = analyze_failure(slice);
+ let loops = analyze_loops(slice);
+ let exhaustion = analyze_exhaustion(slice);
+
+        // Shift each stagnation signal's message_index back into absolute coordinates.
+ for s in &mut stagnation.signals {
+ s.message_index = offset + s.message_index.min(slice.len().saturating_sub(1));
+ }
+
+ let interaction = InteractionSignals {
+ misalignment,
+ stagnation,
+ disengagement,
+ satisfaction,
+ };
+ let execution = ExecutionSignals { failure, loops };
+ let environment = EnvironmentSignals { exhaustion };
+
+ let (overall_quality, score) = assess_quality(
+ &interaction,
+ &execution,
+ &environment,
+ turn_metrics.user_turns,
+ );
+ let summary = generate_summary(
+ &turn_metrics,
+ &interaction,
+ &execution,
+ &environment,
+ overall_quality,
);
SignalReport {
- turn_count,
- follow_up,
- frustration,
- repetition,
- positive_feedback,
- escalation,
+ interaction,
+ execution,
+ environment,
overall_quality,
+ quality_score: score,
+ turn_metrics,
summary,
}
}
-}
-impl Default for TextBasedSignalAnalyzer {
- fn default() -> Self {
- Self::new()
+ /// Convenience entry point: convert OpenAI-shaped chat `Message`s into the
+ /// ShareGPT format the detectors operate on, then run analysis.
+ pub fn analyze_openai(&self, messages: &[Message]) -> SignalReport {
+ let owned = messages_to_sharegpt(messages);
+        let view: Vec<ShareGptMessage<'_>> = owned
+ .iter()
+ .map(|(role, value)| ShareGptMessage {
+ from: role.as_str(),
+ value: value.as_str(),
+ })
+ .collect();
+ self.analyze_sharegpt(&view)
}
}
-// ============================================================================
-// Tests
-// ============================================================================
+/// Convert OpenAI-shaped messages to a sequence of ShareGPT
+/// `(role, value)` pairs.
+///
+/// Mapping (preserves original message order; tool calls are emitted as a
+/// separate `function_call` row immediately after the assistant text):
+///
+/// - `User` -> `("human", text)`
+/// - `Assistant` -> `("gpt", text)`, then one `("function_call", json)` per tool call
+/// - `Tool` -> `("observation", text)`
+/// - `System` / `Developer` -> dropped (not analyzed)
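+///
+/// Illustrative output shape for a chat with one tool call (names and values
+/// here are a sketch, not taken from a real run):
+///
+/// ```text
+/// [("human", "weather in Paris?"),
+///  ("gpt", "Let me check."),
+///  ("function_call", "{\"name\":\"get_weather\",\"arguments\":\"{...}\"}"),
+///  ("observation", "{\"temp_c\":14}")]
+/// ```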
+pub fn messages_to_sharegpt(messages: &[Message]) -> Vec<(String, String)> {
+ let mut out: Vec<(String, String)> = Vec::with_capacity(messages.len());
+ for m in messages {
+ match m.role {
+ Role::User => {
+ let text = m.content.extract_text();
+ out.push(("human".to_string(), text));
+ }
+ Role::Assistant => {
+ let text = m.content.extract_text();
+ if !text.is_empty() {
+ out.push(("gpt".to_string(), text));
+ }
+ if let Some(calls) = &m.tool_calls {
+ for call in calls {
+ let payload = serde_json::json!({
+ "name": call.function.name,
+ "arguments": call.function.arguments,
+ });
+ out.push(("function_call".to_string(), payload.to_string()));
+ }
+ }
+ }
+ Role::Tool => {
+ let text = m.content.extract_text();
+ out.push(("observation".to_string(), text));
+ }
+ Role::System | Role::Developer => {}
+ }
+ }
+ out
+}
+
+// ---------------------------------------------------------------------------
+// Quality scoring (mirrors `_assess_quality` in the reference)
+// ---------------------------------------------------------------------------
+
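+// Worked example of the arithmetic below (numbers are illustrative): from the
+// 50.0 base, one satisfaction signal adds 20.0 * 0.6 = 12.0 and one execution
+// failure subtracts 8.0, landing at 54.0 -> Neutral; two satisfaction signals
+// and no penalties give 50.0 + 20.0 * 0.8 = 66.0 -> Good.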
+fn assess_quality(
+ interaction: &InteractionSignals,
+ execution: &ExecutionSignals,
+ environment: &EnvironmentSignals,
+ user_turns: usize,
+) -> (InteractionQuality, f32) {
+ // Critical: explicit escalation/quit OR severe disengagement OR severe stagnation.
+ let has_escalation_or_quit = interaction.disengagement.signals.iter().any(|s| {
+ matches!(
+ s.signal_type,
+ SignalType::DisengagementEscalation | SignalType::DisengagementQuit
+ )
+ });
+ if (interaction.disengagement.count > 0 && has_escalation_or_quit)
+ || interaction.disengagement.severity >= 3
+ || interaction.stagnation.severity >= 3
+ {
+ return (InteractionQuality::Severe, 0.0);
+ }
+
+ let mut score: f32 = 50.0;
+
+ if interaction.satisfaction.count > 0 {
+ let confidence = match interaction.satisfaction.count {
+ 1 => 0.6,
+ 2 => 0.8,
+ _ => 0.95,
+ };
+ score += 20.0 * confidence;
+ }
+
+ if interaction.disengagement.count > 0 {
+ score -= interaction.disengagement.severity as f32 * 10.0;
+ }
+ if interaction.misalignment.severity > 0 && interaction.misalignment_ratio(user_turns) > 0.3 {
+ score -= 15.0;
+ }
+ if interaction.stagnation.count > 2 {
+ score -= interaction.stagnation.severity as f32 * 8.0;
+ }
+
+ if execution.failure.count > 0 {
+ score -= execution.failure.count as f32 * 8.0;
+ }
+ if execution.loops.count > 0 {
+ score -= execution.loops.count as f32 * 5.0;
+ }
+ if environment.exhaustion.count > 0 {
+ score -= environment.exhaustion.count as f32 * 3.0;
+ }
+
+ score = score.clamp(0.0, 100.0);
+
+ let quality = if score >= 75.0 {
+ InteractionQuality::Excellent
+ } else if score >= 60.0 {
+ InteractionQuality::Good
+ } else if score >= 40.0 {
+ InteractionQuality::Neutral
+ } else if score >= 25.0 {
+ InteractionQuality::Poor
+ } else {
+ InteractionQuality::Severe
+ };
+ (quality, score)
+}
+
+/// Render the per-conversation summary string.
+///
+/// Output is structurally grouped by the paper taxonomy so a reader can see
+/// at a glance which layer fired:
+///
+/// ```text
+/// Overall Quality: severe | Turns: 7 (efficiency: 71.4%)
+/// | Interaction — misalignment: 2 (sev 1), stagnation: 0, disengagement: 2 (sev 1), satisfaction: 0
+/// | Execution — failure: 0, loops: 0
+/// | Environment — exhaustion: 0
+/// | High misalignment rate: 50.0% of user turns
+/// | Escalation requested: 1
+/// ```
+///
+/// Layer headers are always present (even when their counts are all zero) so
+/// the taxonomy is visible by inspection. Quality-driving callouts —
+/// "high misalignment rate", "looping detected", "escalation requested" —
+/// are appended after the layer summary as a separate "alerts" tail.
+fn generate_summary(
+ turn_metrics: &TurnMetrics,
+ interaction: &InteractionSignals,
+ execution: &ExecutionSignals,
+ environment: &EnvironmentSignals,
+ quality: InteractionQuality,
+) -> String {
+    let mut parts: Vec<String> = Vec::new();
+ parts.push(format!("Overall Quality: {}", quality.as_str()));
+ parts.push(format!(
+ "Turns: {} (efficiency: {:.1}%)",
+ turn_metrics.total_turns,
+ turn_metrics.efficiency_score * 100.0
+ ));
+
+ parts.push(format!(
+ "Interaction \u{2014} {}, {}, {}, {}",
+ fmt_group("misalignment", &interaction.misalignment),
+ fmt_group("stagnation", &interaction.stagnation),
+ fmt_group("disengagement", &interaction.disengagement),
+ fmt_group("satisfaction", &interaction.satisfaction),
+ ));
+ parts.push(format!(
+ "Execution \u{2014} {}, {}",
+ fmt_group("failure", &execution.failure),
+ fmt_group("loops", &execution.loops),
+ ));
+ parts.push(format!(
+ "Environment \u{2014} {}",
+ fmt_group("exhaustion", &environment.exhaustion),
+ ));
+
+ if interaction.misalignment.count > 0 {
+ let misalignment_ratio = interaction.misalignment_ratio(turn_metrics.user_turns);
+ if misalignment_ratio > 0.3 {
+ parts.push(format!(
+ "High misalignment rate: {:.1}% of user turns",
+ misalignment_ratio * 100.0
+ ));
+ }
+ }
+ if interaction.stagnation.count > 2 {
+ parts.push(format!(
+ "Looping detected: {} repetitions",
+ interaction.stagnation.count
+ ));
+ }
+ let escalation_count = interaction
+ .disengagement
+ .signals
+ .iter()
+ .filter(|s| matches!(s.signal_type, SignalType::DisengagementEscalation))
+ .count();
+ if escalation_count > 0 {
+ parts.push(format!("Escalation requested: {}", escalation_count));
+ }
+
+ parts.join(" | ")
+}
+
+/// Render `"<name>: <count> (sev <severity>)"`, dropping the severity suffix
+/// when the count is zero (keeps the summary readable for clean conversations).
+fn fmt_group(name: &str, group: &super::schemas::SignalGroup) -> String {
+ if group.count == 0 {
+ format!("{}: 0", name)
+ } else {
+ format!("{}: {} (sev {})", name, group.count, group.severity)
+ }
+}
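+
+// For example, an empty group renders as "loops: 0", while a group holding
+// two signals at severity 1 renders as "loops: 2 (sev 1)".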
#[cfg(test)]
mod tests {
use super::*;
- use hermesllm::apis::openai::MessageContent;
- use hermesllm::transforms::lib::ExtractText;
- use std::time::Instant;
+ use hermesllm::apis::openai::{Message, MessageContent, Role};
+ #[allow(unused_imports)]
+ use hermesllm::transforms::ExtractText;
- fn create_message(role: Role, content: &str) -> Message {
+ fn user(t: &str) -> Message {
Message {
- role,
- content: Some(MessageContent::Text(content.to_string())),
+ role: Role::User,
+ content: Some(MessageContent::Text(t.to_string())),
+ name: None,
+ tool_calls: None,
+ tool_call_id: None,
+ }
+ }
+ fn assistant(t: &str) -> Message {
+ Message {
+ role: Role::Assistant,
+ content: Some(MessageContent::Text(t.to_string())),
name: None,
tool_calls: None,
tool_call_id: None,
}
}
- // ========================================================================
- // Tests for New Similarity Methods
- // ========================================================================
-
#[test]
- fn test_char_ngram_similarity_exact_match() {
- let msg = NormalizedMessage::from_text("thank you very much");
- let similarity = msg.char_ngram_similarity("thank you very much");
- assert!(
- similarity > 0.95,
- "Exact match should have very high similarity"
- );
+ fn report_quality_neutral_for_short_clean_chat() {
+ let msgs = vec![
+ user("Hello, can you help me with a question?"),
+ assistant("Of course, what's your question?"),
+ user("How does X work?"),
+ assistant("X works by ..."),
+ ];
+ let r = SignalAnalyzer::default().analyze_openai(&msgs);
+ assert!(matches!(
+ r.overall_quality,
+ InteractionQuality::Neutral | InteractionQuality::Good | InteractionQuality::Excellent
+ ));
+ assert!(r.summary.starts_with("Overall Quality:"));
}
#[test]
- fn test_char_ngram_similarity_typo() {
- let msg = NormalizedMessage::from_text("thank you very much");
- // Common typo: "thnks" instead of "thanks"
- let similarity = msg.char_ngram_similarity("thnks you very much");
- assert!(
- similarity > 0.50,
- "Should handle single-character typo with decent similarity: {}",
- similarity
- );
- }
-
- #[test]
- fn test_char_ngram_similarity_small_edit() {
- let msg = NormalizedMessage::from_text("this doesn't work");
- let similarity = msg.char_ngram_similarity("this doesnt work");
- assert!(
- similarity > 0.70,
- "Should handle punctuation removal gracefully: {}",
- similarity
- );
- }
-
- #[test]
- fn test_char_ngram_similarity_word_insertion() {
- let msg = NormalizedMessage::from_text("i don't understand");
- let similarity = msg.char_ngram_similarity("i really don't understand");
- assert!(
- similarity > 0.40,
- "Should be robust to word insertions: {}",
- similarity
- );
- }
-
- #[test]
- fn test_token_cosine_similarity_exact_match() {
- let msg = NormalizedMessage::from_text("this is not helpful");
- let similarity = msg.token_cosine_similarity("this is not helpful");
- assert!(
- (similarity - 1.0).abs() < 0.01,
- "Exact match should have cosine similarity of 1.0"
- );
- }
-
- #[test]
- fn test_token_cosine_similarity_word_order() {
- let msg = NormalizedMessage::from_text("not helpful at all");
- let similarity = msg.token_cosine_similarity("helpful not at all");
- assert!(
- similarity > 0.95,
- "Should be robust to word order changes: {}",
- similarity
- );
- }
-
- #[test]
- fn test_token_cosine_similarity_frequency() {
- let msg = NormalizedMessage::from_text("help help help please");
- let similarity = msg.token_cosine_similarity("help please");
- assert!(
- similarity > 0.7 && similarity < 1.0,
- "Should account for frequency differences: {}",
- similarity
- );
- }
-
- #[test]
- fn test_token_cosine_similarity_long_message_with_context() {
- let msg = NormalizedMessage::from_text(
- "I've been trying to set up my account for the past hour \
- and the verification email never arrived. I checked my spam folder \
- and still nothing. This is really frustrating and not helpful at all.",
- );
- let similarity = msg.token_cosine_similarity("not helpful");
- assert!(
- similarity > 0.15 && similarity < 0.7,
- "Should detect pattern in long message with lower but non-zero similarity: {}",
- similarity
- );
- }
-
- #[test]
- fn test_layered_matching_exact_hit() {
- let msg = NormalizedMessage::from_text("thank you so much");
- assert!(
- msg.layered_contains_phrase("thank you", 0.50, 0.60),
- "Should match exact phrase in Layer 0"
- );
- }
-
- #[test]
- fn test_layered_matching_typo_hit() {
- // Test that shows layered matching is more robust than exact matching alone
- let msg = NormalizedMessage::from_text("it doesnt work for me");
-
- // "doesnt work" should match "doesn't work" via character ngrams (high overlap)
- assert!(
- msg.layered_contains_phrase("doesn't work", 0.50, 0.60),
- "Should match 'doesnt work' to 'doesn't work' via character ngrams"
- );
- }
-
- #[test]
- fn test_layered_matching_word_order_hit() {
- let msg = NormalizedMessage::from_text("helpful not very");
- assert!(
- msg.layered_contains_phrase("not helpful", 0.50, 0.60),
- "Should match reordered words via token cosine in Layer 2"
- );
- }
-
- #[test]
- fn test_layered_matching_long_message_with_pattern() {
- let msg = NormalizedMessage::from_text(
- "I've tried everything and followed all the instructions \
- but this is not helpful at all and I'm getting frustrated",
- );
- assert!(
- msg.layered_contains_phrase("not helpful", 0.50, 0.60),
- "Should detect pattern buried in long message"
- );
- }
-
- #[test]
- fn test_layered_matching_no_match() {
- let msg = NormalizedMessage::from_text("everything is working perfectly");
- assert!(
- !msg.layered_contains_phrase("not helpful", 0.50, 0.60),
- "Should not match completely different content"
- );
- }
-
- #[test]
- fn test_char_ngram_vs_token_cosine_tradeoffs() {
- // Character ngrams handle character-level changes well
- let msg1 = NormalizedMessage::from_text("this doesnt work");
- let char_sim1 = msg1.char_ngram_similarity("this doesn't work");
- assert!(
- char_sim1 > 0.70,
- "Character ngrams should handle punctuation: {}",
- char_sim1
- );
-
- // Token cosine is better for word order and long messages with semantic overlap
- let msg2 =
- NormalizedMessage::from_text("I really appreciate all your help with this issue today");
- let token_sim2 = msg2.token_cosine_similarity("thank you for help");
- assert!(
- token_sim2 > 0.15,
- "Token cosine should detect semantic overlap: {}",
- token_sim2
- );
- }
-
- // ========================================================================
- // Existing Tests
- // ========================================================================
-
- fn preprocess_messages(messages: &[Message]) -> Vec<(usize, Role, NormalizedMessage)> {
- messages
+ fn report_severe_when_user_escalates() {
+ let msgs = vec![
+ user("This isn't helpful at all"),
+ assistant("I'm sorry, can you tell me more?"),
+ user("Get me a human, this is useless"),
+ ];
+ let r = SignalAnalyzer::default().analyze_openai(&msgs);
+ assert_eq!(r.overall_quality, InteractionQuality::Severe);
+ assert!(r
+ .interaction
+ .disengagement
+ .signals
.iter()
- .enumerate()
- .map(|(i, msg)| {
- let text = msg.content.extract_text();
- (i, msg.role.clone(), NormalizedMessage::from_text(&text))
- })
- .collect()
+ .any(|s| matches!(s.signal_type, SignalType::DisengagementEscalation)));
}
#[test]
- fn test_turn_count_efficient() {
- let start = Instant::now();
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(Role::User, "Hello"),
- create_message(Role::Assistant, "Hi! How can I help?"),
- create_message(Role::User, "Thanks!"),
+ fn report_excellent_when_user_satisfied() {
+ let msgs = vec![
+ user("Can you summarize this report?"),
+ assistant("Here's a summary: ..."),
+ user("That's perfect, exactly what I needed, you're awesome!"),
];
-
- let signal = analyzer.analyze_turn_count(&messages);
- assert_eq!(signal.total_turns, 3);
- assert_eq!(signal.user_turns, 2);
- assert_eq!(signal.assistant_turns, 1);
- assert!(!signal.is_concerning);
- assert!(!signal.is_excessive);
- assert!(signal.efficiency_score > 0.9);
- println!("test_turn_count_efficient took: {:?}", start.elapsed());
+ let r = SignalAnalyzer::default().analyze_openai(&msgs);
+ assert!(r.interaction.satisfaction.count > 0);
+ assert!(matches!(
+ r.overall_quality,
+ InteractionQuality::Good | InteractionQuality::Excellent
+ ));
}
#[test]
- fn test_turn_count_excessive() {
- let start = Instant::now();
- let analyzer = TextBasedSignalAnalyzer::new();
- let mut messages = Vec::new();
- for i in 0..15 {
- messages.push(create_message(
- if i % 2 == 0 {
- Role::User
- } else {
- Role::Assistant
- },
- &format!("Message {}", i),
- ));
- }
-
- let signal = analyzer.analyze_turn_count(&messages);
- assert_eq!(signal.total_turns, 15);
- assert!(signal.is_concerning);
- assert!(signal.is_excessive);
- assert!(signal.efficiency_score < 0.5);
- println!("test_turn_count_excessive took: {:?}", start.elapsed());
- }
-
- #[test]
- fn test_follow_up_detection() {
- let start = Instant::now();
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(Role::User, "Show me restaurants"),
- create_message(Role::Assistant, "Here are some options"),
- create_message(Role::User, "No, I meant Italian restaurants"),
- create_message(Role::Assistant, "Here are Italian restaurants"),
+ fn repro_gratitude_does_not_trigger_misalignment() {
+ let msgs = vec![
+ user("What is the weather in Istanbul?"),
+ assistant("Istanbul is 14C and partly cloudy."),
+ user("That worked, exactly what I needed. Thanks, that is perfect!"),
];
-
- let normalized_messages = preprocess_messages(&messages);
- let signal = analyzer.analyze_follow_up(&normalized_messages);
- assert_eq!(signal.repair_count, 1);
- assert!(signal.repair_ratio > 0.0);
- println!("test_follow_up_detection took: {:?}", start.elapsed());
- }
-
- #[test]
- fn test_frustration_detection() {
- let start = Instant::now();
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(Role::User, "THIS IS RIDICULOUS!!!"),
- create_message(Role::Assistant, "I apologize for the frustration"),
- create_message(Role::User, "This doesn't work at all"),
- ];
-
- let normalized_messages = preprocess_messages(&messages);
- let signal = analyzer.analyze_frustration(&normalized_messages);
- assert!(signal.has_frustration);
- assert!(signal.frustration_count >= 2);
- assert!(signal.severity > 0);
- println!("test_frustration_detection took: {:?}", start.elapsed());
- }
-
- #[test]
- fn test_positive_feedback_detection() {
- let start = Instant::now();
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(Role::User, "Can you help me?"),
- create_message(Role::Assistant, "Sure!"),
- create_message(Role::User, "Thank you! That's exactly what I needed."),
- ];
-
- let normalized_messages = preprocess_messages(&messages);
- let signal = analyzer.analyze_positive_feedback(&normalized_messages);
- assert!(signal.has_positive_feedback);
- assert!(signal.positive_count >= 1);
- assert!(signal.confidence > 0.5);
- println!(
- "test_positive_feedback_detection took: {:?}",
- start.elapsed()
- );
- }
-
- #[test]
- fn test_escalation_detection() {
- let start = Instant::now();
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(Role::User, "This isn't working"),
- create_message(Role::Assistant, "Let me help"),
- create_message(Role::User, "I need to speak to a human agent"),
- ];
-
- let normalized_messages = preprocess_messages(&messages);
- let signal = analyzer.analyze_escalation(&normalized_messages);
- assert!(signal.escalation_requested);
- assert_eq!(signal.escalation_count, 1);
- println!("test_escalation_detection took: {:?}", start.elapsed());
- }
-
- #[test]
- fn test_repetition_detection() {
- let start = Instant::now();
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(Role::User, "What's the weather?"),
- create_message(
- Role::Assistant,
- "I can help you with the weather information",
- ),
- create_message(Role::User, "Show me the forecast"),
- create_message(Role::Assistant, "Sure, I can help you with the forecast"),
- create_message(Role::User, "Stop repeating yourself"),
- ];
-
- let normalized_messages = preprocess_messages(&messages);
- let signal = analyzer.analyze_repetition(&normalized_messages);
-
- for rep in &signal.repetitions {
- println!(
- " - Messages {:?}, similarity: {:.3}, type: {:?}",
- rep.message_indices, rep.similarity, rep.repetition_type
+ let r = SignalAnalyzer::default().analyze_openai(&msgs);
+ for s in &r.interaction.misalignment.signals {
+ eprintln!(
+ "misalignment fired: type={:?} idx={} snippet={:?} meta={:?}",
+ s.signal_type, s.message_index, s.snippet, s.metadata
);
}
-
- assert!(signal.repetition_count > 0,
- "Should detect the subtle repetition between 'I can help you with the weather information' \
- and 'Sure, I can help you with the forecast'");
- println!("test_repetition_detection took: {:?}", start.elapsed());
- }
-
- #[test]
- fn test_full_analysis_excellent() {
- let start = Instant::now();
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(Role::User, "I need to book a flight"),
- create_message(Role::Assistant, "Sure! Where would you like to go?"),
- create_message(Role::User, "New York"),
- create_message(Role::Assistant, "Great! I found several options."),
- create_message(Role::User, "Perfect!"),
- ];
-
- let report = analyzer.analyze(&messages);
- assert!(matches!(
- report.overall_quality,
- InteractionQuality::Excellent | InteractionQuality::Good
- ));
- assert!(report.positive_feedback.has_positive_feedback);
- assert!(!report.frustration.has_frustration);
- println!("test_full_analysis_excellent took: {:?}", start.elapsed());
- }
-
- #[test]
- fn test_full_analysis_poor() {
- let start = Instant::now();
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(Role::User, "Help me"),
- create_message(Role::Assistant, "How can I assist?"),
- create_message(Role::User, "No, I meant something else"),
- create_message(Role::Assistant, "What do you need?"),
- create_message(Role::User, "THIS DOESN'T WORK!!!"),
- create_message(Role::Assistant, "I apologize"),
- create_message(Role::User, "Let me speak to a human"),
- ];
-
- let report = analyzer.analyze(&messages);
- assert!(matches!(
- report.overall_quality,
- InteractionQuality::Poor | InteractionQuality::Severe
- ));
- assert!(report.frustration.has_frustration);
- assert!(report.escalation.escalation_requested);
- println!("test_full_analysis_poor took: {:?}", start.elapsed());
- }
-
- #[test]
- fn test_fuzzy_matching_gratitude() {
- let start = Instant::now();
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(Role::User, "Can you help me?"),
- create_message(Role::Assistant, "Sure!"),
- create_message(Role::User, "thnaks! that's exactly what i needed."),
- ];
-
- let normalized_messages = preprocess_messages(&messages);
- let signal = analyzer.analyze_positive_feedback(&normalized_messages);
- assert!(signal.has_positive_feedback);
- assert!(signal.positive_count >= 1);
- println!("test_fuzzy_matching_gratitude took: {:?}", start.elapsed());
- }
-
- #[test]
- fn test_fuzzy_matching_escalation() {
- let start = Instant::now();
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(Role::User, "This isn't working"),
- create_message(Role::Assistant, "Let me help"),
- create_message(Role::User, "i need to speek to a human agnet"),
- ];
-
- let normalized_messages = preprocess_messages(&messages);
- let signal = analyzer.analyze_escalation(&normalized_messages);
- assert!(signal.escalation_requested);
- assert_eq!(signal.escalation_count, 1);
- println!("test_fuzzy_matching_escalation took: {:?}", start.elapsed());
- }
-
- #[test]
- fn test_fuzzy_matching_repair() {
- let start = Instant::now();
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(Role::User, "Show me restaurants"),
- create_message(Role::Assistant, "Here are some options"),
- create_message(Role::User, "no i ment Italian restaurants"),
- create_message(Role::Assistant, "Here are Italian restaurants"),
- ];
-
- let normalized_messages = preprocess_messages(&messages);
- let signal = analyzer.analyze_follow_up(&normalized_messages);
- assert!(signal.repair_count >= 1);
- println!("test_fuzzy_matching_repair took: {:?}", start.elapsed());
- }
-
- #[test]
- fn test_fuzzy_matching_complaint() {
- let start = Instant::now();
- let analyzer = TextBasedSignalAnalyzer::new();
- // Use a complaint that should match - "doesnt work" is close enough to "doesn't work"
- let messages = vec![
- create_message(Role::User, "this doesnt work at all"), // Common typo: missing apostrophe
- create_message(Role::Assistant, "I apologize"),
- ];
-
- let normalized_messages = preprocess_messages(&messages);
- let signal = analyzer.analyze_frustration(&normalized_messages);
-
- // The layered matching should catch this via character ngrams or token cosine
- // "doesnt work" has high character-level similarity to "doesn't work"
- assert!(
- signal.has_frustration,
- "Should detect frustration from complaint pattern"
- );
- assert!(signal.frustration_count >= 1);
- println!("test_fuzzy_matching_complaint took: {:?}", start.elapsed());
- }
-
- #[test]
- fn test_exact_match_priority() {
- let start = Instant::now();
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![create_message(Role::User, "thank you so much")];
-
- let normalized_messages = preprocess_messages(&messages);
- let signal = analyzer.analyze_positive_feedback(&normalized_messages);
- assert!(signal.has_positive_feedback);
- // Should detect exact match, not fuzzy
- assert!(signal.indicators[0].snippet.contains("thank you"));
- assert!(!signal.indicators[0].snippet.contains("fuzzy"));
- println!("test_exact_match_priority took: {:?}", start.elapsed());
- }
-
- // ========================================================================
- // Anti-Tests: Verify fixes stay fixed
- // ========================================================================
-
- #[test]
- fn test_hello_not_profanity() {
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![create_message(Role::User, "hello there")];
-
- let normalized_messages = preprocess_messages(&messages);
- let signal = analyzer.analyze_frustration(&normalized_messages);
- assert!(
- !signal.has_frustration,
- "\"hello\" should not trigger profanity detection"
- );
- }
-
- #[test]
- fn test_prepare_not_escalation() {
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![create_message(
- Role::User,
- "Can you help me prepare for the meeting?",
- )];
-
- let normalized_messages = preprocess_messages(&messages);
- let signal = analyzer.analyze_escalation(&normalized_messages);
- assert!(
- !signal.escalation_requested,
- "\"prepare\" should not trigger escalation (rep pattern removed)"
- );
- }
-
- #[test]
- fn test_unicode_apostrophe_confusion() {
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(Role::User, "I'm confused"), // Unicode apostrophe
- ];
-
- let normalized_messages = preprocess_messages(&messages);
- let signal = analyzer.analyze_frustration(&normalized_messages);
- assert!(
- signal.has_frustration,
- "Unicode apostrophe 'I'm confused' should trigger confusion"
- );
- }
-
- #[test]
- fn test_unicode_quotes_work() {
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![create_message(
- Role::User,
- "\u{201C}doesn\u{2019}t work\u{201D} with unicode quotes",
- )];
-
- let normalized_messages = preprocess_messages(&messages);
- let signal = analyzer.analyze_frustration(&normalized_messages);
- assert!(
- signal.has_frustration,
- "Unicode quotes should be normalized and match patterns"
- );
- }
-
- #[test]
- fn test_absolute_not_profanity() {
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![create_message(Role::User, "That's absolute nonsense")];
-
- let normalized_messages = preprocess_messages(&messages);
- let signal = analyzer.analyze_frustration(&normalized_messages);
- // Should match on "nonsense" logic, not on "bs" substring
- let has_bs_match = signal
- .indicators
- .iter()
- .any(|ind| ind.snippet.contains("bs"));
- assert!(
- !has_bs_match,
- "\"absolute\" should not trigger 'bs' profanity match"
- );
- }
-
- #[test]
- fn test_stopwords_not_rephrase() {
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(Role::User, "Help me with X"),
- create_message(Role::Assistant, "Sure"),
- create_message(Role::User, "Help me with Y"),
- ];
-
- let normalized_messages = preprocess_messages(&messages);
- let signal = analyzer.analyze_follow_up(&normalized_messages);
- // Should not detect as rephrase since only stopwords overlap
assert_eq!(
- signal.repair_count, 0,
- "Messages with only stopword overlap should not be rephrases"
+ r.interaction.misalignment.count, 0,
+ "a pure gratitude message should not trigger repair/misalignment"
);
+ assert!(r.interaction.satisfaction.count > 0);
}
#[test]
- fn test_frustrated_user_with_legitimate_repair() {
- let start = Instant::now();
- let analyzer = TextBasedSignalAnalyzer::new();
-
- use hermesllm::apis::openai::{FunctionCall, ToolCall};
-
- // Helper to create a message with tool calls
- let create_assistant_with_tools =
- |content: &str, tool_id: &str, tool_name: &str, args: &str| -> Message {
- Message {
- role: Role::Assistant,
- content: Some(MessageContent::Text(content.to_string())),
- name: None,
- tool_calls: Some(vec![ToolCall {
- id: tool_id.to_string(),
- call_type: "function".to_string(),
- function: FunctionCall {
- name: tool_name.to_string(),
- arguments: args.to_string(),
- },
- }]),
- tool_call_id: None,
- }
- };
-
- // Helper to create a tool response message
- let create_tool_message = |tool_call_id: &str, content: &str| -> Message {
- Message {
- role: Role::Tool,
- content: Some(MessageContent::Text(content.to_string())),
- name: None,
- tool_calls: None,
- tool_call_id: Some(tool_call_id.to_string()),
- }
- };
-
- // Scenario: User DOES mention New York in first message, making "I already told you" legitimate
- let messages = vec![
- create_message(
- Role::User,
- "I need to book a flight from New York to Paris for December 20th",
- ),
- create_assistant_with_tools(
- "I'll help you search for flights to Paris.",
- "call_123",
- "search_flights",
- r#"{"origin": "NYC", "destination": "Paris", "date": "2025-12-20"}"#,
- ),
- create_tool_message("call_123", r#"{"flights": []}"#),
- create_message(
- Role::Assistant,
- "I couldn't find any flights. Could you provide your departure city?",
- ),
- create_message(Role::User, "I already told you, from New York!"),
- create_assistant_with_tools(
- "Let me try again.",
- "call_456",
- "search_flights",
- r#"{"origin": "New York", "destination": "Paris", "date": "2025-12-20"}"#,
- ),
- create_tool_message("call_456", r#"{"flights": []}"#),
- create_message(
- Role::Assistant,
- "I'm still not finding results. Let me check the system.",
- ),
- create_message(
- Role::User,
- "THIS IS RIDICULOUS!!! The tool doesn't work at all. Why do you keep calling it?",
- ),
- create_message(
- Role::Assistant,
- "I sincerely apologize for the frustration with the search tool.",
- ),
- create_message(
- Role::User,
- "Forget it. I need to speak to a human agent. This is a waste of time.",
- ),
+ fn summary_groups_signals_by_taxonomy() {
+ // Even on a clean conversation the summary should expose the three
+ // layer headers so the taxonomy is visible.
+ let msgs = vec![
+ user("Hello"),
+ assistant("Hi! How can I help?"),
+ user("What's 2 + 2?"),
+ assistant("4"),
];
-
- let report = analyzer.analyze(&messages);
-
- // Tool messages should be filtered out, so we should only analyze text messages
- // That's 4 user messages + 5 assistant text messages = 9 turns
- assert_eq!(
- report.turn_count.total_turns, 9,
- "Should count 9 text messages (tool messages filtered out)"
+ let r = SignalAnalyzer::default().analyze_openai(&msgs);
+ assert!(
+ r.summary.contains("Interaction \u{2014}"),
+ "missing Interaction header in: {}",
+ r.summary
);
assert!(
- report.turn_count.is_concerning,
- "Should flag concerning turn count"
- );
-
- // Should detect frustration (all caps, complaints)
- assert!(
- report.frustration.has_frustration,
- "Should detect frustration"
+ r.summary.contains("Execution \u{2014}"),
+ "missing Execution header in: {}",
+ r.summary
);
assert!(
- report.frustration.frustration_count >= 2,
- "Should detect multiple frustration indicators"
- );
- assert!(
- report.frustration.severity >= 2,
- "Should have moderate or higher frustration severity"
- );
-
- // Should detect escalation request
- assert!(
- report.escalation.escalation_requested,
- "Should detect escalation to human agent"
- );
- assert!(
- report.escalation.escalation_count >= 1,
- "Should detect at least one escalation"
- );
-
- // Overall quality should be Poor or Severe
- assert!(
- matches!(
- report.overall_quality,
- InteractionQuality::Poor | InteractionQuality::Severe
- ),
- "Quality should be Poor or Severe, got {:?}",
- report.overall_quality
- );
-
- println!(
- "test_frustrated_user_with_legitimate_repair took: {:?}",
- start.elapsed()
+ r.summary.contains("Environment \u{2014}"),
+ "missing Environment header in: {}",
+ r.summary
);
+ assert!(r.summary.contains("misalignment: 0"));
+ assert!(r.summary.contains("loops: 0"));
+ assert!(r.summary.contains("exhaustion: 0"));
}
#[test]
- fn test_frustrated_user_false_claim() {
- let start = Instant::now();
- let analyzer = TextBasedSignalAnalyzer::new();
-
- use hermesllm::apis::openai::{FunctionCall, ToolCall};
-
- // Helper to create a message with tool calls
- let create_assistant_with_tools =
- |content: &str, tool_id: &str, tool_name: &str, args: &str| -> Message {
- Message {
- role: Role::Assistant,
- content: Some(MessageContent::Text(content.to_string())),
- name: None,
- tool_calls: Some(vec![ToolCall {
- id: tool_id.to_string(),
- call_type: "function".to_string(),
- function: FunctionCall {
- name: tool_name.to_string(),
- arguments: args.to_string(),
- },
- }]),
- tool_call_id: None,
- }
- };
-
- // Helper to create a tool response message
- let create_tool_message = |tool_call_id: &str, content: &str| -> Message {
- Message {
- role: Role::Tool,
- content: Some(MessageContent::Text(content.to_string())),
- name: None,
- tool_calls: None,
- tool_call_id: Some(tool_call_id.to_string()),
- }
- };
-
- // Scenario: User NEVER mentions New York in first message but claims "I already told you"
- // This represents realistic frustrated user behavior - exaggeration/misremembering
- let messages = vec![
- create_message(
- Role::User,
- "I need to book a flight to Paris for December 20th",
- ),
- create_assistant_with_tools(
- "I'll help you search for flights to Paris.",
- "call_123",
- "search_flights",
- r#"{"destination": "Paris", "date": "2025-12-20"}"#,
- ),
- create_tool_message("call_123", r#"{"error": "origin required"}"#),
- create_message(
- Role::Assistant,
- "I couldn't find any flights. Could you provide your departure city?",
- ),
- create_message(Role::User, "I already told you, from New York!"), // False claim - never mentioned it
- create_assistant_with_tools(
- "Let me try again.",
- "call_456",
- "search_flights",
- r#"{"origin": "New York", "destination": "Paris", "date": "2025-12-20"}"#,
- ),
- create_tool_message("call_456", r#"{"flights": []}"#),
- create_message(
- Role::Assistant,
- "I'm still not finding results. Let me check the system.",
- ),
- create_message(
- Role::User,
- "THIS IS RIDICULOUS!!! The tool doesn't work at all. Why do you keep calling it?",
- ),
- create_message(
- Role::Assistant,
- "I sincerely apologize for the frustration with the search tool.",
- ),
- create_message(
- Role::User,
- "Forget it. I need to speak to a human agent. This is a waste of time.",
- ),
+ fn summary_includes_severity_when_signals_fire() {
+ let msgs = vec![
+ user("This isn't helpful at all"),
+ assistant("I'm sorry, can you tell me more?"),
+ user("Get me a human, this is useless"),
];
-
- let report = analyzer.analyze(&messages);
-
- // Tool messages should be filtered out, so we should only analyze text messages
- // That's 4 user messages + 5 assistant text messages = 9 turns
- assert_eq!(
- report.turn_count.total_turns, 9,
- "Should count 9 text messages (tool messages filtered out)"
+ let r = SignalAnalyzer::default().analyze_openai(&msgs);
+ // Disengagement fires; should render with `(sev N)` and the
+ // escalation-requested alert tail.
+ assert!(
+ r.summary.contains("disengagement:") && r.summary.contains("(sev "),
+ "expected severity rendered for disengagement: {}",
+ r.summary
);
assert!(
- report.turn_count.is_concerning,
- "Should flag concerning turn count"
- );
-
- // Should detect frustration (all caps, complaints, false claims)
- assert!(
- report.frustration.has_frustration,
- "Should detect frustration"
- );
- assert!(
- report.frustration.frustration_count >= 2,
- "Should detect multiple frustration indicators"
- );
- assert!(
- report.frustration.severity >= 2,
- "Should have moderate or higher frustration severity"
- );
-
- // Should detect escalation request
- assert!(
- report.escalation.escalation_requested,
- "Should detect escalation to human agent"
- );
- assert!(
- report.escalation.escalation_count >= 1,
- "Should detect at least one escalation"
- );
-
- // Note: May detect false positive "positive feedback" due to fuzzy matching
- // e.g., "I already told YOU" matches "you rock", "THIS is RIDICULOUS" matches "this helps"
- // However, the overall quality should still be Poor/Severe due to frustration+escalation
-
- // Overall quality should be Poor or Severe (frustration + escalation indicates poor interaction)
- assert!(
- matches!(
- report.overall_quality,
- InteractionQuality::Poor | InteractionQuality::Severe
- ),
- "Quality should be Poor or Severe for frustrated user with false claims, got {:?}",
- report.overall_quality
- );
-
- println!(
- "test_frustrated_user_false_claim took: {:?}",
- start.elapsed()
+ r.summary.contains("Escalation requested:"),
+ "expected escalation alert in: {}",
+ r.summary
);
}
- // false negative tests
#[test]
- fn test_dissatisfaction_polite_not_working_for_me() {
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(Role::User, "Thanks, but this still isn't working for me."), // Polite dissatisfaction, e.g., I appreciate it, but this isn't what I was looking for.
- create_message(Role::Assistant, "Sorry—what error do you see?"),
+ fn execution_failures_lower_quality() {
+ // Build a synthetic ShareGPT input with multiple tool failures.
+ let convo = vec![
+ ShareGptMessage {
+ from: "human",
+ value: "create a user",
+ },
+ ShareGptMessage {
+ from: "function_call",
+ value: r#"{"name":"create_user","arguments":{"age":"twelve"}}"#,
+ },
+ ShareGptMessage {
+ from: "observation",
+ value: "Error: validation failed - expected integer got string",
+ },
+ ShareGptMessage {
+ from: "function_call",
+ value: r#"{"name":"create_user","arguments":{}}"#,
+ },
+ ShareGptMessage {
+ from: "observation",
+ value: "missing required field: name",
+ },
];
- let normalized = preprocess_messages(&messages);
- let signal = analyzer.analyze_frustration(&normalized);
- assert!(
- signal.has_frustration,
- "Polite dissatisfaction should be detected"
- );
- }
-
- #[test]
- fn test_dissatisfaction_giving_up_without_escalation() {
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![create_message(
- Role::User,
- "Never mind, I'll figure it out myself.",
- )];
- let normalized = preprocess_messages(&messages);
- let signal = analyzer.analyze_escalation(&normalized);
- assert!(
- signal.escalation_requested,
- "Giving up should count as escalation/quit intent"
- );
- }
-
- #[test]
- fn test_dissatisfaction_same_problem_again() {
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![create_message(
- Role::User,
- "I'm running into the same issue again.",
- )];
- let normalized = preprocess_messages(&messages);
- let signal = analyzer.analyze_frustration(&normalized);
- assert!(
- signal.has_frustration,
- "'same issue again' should be detected"
- );
- }
-
- #[test]
- fn test_unsatisfied_incomplete() {
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![create_message(Role::User, "This feels incomplete.")];
- let normalized = preprocess_messages(&messages);
- let signal = analyzer.analyze_frustration(&normalized);
- assert!(
- signal.has_frustration,
- "Should detect 'incomplete' dissatisfaction"
- );
- }
-
- #[test]
- fn test_low_mood_overwhelming() {
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![create_message(
- Role::User,
- "This is overwhelming and I'm not sure what to do.",
- )];
- let normalized = preprocess_messages(&messages);
- let signal = analyzer.analyze_frustration(&normalized);
- assert!(signal.has_frustration, "Should detect overwhelmed language");
- }
-
- #[test]
- fn test_low_mood_exhausted_trying() {
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![create_message(
- Role::User,
- "I'm exhausted trying to get this working.",
- )];
- let normalized = preprocess_messages(&messages);
- let signal = analyzer.analyze_frustration(&normalized);
- assert!(
- signal.has_frustration,
- "Should detect exhaustion/struggle language"
- );
- }
-
- #[test]
- fn test_common_polite_unresolved_dissatisfaction() {
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(Role::User, "I'm trying to set up SSH keys for GitHub."),
- create_message(
- Role::Assistant,
- "Sure. First generate a key using ssh-keygen.",
- ),
- create_message(Role::User, "I did that already."),
- create_message(
- Role::Assistant,
- "Then add the key to your GitHub account settings.",
- ),
- create_message(Role::User, "I've done that too."),
- create_message(
- Role::Assistant,
- "After that, make sure your SSH agent is running.",
- ),
- create_message(
- Role::User,
- "Okay, but this still doesn't seem to fix the issue.",
- ),
- create_message(Role::Assistant, "What error message are you seeing?"),
- create_message(Role::User, "It's just not connecting the way I expected."),
- ];
-
- let report = analyzer.analyze(&messages);
-
- // This is a common false negative if you only look for caps/profanity.
- // Desired: detect dissatisfaction/frustration (or at least not rate as Excellent).
- assert!(
- report.frustration.has_frustration || report.follow_up.repair_count >= 1,
- "Should detect polite unresolved dissatisfaction via frustration or follow-up indicators"
- );
-
- assert!(
- !matches!(report.overall_quality, InteractionQuality::Excellent),
- "Should not classify unresolved dissatisfaction as Excellent"
- );
- }
-
- #[test]
- fn test_common_resigned_giving_up_quietly() {
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(
- Role::User,
- "Can you explain how to deploy this with Docker?",
- ),
- create_message(
- Role::Assistant,
- "You need to write a Dockerfile and build an image.",
- ),
- create_message(Role::User, "I tried that."),
- create_message(Role::Assistant, "Then you can run docker-compose up."),
- create_message(Role::User, "I did, but it didn’t really help."),
- create_message(Role::Assistant, "What error are you getting?"),
- create_message(
- Role::User,
- "Honestly, never mind. I’ll just try something else.",
- ),
- ];
-
- let report = analyzer.analyze(&messages);
-
- // Many systems miss "never mind / I'll try something else" if they only look for "human agent".
- assert!(
- report.escalation.escalation_requested || report.frustration.has_frustration,
- "Resigned quitting language should trigger escalation or frustration"
- );
-
- assert!(
- matches!(
- report.overall_quality,
- InteractionQuality::Poor | InteractionQuality::Severe
- ) || report.escalation.escalation_requested
- || report.frustration.has_frustration,
- "Giving up should not be classified as a high-quality interaction"
- );
- }
-
- #[test]
- fn test_common_discouraged_overwhelmed_low_mood() {
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(Role::User, "I'm trying to understand backpropagation."),
- create_message(
- Role::Assistant,
- "It's a way to compute gradients efficiently.",
- ),
- create_message(Role::User, "I’ve read that explanation already."),
- create_message(Role::Assistant, "Would you like a mathematical derivation?"),
- create_message(Role::User, "Maybe, but I’m still having trouble following."),
- create_message(Role::Assistant, "I can walk through a simple example."),
- create_message(
- Role::User,
- "That might help, but honestly this is pretty overwhelming.",
- ),
- create_message(Role::Assistant, "Let’s slow it down step by step."),
- create_message(
- Role::User,
- "Yeah… I’m just feeling kind of discouraged right now.",
- ),
- ];
-
- let report = analyzer.analyze(&messages);
-
- // This is negative affect without caps/profanity. Should still count as frustration/negative signal.
- assert!(
- report.frustration.has_frustration,
- "Overwhelmed/discouraged language should be detected as negative sentiment/frustration"
- );
-
- assert!(
- !matches!(report.overall_quality, InteractionQuality::Excellent),
- "Low-mood discouragement should not be classified as Excellent"
- );
- }
-
- #[test]
- fn test_common_misalignment_not_what_i_asked() {
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(Role::User, "How do I optimize this SQL query?"),
- create_message(
- Role::Assistant,
- "You can add indexes to improve performance.",
- ),
- create_message(Role::User, "I already have indexes."),
- create_message(Role::Assistant, "Then you could consider query caching."),
- create_message(Role::User, "That’s not really what I was asking about."),
- create_message(
- Role::Assistant,
- "What specifically are you trying to optimize?",
- ),
- create_message(
- Role::User,
- "The execution plan — this answer doesn’t address that.",
- ),
- ];
-
- let report = analyzer.analyze(&messages);
-
- // Misalignment often shows as follow-up repair or frustration.
- assert!(
- report.follow_up.repair_count >= 1 || report.frustration.has_frustration,
- "Misalignment ('not what I asked') should trigger repair or frustration signals"
- );
-
- assert!(
- !matches!(report.overall_quality, InteractionQuality::Excellent),
- "Misalignment should not be rated as Excellent"
- );
- }
-
- #[test]
- fn test_common_false_negative_polite_disappointment_complexity() {
- let analyzer = TextBasedSignalAnalyzer::new();
- let messages = vec![
- create_message(Role::User, "Can you help me write a regex for this?"),
- create_message(Role::Assistant, "Sure, try this pattern: ^[a-z]+$"),
- create_message(Role::User, "I tested it."),
- create_message(Role::Assistant, "Did it work?"),
- create_message(Role::User, "Not quite — it matches more than it should."),
- create_message(Role::Assistant, "You can refine it with a lookahead."),
- create_message(
- Role::User,
- "I see… this is more complicated than I expected.",
- ),
- ];
-
- let report = analyzer.analyze(&messages);
-
- // Polite disappointment often becomes a false negative.
- assert!(
- report.frustration.has_frustration || report.follow_up.repair_count >= 1,
- "Polite dissatisfaction ('not quite', 'more complicated than expected') should trigger a negative signal"
- );
-
- assert!(
- !matches!(report.overall_quality, InteractionQuality::Excellent),
- "Polite disappointment should not be classified as Excellent"
- );
- }
-
- #[test]
- fn test_catastrophic_failure_looping_assistant() {
- let analyzer = TextBasedSignalAnalyzer::new();
-
- // Catastrophic failure: assistant stuck in loop, user increasingly frustrated
- let messages = vec![
- // Turn 1: Initial request
- create_message(Role::User, "I need to cancel my subscription"),
- create_message(Role::Assistant, "I can help you with account management. What would you like to do today?"),
-
- // Turn 3: First repair - repetition clarification
- create_message(Role::User, "I just said I want to cancel my subscription"),
- // Turn 4: Exact repetition from assistant
- create_message(Role::Assistant, "I can help you with account management. What would you like to do today?"),
-
- // Turn 5: Repair (negation) + frustration (ALL CAPS)
- create_message(Role::User, "No, I meant CANCEL. My. Subscription."),
- // Turn 6: Near-duplicate repetition
- create_message(Role::Assistant, "I understand you need help with your account. Let me assist you with that."),
-
- // Turn 7: Severe frustration (complaint + excessive punctuation + ALL CAPS)
- create_message(Role::User, "This is not helpful!! I want to CANCEL my subscription!!!"),
- // Turn 8: Exact repetition again (looping)
- create_message(Role::Assistant, "I can help you with account management. What would you like to do today?"),
-
- // Turn 9: Repair (explicit rephrase) + frustration (complaint)
- create_message(Role::User, "This doesnt work. Let me rephrase: I need to terminate my account and stop being charged"),
- // Turn 10: Near-duplicate repetition (looping continues)
- create_message(Role::Assistant, "I understand you need help with your account. Let me assist you with that."),
-
- // Turn 11: Escalation (multiple types) + frustration
- create_message(Role::User, "I give up. Can I speak to a real person? This is a waste of time"),
- ];
-
- let report = analyzer.analyze(&messages);
-
- // Validate turn count
- assert_eq!(
- report.turn_count.total_turns, 11,
- "Should have 11 total turns"
- );
- assert_eq!(report.turn_count.user_turns, 6, "Should have 6 user turns");
- assert_eq!(
- report.turn_count.assistant_turns, 5,
- "Should have 5 assistant turns"
- );
- assert!(
- report.turn_count.is_concerning,
- "11 turns should be concerning (>7)"
- );
- assert!(
- !report.turn_count.is_excessive,
- "11 turns should not be excessive (<=12)"
- );
- assert!(
- report.turn_count.efficiency_score < 0.5,
- "Efficiency should be low"
- );
-
- // Validate repair detection (USER signals - query reformulation)
- // Detected repairs:
- // 1. "I just said I want to cancel..." - pattern: "I just said"
- // 2. "No, I meant CANCEL..." - pattern: "No, I meant"
- // 3. "Let me rephrase: I need to terminate..." - pattern: "let me rephrase"
- // Note: "This is not helpful!!" is frustration (not repair)
- // Note: "I give up..." is escalation (not repair)
- assert_eq!(
- report.follow_up.repair_count, 3,
- "Should detect exactly 3 repair attempts from user messages"
- );
- assert_eq!(
- report.follow_up.repair_ratio, 0.5,
- "Repair ratio should be 0.5 (3 repairs / 6 user messages)"
- );
- assert!(
- report.follow_up.is_concerning,
- "50% repair ratio should be highly concerning (threshold is 30%)"
- );
-
- // Validate frustration detection
- assert!(
- report.frustration.has_frustration,
- "Should detect frustration"
- );
- assert!(
- report.frustration.frustration_count >= 4,
- "Should detect multiple frustration indicators: found {}",
- report.frustration.frustration_count
- );
- assert!(
- report.frustration.severity >= 2,
- "Should be at least moderate frustration"
- );
-
- // Validate repetition/looping detection (ASSISTANT signals - not following instructions)
- // The assistant repeats the same unhelpful responses multiple times:
- // 1. "I can help you with account management..." appears 3 times (exact repetition)
- // 2. "I understand you need help with your account..." appears 2 times (near-duplicate)
- assert!(
- report.repetition.repetition_count >= 4,
- "Should detect at least 4 assistant repetitions (exact + near-duplicates)"
- );
- assert!(
- report.repetition.has_looping,
- "Should detect looping (>2 repetitions indicates stuck agent)"
- );
- assert!(
- report.repetition.severity >= 2,
- "Should be moderate to severe looping (assistant not adapting)"
- );
-
- // Validate escalation detection
- assert!(
- report.escalation.escalation_requested,
- "Should detect escalation request"
- );
- assert!(
- report.escalation.escalation_count >= 2,
- "Should detect multiple escalation indicators: 'give up' + 'speak to a real person'"
- );
-
- // Validate overall quality
- assert_eq!(report.overall_quality, InteractionQuality::Severe, "Should be classified as Severe due to escalation + excessive frustration + looping + high repair ratio");
+ let r = SignalAnalyzer::default().analyze_sharegpt(&convo);
+ assert!(r.execution.failure.count >= 1);
+ assert!(r.quality_score < 50.0);
}
}
diff --git a/crates/brightstaff/src/signals/environment/exhaustion.rs b/crates/brightstaff/src/signals/environment/exhaustion.rs
new file mode 100644
index 00000000..142e7d6e
--- /dev/null
+++ b/crates/brightstaff/src/signals/environment/exhaustion.rs
@@ -0,0 +1,354 @@
+//! Environment exhaustion detector. Direct port of
+//! `signals/environment/exhaustion.py`.
+
+use std::sync::OnceLock;
+
+use regex::Regex;
+use serde_json::json;
+
+use crate::signals::analyzer::ShareGptMessage;
+use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
+
+pub const API_ERROR_PATTERNS: &[&str] = &[
+ r"500\s*(internal\s+)?server\s+error",
+ r"502\s*bad\s+gateway",
+ r"503\s*service\s+unavailable",
+ r"504\s*gateway\s+timeout",
+ r"internal\s+server\s+error",
+ r"service\s+unavailable",
+ r"server\s+error",
+ r"backend\s+error",
+ r"upstream\s+error",
+ r"service\s+temporarily\s+unavailable",
+ r"maintenance\s+mode",
+ r"under\s+maintenance",
+ r"try\s+again\s+later",
+ r"temporarily\s+unavailable",
+ r"system\s+error",
+ r"unexpected\s+error",
+ r"unhandled\s+exception",
+];
+
+pub const TIMEOUT_PATTERNS: &[&str] = &[
+ r"timeout",
+ r"timed?\s*out",
+ r"etimedout",
+ r"connection\s+timed?\s*out",
+ r"read\s+timed?\s*out",
+ r"request\s+timed?\s*out",
+ r"gateway\s+timeout",
+ r"deadline\s+exceeded",
+ r"took\s+too\s+long",
+ r"operation\s+timed?\s*out",
+ r"socket\s+timeout",
+];
+
+pub const RATE_LIMIT_PATTERNS: &[&str] = &[
+ r"rate\s+limit",
+ r"rate.limited",
+ r"(status|error|http)\s*:?\s*429",
+ r"429\s+(too\s+many|rate|limit)",
+ r"too\s+many\s+requests?",
+ r"quota\s+exceeded",
+ r"quota\s+limit",
+ r"throttl(ed|ing)",
+ r"request\s+limit",
+ r"api\s+limit",
+ r"calls?\s+per\s+(second|minute|hour|day)",
+ r"exceeded\s+.*\s+limit",
+ r"slow\s+down",
+ r"retry\s+after",
+ r"requests?\s+exceeded",
+];
+
+pub const NETWORK_PATTERNS: &[&str] = &[
+ r"connection\s+refused",
+ r"econnrefused",
+ r"econnreset",
+ r"connection\s+reset",
+ r"enotfound",
+ r"dns\s+(error|failure|lookup)",
+ r"host\s+not\s+found",
+ r"network\s+(error|failure|unreachable)",
+ r"no\s+route\s+to\s+host",
+ r"socket\s+error",
+ r"connection\s+failed",
+ r"unable\s+to\s+connect",
+ r"cannot\s+connect",
+ r"could\s+not\s+connect",
+ r"connect\s+error",
+ r"ssl\s+(error|handshake|certificate)",
+ r"certificate\s+(error|invalid|expired)",
+];
+
+pub const MALFORMED_PATTERNS: &[&str] = &[
+ r"json\s+parse\s+error",
+ r"invalid\s+json",
+ r"unexpected\s+token",
+ r"syntax\s+error.*json",
+ r"malformed\s+(response|json|data)",
+ r"unexpected\s+end\s+of",
+ r"parse\s+error",
+ r"parsing\s+failed",
+ r"invalid\s+response",
+ r"unexpected\s+response",
+ r"response\s+format",
+ r"missing\s+field.*response",
+ r"unexpected\s+schema",
+ r"schema\s+validation",
+ r"deserialization\s+error",
+ r"failed\s+to\s+decode",
+];
+
+pub const CONTEXT_OVERFLOW_PATTERNS: &[&str] = &[
+ r"context\s+(length|limit|overflow|exceeded)",
+ r"token\s+(limit|overflow|exceeded)",
+ r"max(imum)?\s+tokens?",
+ r"input\s+too\s+(long|large)",
+ r"exceeds?\s+(context|token|character|input)\s+limit",
+ r"message\s+too\s+(long|large)",
+ r"content\s+too\s+(long|large)",
+ r"truncat(ed|ion)\s+(due\s+to|because|for)\s+(length|size|limit)",
+ r"maximum\s+context",
+ r"prompt\s+too\s+(long|large)",
+];
+
+fn compile(patterns: &[&str]) -> Regex {
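+    // Join the patterns into one case-insensitive alternation, mirroring
+    // Python's `re.IGNORECASE` in the reference implementation.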
+ let combined = patterns
+ .iter()
+ .map(|p| format!("({})", p))
+        .collect::<Vec<String>>()
+ .join("|");
+ Regex::new(&format!("(?i){}", combined)).expect("exhaustion pattern regex must compile")
+}
+
+fn api_error_re() -> &'static Regex {
+    static R: OnceLock<Regex> = OnceLock::new();
+    R.get_or_init(|| compile(API_ERROR_PATTERNS))
+}
+fn timeout_re() -> &'static Regex {
+    static R: OnceLock<Regex> = OnceLock::new();
+    R.get_or_init(|| compile(TIMEOUT_PATTERNS))
+}
+fn rate_limit_re() -> &'static Regex {
+    static R: OnceLock<Regex> = OnceLock::new();
+    R.get_or_init(|| compile(RATE_LIMIT_PATTERNS))
+}
+fn network_re() -> &'static Regex {
+    static R: OnceLock<Regex> = OnceLock::new();
+    R.get_or_init(|| compile(NETWORK_PATTERNS))
+}
+fn malformed_re() -> &'static Regex {
+    static R: OnceLock<Regex> = OnceLock::new();
+    R.get_or_init(|| compile(MALFORMED_PATTERNS))
+}
+fn context_overflow_re() -> &'static Regex {
+    static R: OnceLock<Regex> = OnceLock::new();
+    R.get_or_init(|| compile(CONTEXT_OVERFLOW_PATTERNS))
+}
+
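+/// Build a snippet of up to `context` bytes on either side of a regex match,
+/// adding "..." on whichever ends were truncated. Mirrors `_get_snippet`.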
+fn snippet_around(text: &str, m: regex::Match<'_>, context: usize) -> String {
+ let start = m.start().saturating_sub(context);
+ let end = (m.end() + context).min(text.len());
+ let start = align_char_boundary(text, start, false);
+ let end = align_char_boundary(text, end, true);
+ let mut snippet = String::new();
+ if start > 0 {
+ snippet.push_str("...");
+ }
+ snippet.push_str(&text[start..end]);
+ if end < text.len() {
+ snippet.push_str("...");
+ }
+ snippet
+}
+
+fn align_char_boundary(s: &str, mut idx: usize, forward: bool) -> usize {
+ if idx >= s.len() {
+ return s.len();
+ }
+ while !s.is_char_boundary(idx) {
+ if forward {
+ idx += 1;
+ } else if idx == 0 {
+ break;
+ } else {
+ idx -= 1;
+ }
+ }
+ idx
+}
+
+pub fn analyze_exhaustion(messages: &[ShareGptMessage<'_>]) -> SignalGroup {
+ let mut group = SignalGroup::new("exhaustion");
+
+ for (i, msg) in messages.iter().enumerate() {
+ if msg.from != "observation" {
+ continue;
+ }
+ let value = msg.value;
+ let lower = value.to_lowercase();
+
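+        // Categories are checked most-specific first and each observation
+        // yields at most one signal, so a 429 body that also says "server
+        // error" is recorded as rate_limit rather than api_error.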
+ if let Some(m) = rate_limit_re().find(&lower) {
+ group.add_signal(emit(
+ SignalType::EnvironmentExhaustionRateLimit,
+ i,
+ snippet_around(value, m, 50),
+ 0.95,
+ "rate_limit",
+ m.as_str(),
+ ));
+ continue;
+ }
+
+ if let Some(m) = api_error_re().find(&lower) {
+ group.add_signal(emit(
+ SignalType::EnvironmentExhaustionApiError,
+ i,
+ snippet_around(value, m, 50),
+ 0.9,
+ "api_error",
+ m.as_str(),
+ ));
+ continue;
+ }
+
+ if let Some(m) = timeout_re().find(&lower) {
+ group.add_signal(emit(
+ SignalType::EnvironmentExhaustionTimeout,
+ i,
+ snippet_around(value, m, 50),
+ 0.9,
+ "timeout",
+ m.as_str(),
+ ));
+ continue;
+ }
+
+ if let Some(m) = network_re().find(&lower) {
+ group.add_signal(emit(
+ SignalType::EnvironmentExhaustionNetwork,
+ i,
+ snippet_around(value, m, 50),
+ 0.9,
+ "network",
+ m.as_str(),
+ ));
+ continue;
+ }
+
+ if let Some(m) = malformed_re().find(&lower) {
+ group.add_signal(emit(
+ SignalType::EnvironmentExhaustionMalformed,
+ i,
+ snippet_around(value, m, 50),
+ 0.85,
+ "malformed_response",
+ m.as_str(),
+ ));
+ continue;
+ }
+
+ if let Some(m) = context_overflow_re().find(&lower) {
+ group.add_signal(emit(
+ SignalType::EnvironmentExhaustionContextOverflow,
+ i,
+ snippet_around(value, m, 50),
+ 0.9,
+ "context_overflow",
+ m.as_str(),
+ ));
+ }
+ }
+
+ group
+}
+
+fn emit(
+ t: SignalType,
+ idx: usize,
+ snippet: String,
+ confidence: f32,
+ kind: &str,
+ matched: &str,
+) -> SignalInstance {
+ SignalInstance::new(t, idx, snippet)
+ .with_confidence(confidence)
+ .with_metadata(json!({
+ "exhaustion_type": kind,
+ "matched": matched,
+ }))
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ fn obs(value: &str) -> ShareGptMessage<'_> {
+ ShareGptMessage {
+ from: "observation",
+ value,
+ }
+ }
+
+ #[test]
+ fn detects_rate_limit() {
+ let g = analyze_exhaustion(&[obs("HTTP 429: too many requests, retry after 30s")]);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionRateLimit)));
+ }
+
+ #[test]
+ fn detects_api_error() {
+ let g = analyze_exhaustion(&[obs("503 service unavailable - try again later")]);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionApiError)));
+ }
+
+ #[test]
+ fn detects_timeout() {
+ let g = analyze_exhaustion(&[obs("Connection timed out after 30 seconds")]);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionTimeout)));
+ }
+
+ #[test]
+ fn detects_network_failure() {
+ let g = analyze_exhaustion(&[obs("ECONNREFUSED: connection refused by remote host")]);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionNetwork)));
+ }
+
+ #[test]
+ fn detects_malformed_response() {
+ let g = analyze_exhaustion(&[obs("Invalid JSON: unexpected token at position 42")]);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::EnvironmentExhaustionMalformed)));
+ }
+
+ #[test]
+ fn detects_context_overflow() {
+ let g = analyze_exhaustion(&[obs("Maximum context length exceeded for this model")]);
+ assert!(g.signals.iter().any(|s| matches!(
+ s.signal_type,
+ SignalType::EnvironmentExhaustionContextOverflow
+ )));
+ }
+}
diff --git a/crates/brightstaff/src/signals/environment/mod.rs b/crates/brightstaff/src/signals/environment/mod.rs
new file mode 100644
index 00000000..97d9b300
--- /dev/null
+++ b/crates/brightstaff/src/signals/environment/mod.rs
@@ -0,0 +1,3 @@
+//! Environment signals: exhaustion (external system failures and constraints).
+
+pub mod exhaustion;
diff --git a/crates/brightstaff/src/signals/execution/failure.rs b/crates/brightstaff/src/signals/execution/failure.rs
new file mode 100644
index 00000000..3e171446
--- /dev/null
+++ b/crates/brightstaff/src/signals/execution/failure.rs
@@ -0,0 +1,396 @@
+//! Execution failure detector. Direct port of `signals/execution/failure.py`.
+
+use std::sync::OnceLock;
+
+use regex::Regex;
+use serde_json::json;
+
+use crate::signals::analyzer::ShareGptMessage;
+use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
+
+pub const INVALID_ARGS_PATTERNS: &[&str] = &[
+ r"invalid\s+argument",
+ r"invalid\s+parameter",
+ r"invalid\s+type",
+ r"type\s*error",
+ r"expected\s+\w+\s*,?\s*got\s+\w+",
+ r"required\s+field",
+ r"required\s+parameter",
+ r"missing\s+required",
+ r"missing\s+argument",
+ r"validation\s+failed",
+ r"validation\s+error",
+ r"invalid\s+value",
+ r"invalid\s+format",
+ r"must\s+be\s+(a|an)\s+\w+",
+ r"cannot\s+be\s+(null|empty|none)",
+ r"is\s+not\s+valid",
+ r"does\s+not\s+match",
+ r"out\s+of\s+range",
+ r"invalid\s+date",
+ r"invalid\s+json",
+ r"malformed\s+request",
+];
+
+pub const BAD_QUERY_PATTERNS: &[&str] = &[
+ r"invalid\s+query",
+ r"query\s+syntax\s+error",
+ r"malformed\s+query",
+ r"unknown\s+field",
+ r"invalid\s+field",
+ r"invalid\s+filter",
+ r"invalid\s+search",
+ r"unknown\s+id",
+ r"invalid\s+id",
+ r"id\s+format\s+error",
+ r"invalid\s+identifier",
+ r"query\s+failed",
+ r"search\s+error",
+ r"invalid\s+operator",
+ r"unsupported\s+query",
+];
+
+pub const TOOL_NOT_FOUND_PATTERNS: &[&str] = &[
+ r"unknown\s+function",
+ r"unknown\s+tool",
+ r"function\s+not\s+found",
+ r"tool\s+not\s+found",
+ r"no\s+such\s+function",
+ r"no\s+such\s+tool",
+ r"undefined\s+function",
+ r"action\s+not\s+supported",
+ r"invalid\s+tool",
+ r"invalid\s+function",
+ r"unrecognized\s+function",
+];
+
+pub const AUTH_MISUSE_PATTERNS: &[&str] = &[
+ r"\bunauthorized\b",
+ r"(status|error|http|code)\s*:?\s*401",
+ r"401\s+unauthorized",
+ r"403\s+forbidden",
+ r"permission\s+denied",
+ r"access\s+denied",
+ r"authentication\s+required",
+ r"invalid\s+credentials",
+ r"invalid\s+token",
+ r"token\s+expired",
+ r"missing\s+authorization",
+ r"\bforbidden\b",
+ r"not\s+authorized",
+ r"insufficient\s+permissions?",
+];
+
+pub const STATE_ERROR_PATTERNS: &[&str] = &[
+ r"invalid\s+state",
+ r"illegal\s+state",
+ r"must\s+call\s+\w+\s+first",
+ r"must\s+\w+\s+before",
+ r"cannot\s+\w+\s+before",
+ r"already\s+(exists?|created|started|finished)",
+ r"not\s+initialized",
+ r"not\s+started",
+ r"already\s+in\s+progress",
+ r"operation\s+in\s+progress",
+ r"sequence\s+error",
+ r"precondition\s+failed",
+ r"(status|error|http)\s*:?\s*409",
+ r"409\s+conflict",
+ r"\bconflict\b",
+];
+
+fn compile(patterns: &[&str]) -> Regex {
+ // Use `(?i)` flag for case-insensitive matching, matching Python's `re.IGNORECASE`.
+ let combined = patterns
+ .iter()
+ .map(|p| format!("({})", p))
+        .collect::<Vec<String>>()
+ .join("|");
+ Regex::new(&format!("(?i){}", combined)).expect("failure pattern regex must compile")
+}
+
+fn invalid_args_re() -> &'static Regex {
+    static R: OnceLock<Regex> = OnceLock::new();
+    R.get_or_init(|| compile(INVALID_ARGS_PATTERNS))
+}
+fn bad_query_re() -> &'static Regex {
+    static R: OnceLock<Regex> = OnceLock::new();
+    R.get_or_init(|| compile(BAD_QUERY_PATTERNS))
+}
+fn tool_not_found_re() -> &'static Regex {
+    static R: OnceLock<Regex> = OnceLock::new();
+    R.get_or_init(|| compile(TOOL_NOT_FOUND_PATTERNS))
+}
+fn auth_misuse_re() -> &'static Regex {
+    static R: OnceLock<Regex> = OnceLock::new();
+    R.get_or_init(|| compile(AUTH_MISUSE_PATTERNS))
+}
+fn state_error_re() -> &'static Regex {
+    static R: OnceLock<Regex> = OnceLock::new();
+    R.get_or_init(|| compile(STATE_ERROR_PATTERNS))
+}
+
+/// Pull tool name + args from a `function_call` message. Mirrors
+/// `_extract_tool_info` in the reference.
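+/// For example, `{"name":"search","arguments":{"q":"x"}}` yields
+/// `("search", r#"{"q":"x"}"#)`, while unparseable input falls back to
+/// `("unknown", first-200-chars-of-value)`.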
+pub(crate) fn extract_tool_info(value: &str) -> (String, String) {
+    if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(value) {
+ if let Some(obj) = parsed.as_object() {
+ let name = obj
+ .get("name")
+ .or_else(|| obj.get("function"))
+ .and_then(|v| v.as_str())
+ .map(|s| s.to_string())
+ .unwrap_or_else(|| "unknown".to_string());
+ let args = match obj.get("arguments").or_else(|| obj.get("args")) {
+ Some(serde_json::Value::Object(o)) => {
+ serde_json::to_string(&serde_json::Value::Object(o.clone())).unwrap_or_default()
+ }
+ Some(other) => other
+ .as_str()
+ .map(|s| s.to_string())
+ .unwrap_or_else(|| serde_json::to_string(other).unwrap_or_default()),
+ None => String::new(),
+ };
+ return (name, args);
+ }
+ }
+ let mut snippet: String = value.chars().take(200).collect();
+ snippet.shrink_to_fit();
+ ("unknown".to_string(), snippet)
+}
+
+/// Build a context-window snippet around a regex match, with leading/trailing
+/// ellipses when truncated. Mirrors `_get_snippet`.
+fn snippet_around(text: &str, m: regex::Match<'_>, context: usize) -> String {
+ let start = m.start().saturating_sub(context);
+ let end = (m.end() + context).min(text.len());
+ // Ensure we cut on UTF-8 boundaries.
+ let start = align_char_boundary(text, start, false);
+ let end = align_char_boundary(text, end, true);
+ let mut snippet = String::new();
+ if start > 0 {
+ snippet.push_str("...");
+ }
+ snippet.push_str(&text[start..end]);
+ if end < text.len() {
+ snippet.push_str("...");
+ }
+ snippet
+}
+
+fn align_char_boundary(s: &str, mut idx: usize, forward: bool) -> usize {
+ if idx >= s.len() {
+ return s.len();
+ }
+ while !s.is_char_boundary(idx) {
+ if forward {
+ idx += 1;
+ } else if idx == 0 {
+ break;
+ } else {
+ idx -= 1;
+ }
+ }
+ idx
+}
+
+pub fn analyze_failure(messages: &[ShareGptMessage<'_>]) -> SignalGroup {
+ let mut group = SignalGroup::new("failure");
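+    // Remember the most recent function_call so each observation can be
+    // attributed to the tool name (and call index) that produced it.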
+ let mut last_call: Option<(usize, String, String)> = None;
+
+ for (i, msg) in messages.iter().enumerate() {
+ match msg.from {
+ "function_call" => {
+ let (name, args) = extract_tool_info(msg.value);
+ last_call = Some((i, name, args));
+ continue;
+ }
+ "observation" => {}
+ _ => continue,
+ }
+
+ let value = msg.value;
+ let lower = value.to_lowercase();
+ let (call_index, tool_name) = match &last_call {
+ Some((idx, name, _)) => (*idx, name.clone()),
+ None => (i.saturating_sub(1), "unknown".to_string()),
+ };
+
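+        // First match wins: categories run from most to least specific, with
+        // bad_query (the noisiest) last and its confidence scaled down when
+        // no explicit error words appear alongside the match.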
+ if let Some(m) = invalid_args_re().find(&lower) {
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::ExecutionFailureInvalidArgs,
+ i,
+ snippet_around(value, m, 50),
+ )
+ .with_confidence(0.9)
+ .with_metadata(json!({
+ "tool_name": tool_name,
+ "call_index": call_index,
+ "error_type": "invalid_args",
+ "matched": m.as_str(),
+ })),
+ );
+ continue;
+ }
+
+ if let Some(m) = tool_not_found_re().find(&lower) {
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::ExecutionFailureToolNotFound,
+ i,
+ snippet_around(value, m, 50),
+ )
+ .with_confidence(0.95)
+ .with_metadata(json!({
+ "tool_name": tool_name,
+ "call_index": call_index,
+ "error_type": "tool_not_found",
+ "matched": m.as_str(),
+ })),
+ );
+ continue;
+ }
+
+ if let Some(m) = auth_misuse_re().find(&lower) {
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::ExecutionFailureAuthMisuse,
+ i,
+ snippet_around(value, m, 50),
+ )
+ .with_confidence(0.8)
+ .with_metadata(json!({
+ "tool_name": tool_name,
+ "call_index": call_index,
+ "error_type": "auth_misuse",
+ "matched": m.as_str(),
+ })),
+ );
+ continue;
+ }
+
+ if let Some(m) = state_error_re().find(&lower) {
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::ExecutionFailureStateError,
+ i,
+ snippet_around(value, m, 50),
+ )
+ .with_confidence(0.85)
+ .with_metadata(json!({
+ "tool_name": tool_name,
+ "call_index": call_index,
+ "error_type": "state_error",
+ "matched": m.as_str(),
+ })),
+ );
+ continue;
+ }
+
+ if let Some(m) = bad_query_re().find(&lower) {
+ let confidence = if ["error", "invalid", "failed"]
+ .iter()
+ .any(|w| lower.contains(w))
+ {
+ 0.8
+ } else {
+ 0.6
+ };
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::ExecutionFailureBadQuery,
+ i,
+ snippet_around(value, m, 50),
+ )
+ .with_confidence(confidence)
+ .with_metadata(json!({
+ "tool_name": tool_name,
+ "call_index": call_index,
+ "error_type": "bad_query",
+ "matched": m.as_str(),
+ })),
+ );
+ }
+ }
+
+ group
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ fn fc(value: &str) -> ShareGptMessage<'_> {
+ ShareGptMessage {
+ from: "function_call",
+ value,
+ }
+ }
+ fn obs(value: &str) -> ShareGptMessage<'_> {
+ ShareGptMessage {
+ from: "observation",
+ value,
+ }
+ }
+
+ #[test]
+ fn detects_invalid_args() {
+ let msgs = vec![
+ fc(r#"{"name":"create_user","arguments":{"age":"twelve"}}"#),
+ obs("Error: validation failed - expected integer got string for field age"),
+ ];
+ let g = analyze_failure(&msgs);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::ExecutionFailureInvalidArgs)));
+ }
+
+ #[test]
+ fn detects_tool_not_found() {
+ let msgs = vec![
+ fc(r#"{"name":"send_thought","arguments":{}}"#),
+ obs("Error: unknown function 'send_thought'"),
+ ];
+ let g = analyze_failure(&msgs);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::ExecutionFailureToolNotFound)));
+ }
+
+ #[test]
+ fn detects_auth_misuse() {
+ let msgs = vec![
+ fc(r#"{"name":"get_secret","arguments":{}}"#),
+ obs("HTTP 401 Unauthorized"),
+ ];
+ let g = analyze_failure(&msgs);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::ExecutionFailureAuthMisuse)));
+ }
+
+ #[test]
+ fn detects_state_error() {
+ let msgs = vec![
+ fc(r#"{"name":"commit_tx","arguments":{}}"#),
+ obs("must call begin_tx first"),
+ ];
+ let g = analyze_failure(&msgs);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::ExecutionFailureStateError)));
+ }
+}
diff --git a/crates/brightstaff/src/signals/execution/loops.rs b/crates/brightstaff/src/signals/execution/loops.rs
new file mode 100644
index 00000000..70b90e83
--- /dev/null
+++ b/crates/brightstaff/src/signals/execution/loops.rs
@@ -0,0 +1,444 @@
+//! Execution loops detector. Direct port of `signals/execution/loops.py`.
+
+use serde_json::json;
+
+use crate::signals::analyzer::ShareGptMessage;
+use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
+
+pub const RETRY_THRESHOLD: usize = 3;
+pub const PARAMETER_DRIFT_THRESHOLD: usize = 3;
+pub const OSCILLATION_CYCLES_THRESHOLD: usize = 3;
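+// A run only fires once it crosses its threshold: three identical calls for
+// retry, three same-name calls with at least two argument variants for
+// parameter drift, and three full cycles of a repeating window for oscillation.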
+
+#[derive(Debug, Clone)]
+pub struct ToolCall {
+ pub index: usize,
+ pub name: String,
+ /// Canonical JSON string of arguments (sorted keys when parseable).
+ pub args: String,
+    pub args_dict: Option<serde_json::Map<String, serde_json::Value>>,
+}
+
+impl ToolCall {
+ pub fn args_equal(&self, other: &ToolCall) -> bool {
+ match (&self.args_dict, &other.args_dict) {
+ (Some(a), Some(b)) => a == b,
+ _ => self.args == other.args,
+ }
+ }
+}
+
+fn parse_tool_call(index: usize, msg: &ShareGptMessage<'_>) -> Option<ToolCall> {
+ if msg.from != "function_call" {
+ return None;
+ }
+ let value = msg.value;
+
+    if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(value) {
+ if let Some(obj) = parsed.as_object() {
+ let name = obj
+ .get("name")
+ .or_else(|| obj.get("function"))
+ .and_then(|v| v.as_str())
+ .map(|s| s.to_string())
+ .unwrap_or_else(|| "unknown".to_string());
+ let raw_args = obj.get("arguments").or_else(|| obj.get("args"));
+ let (args_str, args_dict) = match raw_args {
+ Some(serde_json::Value::Object(o)) => {
+ let mut keys: Vec<&String> = o.keys().collect();
+ keys.sort();
+ let mut canon = serde_json::Map::new();
+ for k in keys {
+ canon.insert(k.clone(), o[k].clone());
+ }
+ (
+ serde_json::to_string(&serde_json::Value::Object(canon.clone()))
+ .unwrap_or_default(),
+ Some(canon),
+ )
+ }
+ Some(other) => (
+ other
+ .as_str()
+ .map(|s| s.to_string())
+ .unwrap_or_else(|| serde_json::to_string(other).unwrap_or_default()),
+ None,
+ ),
+ None => (String::new(), None),
+ };
+ return Some(ToolCall {
+ index,
+ name,
+ args: args_str,
+ args_dict,
+ });
+ }
+ }
+
+ if let Some(paren) = value.find('(') {
+ if paren > 0 {
+ let name = value[..paren].trim().to_string();
+ let args_part = &value[paren..];
+ if args_part.starts_with('(') && args_part.ends_with(')') {
+ let inner = args_part[1..args_part.len() - 1].trim();
+ if let Ok(serde_json::Value::Object(o)) =
+                    serde_json::from_str::<serde_json::Value>(inner)
+ {
+ let mut keys: Vec<&String> = o.keys().collect();
+ keys.sort();
+ let mut canon = serde_json::Map::new();
+ for k in keys {
+ canon.insert(k.clone(), o[k].clone());
+ }
+ return Some(ToolCall {
+ index,
+ name,
+ args: serde_json::to_string(&serde_json::Value::Object(canon.clone()))
+ .unwrap_or_default(),
+ args_dict: Some(canon),
+ });
+ }
+ return Some(ToolCall {
+ index,
+ name,
+ args: inner.to_string(),
+ args_dict: None,
+ });
+ }
+ return Some(ToolCall {
+ index,
+ name,
+ args: args_part.to_string(),
+ args_dict: None,
+ });
+ }
+ }
+
+ Some(ToolCall {
+ index,
+ name: value.trim().to_string(),
+ args: String::new(),
+ args_dict: None,
+ })
+}
+
+fn extract_tool_calls(messages: &[ShareGptMessage<'_>]) -> Vec<ToolCall> {
+ let mut out = Vec::new();
+ for (i, msg) in messages.iter().enumerate() {
+ if let Some(c) = parse_tool_call(i, msg) {
+ out.push(c);
+ }
+ }
+ out
+}
+
+fn detect_retry(calls: &[ToolCall]) -> Vec<(usize, usize, String)> {
+ if calls.len() < RETRY_THRESHOLD {
+ return Vec::new();
+ }
+ let mut patterns = Vec::new();
+ let mut i = 0;
+ while i < calls.len() {
+ let current = &calls[i];
+ let mut j = i + 1;
+ let mut run_length = 1;
+ while j < calls.len() {
+ if calls[j].name == current.name && calls[j].args_equal(current) {
+ run_length += 1;
+ j += 1;
+ } else {
+ break;
+ }
+ }
+ if run_length >= RETRY_THRESHOLD {
+ patterns.push((calls[i].index, calls[j - 1].index, current.name.clone()));
+ i = j;
+ } else {
+ i += 1;
+ }
+ }
+ patterns
+}
+
+fn detect_parameter_drift(calls: &[ToolCall]) -> Vec<(usize, usize, String, usize)> {
+ if calls.len() < PARAMETER_DRIFT_THRESHOLD {
+ return Vec::new();
+ }
+ let mut patterns = Vec::new();
+ let mut i = 0;
+ while i < calls.len() {
+ let current_name = calls[i].name.clone();
+        let mut seen_args: Vec<String> = vec![calls[i].args.clone()];
+ let mut unique_args = 1;
+ let mut j = i + 1;
+ while j < calls.len() {
+ if calls[j].name != current_name {
+ break;
+ }
+ if !seen_args.iter().any(|a| a == &calls[j].args) {
+ seen_args.push(calls[j].args.clone());
+ unique_args += 1;
+ }
+ j += 1;
+ }
+ let run_length = j - i;
+ if run_length >= PARAMETER_DRIFT_THRESHOLD && unique_args >= 2 {
+ patterns.push((
+ calls[i].index,
+ calls[j - 1].index,
+ current_name,
+ unique_args,
+ ));
+ i = j;
+ } else {
+ i += 1;
+ }
+ }
+ patterns
+}
+
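+/// Detect A->B->A->B style cycles: slides windows of 2..=5 calls containing
+/// at least two distinct tool names and reports any window repeated for
+/// `OSCILLATION_CYCLES_THRESHOLD` or more consecutive cycles.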
+fn detect_oscillation(calls: &[ToolCall]) -> Vec<(usize, usize, Vec<String>, usize)> {
+ let min_calls = 2 * OSCILLATION_CYCLES_THRESHOLD;
+ if calls.len() < min_calls {
+ return Vec::new();
+ }
+ let mut patterns = Vec::new();
+ let mut i: usize = 0;
+ while i + min_calls <= calls.len() {
+ let max_pat_len = (5usize).min(calls.len() - i);
+ let mut found_for_i = false;
+ for pat_len in 2..=max_pat_len {
+            let pattern_names: Vec<String> =
+ (0..pat_len).map(|k| calls[i + k].name.clone()).collect();
+ let unique: std::collections::HashSet<&String> = pattern_names.iter().collect();
+ if unique.len() < 2 {
+ continue;
+ }
+ let mut cycles = 1;
+ let mut pos = i + pat_len;
+ while pos + pat_len <= calls.len() {
+ let mut all_match = true;
+ for k in 0..pat_len {
+ if calls[pos + k].name != pattern_names[k] {
+ all_match = false;
+ break;
+ }
+ }
+ if all_match {
+ cycles += 1;
+ pos += pat_len;
+ } else {
+ break;
+ }
+ }
+ if cycles >= OSCILLATION_CYCLES_THRESHOLD {
+ let end_idx_in_calls = i + (cycles * pat_len) - 1;
+ patterns.push((
+ calls[i].index,
+ calls[end_idx_in_calls].index,
+ pattern_names,
+ cycles,
+ ));
+ // Mirror Python: `i = end_idx + 1 - pattern_len`. We set `i` so that
+ // the next outer iteration begins after we account for overlap.
+ i = end_idx_in_calls + 1 - pat_len;
+ found_for_i = true;
+ break;
+ }
+ }
+        if !found_for_i {
+            i += 1;
+        }
+        // When a repeating pattern was found above, `i` was already advanced
+        // past the detected cycles (Python's `i = end_idx + 1 - pattern_len`),
+        // so the outer loop simply re-checks from there.
+ }
+ if patterns.len() > 1 {
+ patterns = deduplicate_patterns(patterns);
+ }
+ patterns
+}
+
+fn deduplicate_patterns(
+    mut patterns: Vec<(usize, usize, Vec<String>, usize)>,
+) -> Vec<(usize, usize, Vec<String>, usize)> {
+ if patterns.is_empty() {
+ return patterns;
+ }
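+    // Keep the earliest-starting, longest span in each overlapping cluster:
+    // sort by start ascending (longer span first on ties), then drop any
+    // pattern that begins inside the previously kept one.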
+ patterns.sort_by(|a, b| {
+ let ord = a.0.cmp(&b.0);
+ if ord != std::cmp::Ordering::Equal {
+ ord
+ } else {
+ (b.1 - b.0).cmp(&(a.1 - a.0))
+ }
+ });
+ let mut result = Vec::new();
+ let mut last_end: i64 = -1;
+ for p in patterns {
+ if (p.0 as i64) > last_end {
+ last_end = p.1 as i64;
+ result.push(p);
+ }
+ }
+ result
+}
+
+pub fn analyze_loops(messages: &[ShareGptMessage<'_>]) -> SignalGroup {
+ let mut group = SignalGroup::new("loops");
+ let calls = extract_tool_calls(messages);
+ if calls.len() < RETRY_THRESHOLD {
+ return group;
+ }
+
+ let retries = detect_retry(&calls);
+ for (start_idx, end_idx, tool_name) in &retries {
+ let call_count = calls
+ .iter()
+ .filter(|c| *start_idx <= c.index && c.index <= *end_idx)
+ .count();
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::ExecutionLoopsRetry,
+ *start_idx,
+ format!(
+ "Tool '{}' called {} times with identical arguments",
+ tool_name, call_count
+ ),
+ )
+ .with_confidence(0.95)
+ .with_metadata(json!({
+ "tool_name": tool_name,
+ "start_index": start_idx,
+ "end_index": end_idx,
+ "call_count": call_count,
+ "loop_type": "retry",
+ })),
+ );
+ }
+
+ let drifts = detect_parameter_drift(&calls);
+ for (start_idx, end_idx, tool_name, variation_count) in &drifts {
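+        // Skip drift windows that overlap a retry run; those calls are
+        // already reported as a retry loop with higher confidence.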
+ let overlaps_retry = retries
+ .iter()
+ .any(|r| !(*end_idx < r.0 || *start_idx > r.1));
+ if overlaps_retry {
+ continue;
+ }
+ let call_count = calls
+ .iter()
+ .filter(|c| *start_idx <= c.index && c.index <= *end_idx)
+ .count();
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::ExecutionLoopsParameterDrift,
+ *start_idx,
+ format!(
+ "Tool '{}' called {} times with {} different argument variations",
+ tool_name, call_count, variation_count
+ ),
+ )
+ .with_confidence(0.85)
+ .with_metadata(json!({
+ "tool_name": tool_name,
+ "start_index": start_idx,
+ "end_index": end_idx,
+ "call_count": call_count,
+ "variation_count": variation_count,
+ "loop_type": "parameter_drift",
+ })),
+ );
+ }
+
+ let oscillations = detect_oscillation(&calls);
+ for (start_idx, end_idx, tool_names, cycle_count) in &oscillations {
+ let pattern_str = tool_names.join(" \u{2192} ");
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::ExecutionLoopsOscillation,
+ *start_idx,
+ format!(
+ "Oscillation pattern [{}] repeated {} times",
+ pattern_str, cycle_count
+ ),
+ )
+ .with_confidence(0.9)
+ .with_metadata(json!({
+ "pattern": tool_names,
+ "start_index": start_idx,
+ "end_index": end_idx,
+ "cycle_count": cycle_count,
+ "loop_type": "oscillation",
+ })),
+ );
+ }
+
+ group
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ fn fc(value: &str) -> ShareGptMessage<'_> {
+ ShareGptMessage {
+ from: "function_call",
+ value,
+ }
+ }
+
+ #[test]
+ fn detects_retry_loop() {
+ let arg = r#"{"name":"check_status","arguments":{"id":"abc"}}"#;
+ let msgs = vec![fc(arg), fc(arg), fc(arg), fc(arg)];
+ let g = analyze_loops(&msgs);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::ExecutionLoopsRetry)));
+ }
+
+ #[test]
+ fn detects_parameter_drift() {
+ let msgs = vec![
+ fc(r#"{"name":"search","arguments":{"q":"a"}}"#),
+ fc(r#"{"name":"search","arguments":{"q":"ab"}}"#),
+ fc(r#"{"name":"search","arguments":{"q":"abc"}}"#),
+ fc(r#"{"name":"search","arguments":{"q":"abcd"}}"#),
+ ];
+ let g = analyze_loops(&msgs);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::ExecutionLoopsParameterDrift)));
+ }
+
+ #[test]
+ fn detects_oscillation() {
+ let a = r#"{"name":"toolA","arguments":{}}"#;
+ let b = r#"{"name":"toolB","arguments":{}}"#;
+ let msgs = vec![fc(a), fc(b), fc(a), fc(b), fc(a), fc(b)];
+ let g = analyze_loops(&msgs);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::ExecutionLoopsOscillation)));
+ }
+
+ #[test]
+ fn no_signals_when_few_calls() {
+ let msgs = vec![fc(r#"{"name":"only_once","arguments":{}}"#)];
+ let g = analyze_loops(&msgs);
+ assert!(g.signals.is_empty());
+ }
+}
diff --git a/crates/brightstaff/src/signals/execution/mod.rs b/crates/brightstaff/src/signals/execution/mod.rs
new file mode 100644
index 00000000..87dc28c4
--- /dev/null
+++ b/crates/brightstaff/src/signals/execution/mod.rs
@@ -0,0 +1,5 @@
+//! Execution signals: failure (agent-caused tool errors) and loops
+//! (repetitive tool-call behavior).
+
+pub mod failure;
+pub mod loops;
diff --git a/crates/brightstaff/src/signals/interaction/constants.rs b/crates/brightstaff/src/signals/interaction/constants.rs
new file mode 100644
index 00000000..2301395c
--- /dev/null
+++ b/crates/brightstaff/src/signals/interaction/constants.rs
@@ -0,0 +1,200 @@
+//! Shared constants for the interaction layer detectors.
+//!
+//! Direct port of `signals/interaction/constants.py`.
+
+use std::collections::HashSet;
+use std::sync::OnceLock;
+
+pub const POSITIVE_PREFIXES: &[&str] = &[
+ "yes",
+ "yeah",
+ "yep",
+ "yup",
+ "sure",
+ "ok",
+ "okay",
+ "great",
+ "awesome",
+ "perfect",
+ "thanks",
+ "thank",
+ "wonderful",
+ "excellent",
+ "amazing",
+ "nice",
+ "good",
+ "cool",
+ "absolutely",
+ "definitely",
+ "please",
+];
+
+pub const CONFIRMATION_PREFIXES: &[&str] = &[
+ "yes",
+ "yeah",
+ "yep",
+ "yup",
+ "correct",
+ "right",
+ "that's correct",
+ "thats correct",
+ "that's right",
+ "thats right",
+ "that is correct",
+ "that is right",
+];
+
+const STOPWORD_LIST: &[&str] = &[
+ "a",
+ "about",
+ "above",
+ "after",
+ "again",
+ "against",
+ "all",
+ "am",
+ "an",
+ "and",
+ "any",
+ "are",
+ "as",
+ "at",
+ "be",
+ "because",
+ "been",
+ "before",
+ "being",
+ "below",
+ "between",
+ "both",
+ "but",
+ "by",
+ "can",
+ "could",
+ "did",
+ "do",
+ "does",
+ "doing",
+ "down",
+ "during",
+ "each",
+ "few",
+ "for",
+ "from",
+ "further",
+ "had",
+ "has",
+ "have",
+ "having",
+ "he",
+ "her",
+ "here",
+ "hers",
+ "herself",
+ "him",
+ "himself",
+ "his",
+ "how",
+ "i",
+ "if",
+ "in",
+ "into",
+ "is",
+ "it",
+ "its",
+ "itself",
+ "just",
+ "me",
+ "more",
+ "most",
+ "my",
+ "myself",
+ "no",
+ "nor",
+ "not",
+ "now",
+ "of",
+ "off",
+ "on",
+ "once",
+ "only",
+ "or",
+ "other",
+ "our",
+ "ours",
+ "ourselves",
+ "out",
+ "over",
+ "own",
+ "same",
+ "she",
+ "should",
+ "so",
+ "some",
+ "such",
+ "than",
+ "that",
+ "the",
+ "their",
+ "theirs",
+ "them",
+ "themselves",
+ "then",
+ "there",
+ "these",
+ "they",
+ "this",
+ "those",
+ "through",
+ "to",
+ "too",
+ "under",
+ "until",
+ "up",
+ "very",
+ "was",
+ "we",
+ "were",
+ "what",
+ "when",
+ "where",
+ "which",
+ "while",
+ "who",
+ "whom",
+ "why",
+ "with",
+ "would",
+ "you",
+ "your",
+ "yours",
+ "yourself",
+ "yourselves",
+];
+
+pub fn stopwords() -> &'static HashSet<&'static str> {
+    static SET: OnceLock<HashSet<&'static str>> = OnceLock::new();
+ SET.get_or_init(|| STOPWORD_LIST.iter().copied().collect())
+}
+
+/// Returns true if `text` (case-insensitive, trimmed) starts with any of the
+/// given prefixes treated as **whole tokens or token sequences**. This is the
+/// Python's `text_lower.startswith(prefix)` tightened with a token-boundary
+/// check, so `"please"` doesn't fire on `"pleased"`.
+pub fn starts_with_prefix(text: &str, prefixes: &[&str]) -> bool {
+    let lowered = text.to_lowercase();
+    let trimmed = lowered.trim_start();
+    for prefix in prefixes {
+        if let Some(rest) = trimmed.strip_prefix(prefix) {
+            // Whole-token guard: the character after the prefix must not
+            // extend the word, so "please" matches but "pleased" does not.
+            if !rest.chars().next().map_or(false, |c| c.is_alphanumeric()) {
+                return true;
+            }
+        }
+    }
+    false
+}
diff --git a/crates/brightstaff/src/signals/interaction/disengagement.rs b/crates/brightstaff/src/signals/interaction/disengagement.rs
new file mode 100644
index 00000000..28711d18
--- /dev/null
+++ b/crates/brightstaff/src/signals/interaction/disengagement.rs
@@ -0,0 +1,445 @@
+//! Disengagement signals: escalation, quit, negative stance.
+//!
+//! Direct port of `signals/interaction/disengagement.py`.
+
+use std::sync::OnceLock;
+
+use regex::Regex;
+use serde_json::json;
+
+use super::constants::{starts_with_prefix, POSITIVE_PREFIXES};
+use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
+use crate::signals::text_processing::{normalize_patterns, NormalizedMessage, NormalizedPattern};
+
+const ESCALATION_PATTERN_TEXTS: &[&str] = &[
+ // Human requests
+ "speak to a human",
+ "talk to a human",
+ "connect me to a human",
+ "connect me with a human",
+ "transfer me to a human",
+ "get me a human",
+ "chat with a human",
+ // Person requests
+ "speak to a person",
+ "talk to a person",
+ "connect me to a person",
+ "connect me with a person",
+ "transfer me to a person",
+ "get me a person",
+ "chat with a person",
+ // Real person requests
+ "speak to a real person",
+ "talk to a real person",
+ "connect me to a real person",
+ "connect me with a real person",
+ "transfer me to a real person",
+ "get me a real person",
+ "chat with a real person",
+ // Actual person requests
+ "speak to an actual person",
+ "talk to an actual person",
+ "connect me to an actual person",
+ "connect me with an actual person",
+ "transfer me to an actual person",
+ "get me an actual person",
+ "chat with an actual person",
+ // Supervisor requests
+ "speak to a supervisor",
+ "talk to a supervisor",
+ "connect me to a supervisor",
+ "connect me with a supervisor",
+ "transfer me to a supervisor",
+ "get me a supervisor",
+ "chat with a supervisor",
+ // Manager requests
+ "speak to a manager",
+ "talk to a manager",
+ "connect me to a manager",
+ "connect me with a manager",
+ "transfer me to a manager",
+ "get me a manager",
+ "chat with a manager",
+ // Customer service requests
+ "speak to customer service",
+ "talk to customer service",
+ "connect me to customer service",
+ "connect me with customer service",
+ "transfer me to customer service",
+ "get me customer service",
+ "chat with customer service",
+ // Customer support requests
+ "speak to customer support",
+ "talk to customer support",
+ "connect me to customer support",
+ "connect me with customer support",
+ "transfer me to customer support",
+ "get me customer support",
+ "chat with customer support",
+ // Support requests
+ "speak to support",
+ "talk to support",
+ "connect me to support",
+ "connect me with support",
+ "transfer me to support",
+ "get me support",
+ "chat with support",
+ // Tech support requests
+ "speak to tech support",
+ "talk to tech support",
+ "connect me to tech support",
+ "connect me with tech support",
+ "transfer me to tech support",
+ "get me tech support",
+ "chat with tech support",
+ // Help desk requests
+ "speak to help desk",
+ "talk to help desk",
+ "connect me to help desk",
+ "connect me with help desk",
+ "transfer me to help desk",
+ "get me help desk",
+ "chat with help desk",
+ // Explicit escalation
+ "escalate this",
+];
+
+const QUIT_PATTERN_TEXTS: &[&str] = &[
+ "i give up",
+ "i'm giving up",
+ "im giving up",
+ "i'm going to quit",
+ "i quit",
+ "forget it",
+ "forget this",
+ "screw it",
+ "screw this",
+ "don't bother trying",
+ "don't bother with this",
+ "don't bother with it",
+ "don't even bother",
+ "why bother",
+ "not worth it",
+ "this is hopeless",
+ "going elsewhere",
+ "try somewhere else",
+ "look elsewhere",
+];
+
+const NEGATIVE_STANCE_PATTERN_TEXTS: &[&str] = &[
+ "this is useless",
+ "not helpful",
+ "doesn't help",
+ "not helping",
+ "you're not helping",
+ "youre not helping",
+ "this doesn't work",
+ "this doesnt work",
+ "this isn't working",
+ "this isnt working",
+ "still doesn't work",
+ "still doesnt work",
+ "still not working",
+ "still isn't working",
+ "still isnt working",
+ "waste of time",
+ "wasting my time",
+ "this is ridiculous",
+ "this is absurd",
+ "this is insane",
+ "this is stupid",
+ "this is dumb",
+ "this sucks",
+ "this is frustrating",
+ "not good enough",
+ "why can't you",
+ "why cant you",
+ "same issue",
+ "did that already",
+ "done that already",
+ "tried that already",
+ "already tried that",
+ "i've done that",
+ "ive done that",
+ "i've tried that",
+ "ive tried that",
+ "i'm disappointed",
+ "im disappointed",
+ "disappointed with you",
+ "disappointed in you",
+ "useless bot",
+ "dumb bot",
+ "stupid bot",
+];
+
+const AGENT_DIRECTED_PROFANITY_PATTERN_TEXTS: &[&str] = &[
+ "this is bullshit",
+ "what bullshit",
+ "such bullshit",
+ "total bullshit",
+ "complete bullshit",
+ "this is crap",
+ "what crap",
+ "this is shit",
+ "what the hell is wrong with you",
+ "what the fuck is wrong with you",
+ "you're fucking useless",
+ "youre fucking useless",
+ "you are fucking useless",
+ "fucking useless",
+ "this bot is shit",
+ "this bot is crap",
+ "damn bot",
+ "fucking bot",
+ "stupid fucking",
+ "are you fucking kidding",
+ "wtf is wrong with you",
+ "wtf is this",
+ "ffs just",
+ "for fucks sake",
+ "for fuck's sake",
+ "what the f**k",
+ "what the f*ck",
+ "what the f***",
+ "that's bullsh*t",
+ "thats bullsh*t",
+ "that's bull***t",
+ "thats bull***t",
+ "that's bs",
+ "thats bs",
+ "this is bullsh*t",
+ "this is bull***t",
+ "this is bs",
+];
+
+fn escalation_patterns() -> &'static Vec<NormalizedPattern> {
+    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
+    PATS.get_or_init(|| normalize_patterns(ESCALATION_PATTERN_TEXTS))
+}
+
+fn quit_patterns() -> &'static Vec<NormalizedPattern> {
+    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
+    PATS.get_or_init(|| normalize_patterns(QUIT_PATTERN_TEXTS))
+}
+
+fn negative_stance_patterns() -> &'static Vec<NormalizedPattern> {
+    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
+    PATS.get_or_init(|| normalize_patterns(NEGATIVE_STANCE_PATTERN_TEXTS))
+}
+
+fn profanity_patterns() -> &'static Vec<NormalizedPattern> {
+    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
+    PATS.get_or_init(|| normalize_patterns(AGENT_DIRECTED_PROFANITY_PATTERN_TEXTS))
+}
+
+fn re_consecutive_q() -> &'static Regex {
+    static R: OnceLock<Regex> = OnceLock::new();
+    R.get_or_init(|| Regex::new(r"\?{2,}").unwrap())
+}
+fn re_consecutive_e() -> &'static Regex {
+    static R: OnceLock<Regex> = OnceLock::new();
+    R.get_or_init(|| Regex::new(r"!{2,}").unwrap())
+}
+fn re_mixed_punct() -> &'static Regex {
+    static R: OnceLock<Regex> = OnceLock::new();
+    R.get_or_init(|| Regex::new(r"[?!]{3,}").unwrap())
+}
+
+pub fn analyze_disengagement(
+ normalized_messages: &[(usize, &str, NormalizedMessage)],
+ char_ngram_threshold: f32,
+ token_cosine_threshold: f32,
+) -> SignalGroup {
+ let mut group = SignalGroup::new("disengagement");
+
+ for (idx, role, norm_msg) in normalized_messages {
+ if *role != "human" {
+ continue;
+ }
+
+ let text = &norm_msg.raw;
+
+ // All-caps shouting check.
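+        // Example (illustrative): "WHERE IS MY ORDER" has 14 alphabetic
+        // chars, all uppercase, so upper_ratio = 1.0 >= 0.8 and it fires.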
+ let alpha_chars: String = text.chars().filter(|c| c.is_alphabetic()).collect();
+ if alpha_chars.chars().count() >= 10 {
+ let upper_count = alpha_chars.chars().filter(|c| c.is_uppercase()).count();
+ let upper_ratio = upper_count as f32 / alpha_chars.chars().count() as f32;
+ if upper_ratio >= 0.8 {
+ let snippet: String = text.chars().take(50).collect();
+ group.add_signal(
+ SignalInstance::new(SignalType::DisengagementNegativeStance, *idx, snippet)
+ .with_metadata(json!({
+ "indicator_type": "all_caps",
+ "upper_ratio": upper_ratio,
+ })),
+ );
+ }
+ }
+
+ // Excessive consecutive punctuation.
+ let starts_with_positive = starts_with_prefix(text, POSITIVE_PREFIXES);
+ let cq = re_consecutive_q().find_iter(text).count();
+ let ce = re_consecutive_e().find_iter(text).count();
+ let mixed = re_mixed_punct().find_iter(text).count();
+ if !starts_with_positive && (cq >= 1 || ce >= 1 || mixed >= 1) {
+ let snippet: String = text.chars().take(50).collect();
+ group.add_signal(
+ SignalInstance::new(SignalType::DisengagementNegativeStance, *idx, snippet)
+ .with_metadata(json!({
+ "indicator_type": "excessive_punctuation",
+ "consecutive_questions": cq,
+ "consecutive_exclamations": ce,
+ "mixed_punctuation": mixed,
+ })),
+ );
+ }
+
+ // Escalation patterns.
+ let mut found_escalation = false;
+ for pattern in escalation_patterns() {
+ if norm_msg.matches_normalized_pattern(
+ pattern,
+ char_ngram_threshold,
+ token_cosine_threshold,
+ ) {
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::DisengagementEscalation,
+ *idx,
+ pattern.raw.clone(),
+ )
+ .with_metadata(json!({"pattern_type": "escalation"})),
+ );
+ found_escalation = true;
+ break;
+ }
+ }
+
+ // Quit patterns (independent of escalation).
+ for pattern in quit_patterns() {
+ if norm_msg.matches_normalized_pattern(
+ pattern,
+ char_ngram_threshold,
+ token_cosine_threshold,
+ ) {
+ group.add_signal(
+ SignalInstance::new(SignalType::DisengagementQuit, *idx, pattern.raw.clone())
+ .with_metadata(json!({"pattern_type": "quit"})),
+ );
+ break;
+ }
+ }
+
+ // Profanity (more specific) before generic negative stance.
+ let mut found_profanity = false;
+ for pattern in profanity_patterns() {
+ if norm_msg.matches_normalized_pattern(
+ pattern,
+ char_ngram_threshold,
+ token_cosine_threshold,
+ ) {
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::DisengagementNegativeStance,
+ *idx,
+ pattern.raw.clone(),
+ )
+ .with_metadata(json!({
+ "indicator_type": "profanity",
+ "pattern": pattern.raw,
+ })),
+ );
+ found_profanity = true;
+ break;
+ }
+ }
+
+ if !found_escalation && !found_profanity {
+ for pattern in negative_stance_patterns() {
+ if norm_msg.matches_normalized_pattern(
+ pattern,
+ char_ngram_threshold,
+ token_cosine_threshold,
+ ) {
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::DisengagementNegativeStance,
+ *idx,
+ pattern.raw.clone(),
+ )
+ .with_metadata(json!({
+ "indicator_type": "complaint",
+ "pattern": pattern.raw,
+ })),
+ );
+ break;
+ }
+ }
+ }
+ }
+
+ group
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ fn nm(s: &str) -> NormalizedMessage {
+ NormalizedMessage::from_text(s, 2000)
+ }
+
+ #[test]
+ fn detects_human_escalation_request() {
+ let msgs = vec![(
+ 0usize,
+ "human",
+ nm("This is taking forever, get me a human"),
+ )];
+ let g = analyze_disengagement(&msgs, 0.65, 0.6);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::DisengagementEscalation)));
+ }
+
+ #[test]
+ fn detects_quit_intent() {
+ let msgs = vec![(0usize, "human", nm("Forget it, I give up"))];
+ let g = analyze_disengagement(&msgs, 0.65, 0.6);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::DisengagementQuit)));
+ }
+
+ #[test]
+ fn detects_negative_stance_complaint() {
+ let msgs = vec![(0usize, "human", nm("This is useless"))];
+ let g = analyze_disengagement(&msgs, 0.65, 0.6);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::DisengagementNegativeStance)));
+ }
+
+ #[test]
+ fn detects_excessive_punctuation_as_negative_stance() {
+ let msgs = vec![(0usize, "human", nm("WHY isn't this working???"))];
+ let g = analyze_disengagement(&msgs, 0.65, 0.6);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::DisengagementNegativeStance)));
+ }
+
+ #[test]
+ fn positive_excitement_is_not_disengagement() {
+ let msgs = vec![(0usize, "human", nm("Yes!! That's perfect!!!"))];
+ let g = analyze_disengagement(&msgs, 0.65, 0.6);
+ assert!(g
+ .signals
+ .iter()
+ .all(|s| !matches!(s.signal_type, SignalType::DisengagementNegativeStance)));
+ }
+}
diff --git a/crates/brightstaff/src/signals/interaction/misalignment.rs b/crates/brightstaff/src/signals/interaction/misalignment.rs
new file mode 100644
index 00000000..3dcf3ddd
--- /dev/null
+++ b/crates/brightstaff/src/signals/interaction/misalignment.rs
@@ -0,0 +1,338 @@
+//! Misalignment signals: corrections, rephrases, clarifications.
+//!
+//! Direct port of `signals/interaction/misalignment.py`.
+
+use std::sync::OnceLock;
+
+use serde_json::json;
+
+use super::constants::{starts_with_prefix, stopwords, CONFIRMATION_PREFIXES};
+use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
+use crate::signals::text_processing::{normalize_patterns, NormalizedMessage, NormalizedPattern};
+
+const CORRECTION_PATTERN_TEXTS: &[&str] = &[
+ "no, i meant",
+ "no i meant",
+ "no, i said",
+ "no i said",
+ "no, i asked",
+ "no i asked",
+ "nah, i meant",
+ "nope, i meant",
+ "not what i said",
+ "not what i asked",
+ "that's not what i said",
+ "that's not what i asked",
+ "that's not what i meant",
+ "thats not what i said",
+ "thats not what i asked",
+ "thats not what i meant",
+ "that's not what you",
+ "no that's not what i",
+ "no, that's not what i",
+ "you're not quite right",
+ "youre not quite right",
+ "you're not exactly right",
+ "youre not exactly right",
+ "you're wrong about",
+ "youre wrong about",
+ "i just said",
+ "i already said",
+ "i already told you",
+];
+
+const REPHRASE_PATTERN_TEXTS: &[&str] = &[
+ "let me rephrase",
+ "let me explain again",
+ "what i'm trying to say",
+ "what i'm saying is",
+ "in other words",
+];
+
+const CLARIFICATION_PATTERN_TEXTS: &[&str] = &[
+ "i don't understand",
+ "don't understand",
+ "not understanding",
+ "can't understand",
+ "don't get it",
+ "don't follow",
+ "i'm confused",
+ "so confused",
+ "makes no sense",
+ "doesn't make sense",
+ "not making sense",
+ "what do you mean",
+ "what does that mean",
+ "what are you saying",
+ "i'm lost",
+ "totally lost",
+ "lost me",
+ "no clue what you",
+ "no idea what you",
+ "no clue what that",
+ "no idea what that",
+ "come again",
+ "say that again",
+ "repeat that",
+ "trouble following",
+ "hard to follow",
+ "can't follow",
+];
+
+fn correction_patterns() -> &'static Vec<NormalizedPattern> {
+    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
+    PATS.get_or_init(|| normalize_patterns(CORRECTION_PATTERN_TEXTS))
+}
+
+fn rephrase_patterns() -> &'static Vec<NormalizedPattern> {
+    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
+    PATS.get_or_init(|| normalize_patterns(REPHRASE_PATTERN_TEXTS))
+}
+
+fn clarification_patterns() -> &'static Vec<NormalizedPattern> {
+    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
+    PATS.get_or_init(|| normalize_patterns(CLARIFICATION_PATTERN_TEXTS))
+}
+
+fn is_confirmation_message(text: &str) -> bool {
+    // Delegate to the shared whole-token prefix matcher so that e.g.
+    // "yesterday ..." is not mistaken for a "yes" confirmation.
+    starts_with_prefix(text, CONFIRMATION_PREFIXES)
+}
+
+/// Detect whether two user messages appear to be rephrases of each other.
+pub fn is_similar_rephrase(
+ norm_msg1: &NormalizedMessage,
+ norm_msg2: &NormalizedMessage,
+ overlap_threshold: f32,
+ min_meaningful_tokens: usize,
+ max_new_content_ratio: f32,
+) -> bool {
+ if norm_msg1.tokens.len() < 3 || norm_msg2.tokens.len() < 3 {
+ return false;
+ }
+ if is_confirmation_message(&norm_msg1.raw) {
+ return false;
+ }
+
+ let stops = stopwords();
+ let tokens1: std::collections::HashSet<&str> = norm_msg1
+ .tokens
+ .iter()
+ .filter(|t| !stops.contains(t.as_str()))
+ .map(|s| s.as_str())
+ .collect();
+ let tokens2: std::collections::HashSet<&str> = norm_msg2
+ .tokens
+ .iter()
+ .filter(|t| !stops.contains(t.as_str()))
+ .map(|s| s.as_str())
+ .collect();
+
+ if tokens1.len() < min_meaningful_tokens || tokens2.len() < min_meaningful_tokens {
+ return false;
+ }
+
+ let new_tokens: std::collections::HashSet<&&str> = tokens1.difference(&tokens2).collect();
+ let new_content_ratio = if tokens1.is_empty() {
+ 0.0
+ } else {
+ new_tokens.len() as f32 / tokens1.len() as f32
+ };
+ if new_content_ratio > max_new_content_ratio {
+ return false;
+ }
+
+ let intersection = tokens1.intersection(&tokens2).count();
+ let min_size = tokens1.len().min(tokens2.len());
+ if min_size == 0 {
+ return false;
+ }
+ let overlap_ratio = intersection as f32 / min_size as f32;
+ overlap_ratio >= overlap_threshold
+}
+
+/// Analyze user messages for misalignment signals.
+pub fn analyze_misalignment(
+ normalized_messages: &[(usize, &str, NormalizedMessage)],
+ char_ngram_threshold: f32,
+ token_cosine_threshold: f32,
+) -> SignalGroup {
+ let mut group = SignalGroup::new("misalignment");
+
+    let mut prev_user_idx: Option<usize> = None;
+ let mut prev_user_msg: Option<&NormalizedMessage> = None;
+
+ for (idx, role, norm_msg) in normalized_messages {
+ if *role != "human" {
+ continue;
+ }
+
+ let mut found_in_turn = false;
+
+ for pattern in correction_patterns() {
+ if norm_msg.matches_normalized_pattern(
+ pattern,
+ char_ngram_threshold,
+ token_cosine_threshold,
+ ) {
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::MisalignmentCorrection,
+ *idx,
+ pattern.raw.clone(),
+ )
+ .with_metadata(json!({"pattern_type": "correction"})),
+ );
+ found_in_turn = true;
+ break;
+ }
+ }
+
+ if found_in_turn {
+ prev_user_idx = Some(*idx);
+ prev_user_msg = Some(norm_msg);
+ continue;
+ }
+
+ for pattern in rephrase_patterns() {
+ if norm_msg.matches_normalized_pattern(
+ pattern,
+ char_ngram_threshold,
+ token_cosine_threshold,
+ ) {
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::MisalignmentRephrase,
+ *idx,
+ pattern.raw.clone(),
+ )
+ .with_metadata(json!({"pattern_type": "rephrase"})),
+ );
+ found_in_turn = true;
+ break;
+ }
+ }
+
+ if found_in_turn {
+ prev_user_idx = Some(*idx);
+ prev_user_msg = Some(norm_msg);
+ continue;
+ }
+
+ for pattern in clarification_patterns() {
+ if norm_msg.matches_normalized_pattern(
+ pattern,
+ char_ngram_threshold,
+ token_cosine_threshold,
+ ) {
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::MisalignmentClarification,
+ *idx,
+ pattern.raw.clone(),
+ )
+ .with_metadata(json!({"pattern_type": "clarification"})),
+ );
+ found_in_turn = true;
+ break;
+ }
+ }
+
+ if found_in_turn {
+ prev_user_idx = Some(*idx);
+ prev_user_msg = Some(norm_msg);
+ continue;
+ }
+
+ // Semantic rephrase vs the previous user message (recent only).
+ if let (Some(prev_idx), Some(prev_msg)) = (prev_user_idx, prev_user_msg) {
+ let turns_between = idx.saturating_sub(prev_idx);
+ if turns_between <= 3 && is_similar_rephrase(norm_msg, prev_msg, 0.75, 4, 0.5) {
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::MisalignmentRephrase,
+ *idx,
+ "[similar rephrase detected]",
+ )
+ .with_confidence(0.8)
+ .with_metadata(json!({
+ "pattern_type": "semantic_rephrase",
+ "compared_to": prev_idx,
+ })),
+ );
+ }
+ }
+
+ prev_user_idx = Some(*idx);
+ prev_user_msg = Some(norm_msg);
+ }
+
+ group
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ fn nm(s: &str) -> NormalizedMessage {
+ NormalizedMessage::from_text(s, 2000)
+ }
+
+ fn make(items: &[(&'static str, &str)]) -> Vec<(usize, &'static str, NormalizedMessage)> {
+ items
+ .iter()
+ .enumerate()
+ .map(|(i, (role, text))| (i, *role, nm(text)))
+ .collect()
+ }
+
+ #[test]
+ fn detects_explicit_correction() {
+ let msgs = make(&[
+ ("human", "Show me my orders"),
+ ("gpt", "Sure, here are your invoices"),
+ ("human", "No, I meant my recent orders"),
+ ]);
+ let g = analyze_misalignment(&msgs, 0.65, 0.6);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::MisalignmentCorrection)));
+ }
+
+ #[test]
+ fn detects_rephrase_marker() {
+ let msgs = make(&[
+ ("human", "Show me X"),
+ ("gpt", "Sure"),
+ ("human", "Let me rephrase: I want X grouped by date"),
+ ]);
+ let g = analyze_misalignment(&msgs, 0.65, 0.6);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::MisalignmentRephrase)));
+ }
+
+ #[test]
+ fn detects_clarification_request() {
+ let msgs = make(&[
+ ("human", "Run the report"),
+ ("gpt", "Foobar quux baz."),
+ ("human", "I don't understand what you mean"),
+ ]);
+ let g = analyze_misalignment(&msgs, 0.65, 0.6);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::MisalignmentClarification)));
+ }
+
+ #[test]
+ fn confirmation_is_not_a_rephrase() {
+ let m1 = nm("Yes, that's correct, please proceed with the order");
+ let m2 = nm("please proceed with the order for the same product");
+ assert!(!is_similar_rephrase(&m1, &m2, 0.75, 4, 0.5));
+ }
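+
+    // Hedged sketch of the positive path (inputs invented): near-total token
+    // overlap with no new content tokens should register as a rephrase.
+    #[test]
+    fn near_identical_requests_are_rephrases() {
+        let m1 = nm("show me the sales report for last quarter");
+        let m2 = nm("can you show me the sales report from last quarter");
+        assert!(is_similar_rephrase(&m1, &m2, 0.75, 4, 0.5));
+    }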
+}
diff --git a/crates/brightstaff/src/signals/interaction/mod.rs b/crates/brightstaff/src/signals/interaction/mod.rs
new file mode 100644
index 00000000..b60a6748
--- /dev/null
+++ b/crates/brightstaff/src/signals/interaction/mod.rs
@@ -0,0 +1,10 @@
+//! Interaction signals: misalignment, stagnation, disengagement, satisfaction.
+//!
+//! These signals capture how the dialogue itself unfolds (semantic alignment,
+//! progress, engagement, closure) independent of tool execution outcomes.
+
+pub mod constants;
+pub mod disengagement;
+pub mod misalignment;
+pub mod satisfaction;
+pub mod stagnation;
diff --git a/crates/brightstaff/src/signals/interaction/satisfaction.rs b/crates/brightstaff/src/signals/interaction/satisfaction.rs
new file mode 100644
index 00000000..ad719960
--- /dev/null
+++ b/crates/brightstaff/src/signals/interaction/satisfaction.rs
@@ -0,0 +1,177 @@
+//! Satisfaction signals: gratitude, confirmation, success.
+//!
+//! Direct port of `signals/interaction/satisfaction.py`.
+
+use std::sync::OnceLock;
+
+use serde_json::json;
+
+use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType};
+use crate::signals::text_processing::{normalize_patterns, NormalizedMessage, NormalizedPattern};
+
+const GRATITUDE_PATTERN_TEXTS: &[&str] = &[
+ "that's helpful",
+ "that helps",
+ "this helps",
+ "appreciate it",
+ "appreciate that",
+ "that's perfect",
+ "exactly what i needed",
+ "just what i needed",
+ "you're the best",
+ "you rock",
+ "you're awesome",
+ "you're amazing",
+ "you're great",
+];
+
+const CONFIRMATION_PATTERN_TEXTS: &[&str] = &[
+ "that works",
+ "this works",
+ "that's great",
+ "that's amazing",
+ "this is great",
+ "that's awesome",
+ "love it",
+ "love this",
+ "love that",
+];
+
+const SUCCESS_PATTERN_TEXTS: &[&str] = &[
+ "it worked",
+ "that worked",
+ "this worked",
+ "it's working",
+ "that's working",
+ "this is working",
+];
+
+fn gratitude_patterns() -> &'static Vec<NormalizedPattern> {
+    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
+    PATS.get_or_init(|| normalize_patterns(GRATITUDE_PATTERN_TEXTS))
+}
+
+fn confirmation_patterns() -> &'static Vec<NormalizedPattern> {
+    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
+    PATS.get_or_init(|| normalize_patterns(CONFIRMATION_PATTERN_TEXTS))
+}
+
+fn success_patterns() -> &'static Vec<NormalizedPattern> {
+    static PATS: OnceLock<Vec<NormalizedPattern>> = OnceLock::new();
+    PATS.get_or_init(|| normalize_patterns(SUCCESS_PATTERN_TEXTS))
+}
+
+pub fn analyze_satisfaction(
+ normalized_messages: &[(usize, &str, NormalizedMessage)],
+ char_ngram_threshold: f32,
+ token_cosine_threshold: f32,
+) -> SignalGroup {
+ let mut group = SignalGroup::new("satisfaction");
+
+ for (idx, role, norm_msg) in normalized_messages {
+ if *role != "human" {
+ continue;
+ }
+
+ let mut found = false;
+
+ for pattern in gratitude_patterns() {
+ if norm_msg.matches_normalized_pattern(
+ pattern,
+ char_ngram_threshold,
+ token_cosine_threshold,
+ ) {
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::SatisfactionGratitude,
+ *idx,
+ pattern.raw.clone(),
+ )
+ .with_metadata(json!({"pattern_type": "gratitude"})),
+ );
+ found = true;
+ break;
+ }
+ }
+ if found {
+ continue;
+ }
+
+ for pattern in confirmation_patterns() {
+ if norm_msg.matches_normalized_pattern(
+ pattern,
+ char_ngram_threshold,
+ token_cosine_threshold,
+ ) {
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::SatisfactionConfirmation,
+ *idx,
+ pattern.raw.clone(),
+ )
+ .with_metadata(json!({"pattern_type": "confirmation"})),
+ );
+ found = true;
+ break;
+ }
+ }
+ if found {
+ continue;
+ }
+
+ for pattern in success_patterns() {
+ if norm_msg.matches_normalized_pattern(
+ pattern,
+ char_ngram_threshold,
+ token_cosine_threshold,
+ ) {
+ group.add_signal(
+ SignalInstance::new(SignalType::SatisfactionSuccess, *idx, pattern.raw.clone())
+ .with_metadata(json!({"pattern_type": "success"})),
+ );
+ break;
+ }
+ }
+ }
+
+ group
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ fn nm(s: &str) -> NormalizedMessage {
+ NormalizedMessage::from_text(s, 2000)
+ }
+
+ #[test]
+ fn detects_gratitude() {
+ let msgs = vec![(0usize, "human", nm("That's perfect, appreciate it!"))];
+ let g = analyze_satisfaction(&msgs, 0.65, 0.6);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::SatisfactionGratitude)));
+ }
+
+ #[test]
+ fn detects_confirmation() {
+ let msgs = vec![(0usize, "human", nm("That works for me, thanks"))];
+ let g = analyze_satisfaction(&msgs, 0.65, 0.6);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::SatisfactionConfirmation)));
+ }
+
+ #[test]
+ fn detects_success() {
+ let msgs = vec![(0usize, "human", nm("Great, it worked!"))];
+ let g = analyze_satisfaction(&msgs, 0.65, 0.6);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::SatisfactionSuccess)));
+ }
+}
diff --git a/crates/brightstaff/src/signals/interaction/stagnation.rs b/crates/brightstaff/src/signals/interaction/stagnation.rs
new file mode 100644
index 00000000..d7d03c80
--- /dev/null
+++ b/crates/brightstaff/src/signals/interaction/stagnation.rs
@@ -0,0 +1,241 @@
+//! Stagnation signals: dragging (turn-count efficiency) and repetition.
+//!
+//! Direct port of `signals/interaction/stagnation.py`.
+
+use serde_json::json;
+
+use super::constants::{starts_with_prefix, POSITIVE_PREFIXES};
+use crate::signals::schemas::{SignalGroup, SignalInstance, SignalType, TurnMetrics};
+use crate::signals::text_processing::NormalizedMessage;
+
+/// Adapter row used by the stagnation dragging detector. Mirrors the `from`
+/// role of the ShareGPT `{"from": role, "value": text}` shape used in the
+/// Python reference; only the role is needed for turn counting.
+pub struct ShareGptMsg<'a> {
+ pub from: &'a str,
+}
+
+pub fn analyze_dragging(
+ messages: &[ShareGptMsg<'_>],
+ baseline_turns: usize,
+ efficiency_threshold: f32,
+) -> (SignalGroup, TurnMetrics) {
+ let mut group = SignalGroup::new("stagnation");
+
+ let mut user_turns: usize = 0;
+ let mut assistant_turns: usize = 0;
+ for m in messages {
+ match m.from {
+ "human" => user_turns += 1,
+ "gpt" => assistant_turns += 1,
+ _ => {}
+ }
+ }
+
+ let total_turns = user_turns;
+ let efficiency_score: f32 = if total_turns == 0 || total_turns <= baseline_turns {
+ 1.0
+ } else {
+ let excess = (total_turns - baseline_turns) as f32;
+ 1.0 / (1.0 + excess * 0.25)
+ };
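+    // Worked example (illustrative): with baseline_turns = 5 and 15 user
+    // turns, excess = 10 and efficiency = 1.0 / (1.0 + 10.0 * 0.25) ≈ 0.29,
+    // below the default 0.5 threshold passed in by `analyze_stagnation`.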
+
+ let is_dragging = efficiency_score < efficiency_threshold;
+ let metrics = TurnMetrics {
+ total_turns,
+ user_turns,
+ assistant_turns,
+ is_dragging,
+ efficiency_score,
+ };
+
+ if is_dragging {
+ let last_idx = messages.len().saturating_sub(1);
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::StagnationDragging,
+ last_idx,
+ format!(
+ "Conversation dragging: {} turns (efficiency: {:.2})",
+ total_turns, efficiency_score
+ ),
+ )
+ .with_confidence(1.0 - efficiency_score)
+ .with_metadata(json!({
+ "total_turns": total_turns,
+ "efficiency_score": efficiency_score,
+ "baseline_turns": baseline_turns,
+ })),
+ );
+ }
+
+ (group, metrics)
+}
+
+pub fn analyze_repetition(
+ normalized_messages: &[(usize, &str, NormalizedMessage)],
+ lookback: usize,
+ exact_threshold: f32,
+ near_duplicate_threshold: f32,
+) -> SignalGroup {
+ let mut group = SignalGroup::new("stagnation");
+
+ // We keep references into `normalized_messages`. Since `normalized_messages`
+ // is borrowed for the whole function, this avoids cloning.
+ let mut prev_human: Vec<(usize, &NormalizedMessage)> = Vec::new();
+ let mut prev_gpt: Vec<(usize, &NormalizedMessage)> = Vec::new();
+
+ for (idx, role, norm_msg) in normalized_messages {
+ if *role != "human" && *role != "gpt" {
+ continue;
+ }
+
+ // Skip human positive-prefix messages; they're naturally repetitive.
+ if *role == "human" && starts_with_prefix(&norm_msg.raw, POSITIVE_PREFIXES) {
+ prev_human.push((*idx, norm_msg));
+ continue;
+ }
+
+ if norm_msg.tokens.len() < 5 {
+ if *role == "human" {
+ prev_human.push((*idx, norm_msg));
+ } else {
+ prev_gpt.push((*idx, norm_msg));
+ }
+ continue;
+ }
+
+ let prev = if *role == "human" {
+ &prev_human
+ } else {
+ &prev_gpt
+ };
+ let start = prev.len().saturating_sub(lookback);
+ for (prev_idx, prev_msg) in &prev[start..] {
+ if prev_msg.tokens.len() < 5 {
+ continue;
+ }
+ let similarity = norm_msg.ngram_similarity_with_message(prev_msg);
+ if similarity >= exact_threshold {
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::StagnationRepetition,
+ *idx,
+ format!("Exact repetition with message {}", prev_idx),
+ )
+ .with_confidence(similarity)
+ .with_metadata(json!({
+ "repetition_type": "exact",
+ "compared_to": prev_idx,
+ "similarity": similarity,
+ "role": role,
+ })),
+ );
+ break;
+ } else if similarity >= near_duplicate_threshold {
+ group.add_signal(
+ SignalInstance::new(
+ SignalType::StagnationRepetition,
+ *idx,
+ format!("Near-duplicate with message {}", prev_idx),
+ )
+ .with_confidence(similarity)
+ .with_metadata(json!({
+ "repetition_type": "near_duplicate",
+ "compared_to": prev_idx,
+ "similarity": similarity,
+ "role": role,
+ })),
+ );
+ break;
+ }
+ }
+
+ if *role == "human" {
+ prev_human.push((*idx, norm_msg));
+ } else {
+ prev_gpt.push((*idx, norm_msg));
+ }
+ }
+
+ group
+}
+
+/// Combined stagnation analyzer: dragging + repetition.
+pub fn analyze_stagnation(
+ messages: &[ShareGptMsg<'_>],
+ normalized_messages: &[(usize, &str, NormalizedMessage)],
+ baseline_turns: usize,
+) -> (SignalGroup, TurnMetrics) {
+ let (dragging_group, metrics) = analyze_dragging(messages, baseline_turns, 0.5);
+ let repetition_group = analyze_repetition(normalized_messages, 2, 0.95, 0.85);
+
+ let mut combined = SignalGroup::new("stagnation");
+    for s in dragging_group.signals {
+        combined.add_signal(s);
+    }
+    for s in repetition_group.signals {
+        combined.add_signal(s);
+    }
+ (combined, metrics)
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ fn nm(s: &str) -> NormalizedMessage {
+ NormalizedMessage::from_text(s, 2000)
+ }
+
+ #[test]
+ fn dragging_after_many_user_turns() {
+ let msgs: Vec<_> = (0..15)
+ .flat_map(|_| [ShareGptMsg { from: "human" }, ShareGptMsg { from: "gpt" }])
+ .collect();
+ let (g, m) = analyze_dragging(&msgs, 5, 0.5);
+ assert!(m.is_dragging);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::StagnationDragging)));
+ }
+
+ #[test]
+ fn no_dragging_below_baseline() {
+ let msgs = vec![
+ ShareGptMsg { from: "human" },
+ ShareGptMsg { from: "gpt" },
+ ShareGptMsg { from: "human" },
+ ShareGptMsg { from: "gpt" },
+ ];
+ let (g, m) = analyze_dragging(&msgs, 5, 0.5);
+ assert!(!m.is_dragging);
+ assert!(g.signals.is_empty());
+ }
+
+ #[test]
+ fn detects_exact_repetition_in_user_messages() {
+ let n = vec![
+ (
+ 0usize,
+ "human",
+ nm("This widget is broken and needs repair right now"),
+ ),
+ (1, "gpt", nm("Sorry to hear that. Let me look into it.")),
+ (
+ 2,
+ "human",
+ nm("This widget is broken and needs repair right now"),
+ ),
+ ];
+ let g = analyze_repetition(&n, 2, 0.95, 0.85);
+ assert!(g
+ .signals
+ .iter()
+ .any(|s| matches!(s.signal_type, SignalType::StagnationRepetition)));
+ }
+}
diff --git a/crates/brightstaff/src/signals/mod.rs b/crates/brightstaff/src/signals/mod.rs
index 83db943e..d96d3bf0 100644
--- a/crates/brightstaff/src/signals/mod.rs
+++ b/crates/brightstaff/src/signals/mod.rs
@@ -1,3 +1,26 @@
-mod analyzer;
+//! Plano signals: behavioral quality indicators for agent interactions.
+//!
+//! This is a Rust port of the paper-aligned Python reference implementation at
+//! `https://github.com/katanemo/signals`.
+//!
+//! Three layers of signals are detected from a conversation transcript:
+//!
+//! - **Interaction**: misalignment, stagnation, disengagement, satisfaction
+//! - **Execution**: failure, loops
+//! - **Environment**: exhaustion
+//!
+//! See `SignalType` for the full hierarchy.
-pub use analyzer::*;
+pub mod analyzer;
+pub mod environment;
+pub mod execution;
+pub mod interaction;
+pub mod otel;
+pub mod schemas;
+pub mod text_processing;
+
+pub use analyzer::{SignalAnalyzer, FLAG_MARKER};
+pub use schemas::{
+ EnvironmentSignals, ExecutionSignals, InteractionQuality, InteractionSignals, SignalGroup,
+ SignalInstance, SignalLayer, SignalReport, SignalType, TurnMetrics,
+};
diff --git a/crates/brightstaff/src/signals/otel.rs b/crates/brightstaff/src/signals/otel.rs
new file mode 100644
index 00000000..deb3c1b5
--- /dev/null
+++ b/crates/brightstaff/src/signals/otel.rs
@@ -0,0 +1,241 @@
+//! Helpers for emitting `SignalReport` data to OpenTelemetry spans.
+//!
+//! Two sets of attributes are emitted:
+//!
+//! - **Legacy** keys under `signals.*` (e.g. `signals.frustration.count`),
+//! computed from the new layered counts. Preserved for one release for
+//! backward compatibility with existing dashboards.
+//! - **New** layered keys (e.g. `signals.interaction.misalignment.count`),
+//! one set of `count`/`severity` attributes per category, plus per-instance
+//!   span events named `signal.<type>`.
+
+use opentelemetry::trace::SpanRef;
+use opentelemetry::KeyValue;
+
+use crate::signals::schemas::{SignalGroup, SignalReport, SignalType};
+
+/// Emit both legacy and layered OTel attributes/events for a `SignalReport`.
+///
+/// Returns `true` if any "concerning" signal was found, mirroring the previous
+/// behavior used to flag the span operation name.
+pub fn emit_signals_to_span(span: &SpanRef<'_>, report: &SignalReport) -> bool {
+ emit_overall(span, report);
+ emit_layered_attributes(span, report);
+ emit_legacy_attributes(span, report);
+ emit_signal_events(span, report);
+
+ is_concerning(report)
+}
+
+fn emit_overall(span: &SpanRef<'_>, report: &SignalReport) {
+ span.set_attribute(KeyValue::new(
+ "signals.quality",
+ report.overall_quality.as_str().to_string(),
+ ));
+ span.set_attribute(KeyValue::new(
+ "signals.quality_score",
+ report.quality_score as f64,
+ ));
+ span.set_attribute(KeyValue::new(
+ "signals.turn_count",
+ report.turn_metrics.total_turns as i64,
+ ));
+ span.set_attribute(KeyValue::new(
+ "signals.efficiency_score",
+ report.turn_metrics.efficiency_score as f64,
+ ));
+}
+
+fn emit_group(span: &SpanRef<'_>, prefix: &str, group: &SignalGroup) {
+ if group.count == 0 {
+ return;
+ }
+ span.set_attribute(KeyValue::new(
+ format!("{}.count", prefix),
+ group.count as i64,
+ ));
+ span.set_attribute(KeyValue::new(
+ format!("{}.severity", prefix),
+ group.severity as i64,
+ ));
+}
+
+fn emit_layered_attributes(span: &SpanRef<'_>, report: &SignalReport) {
+ emit_group(
+ span,
+ "signals.interaction.misalignment",
+ &report.interaction.misalignment,
+ );
+ emit_group(
+ span,
+ "signals.interaction.stagnation",
+ &report.interaction.stagnation,
+ );
+ emit_group(
+ span,
+ "signals.interaction.disengagement",
+ &report.interaction.disengagement,
+ );
+ emit_group(
+ span,
+ "signals.interaction.satisfaction",
+ &report.interaction.satisfaction,
+ );
+ emit_group(span, "signals.execution.failure", &report.execution.failure);
+ emit_group(span, "signals.execution.loops", &report.execution.loops);
+ emit_group(
+ span,
+ "signals.environment.exhaustion",
+ &report.environment.exhaustion,
+ );
+}
+
+fn count_of(report: &SignalReport, t: SignalType) -> usize {
+ report.iter_signals().filter(|s| s.signal_type == t).count()
+}
+
+/// Emit the legacy attribute keys consumed by existing dashboards. These are
+/// derived from the new `SignalReport` so no detector contract is broken.
+fn emit_legacy_attributes(span: &SpanRef<'_>, report: &SignalReport) {
+ use crate::tracing::signals as legacy;
+
+ // signals.follow_up.repair.{count,ratio} - misalignment proxies repairs.
+ let repair_count = report.interaction.misalignment.count;
+ let user_turns = report.turn_metrics.user_turns.max(1) as f32;
+ if repair_count > 0 {
+ span.set_attribute(KeyValue::new(legacy::REPAIR_COUNT, repair_count as i64));
+ let ratio = repair_count as f32 / user_turns;
+ span.set_attribute(KeyValue::new(legacy::REPAIR_RATIO, format!("{:.3}", ratio)));
+ }
+
+ // signals.frustration.{count,severity} - disengagement.negative_stance is
+ // the closest legacy analog of "frustration".
+ let frustration_count = count_of(report, SignalType::DisengagementNegativeStance);
+ if frustration_count > 0 {
+ span.set_attribute(KeyValue::new(
+ legacy::FRUSTRATION_COUNT,
+ frustration_count as i64,
+ ));
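+        // Same bucketing as `SignalGroup::update_severity` (0 / 1-2 / 3-4 / 5+).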
+ let severity = match frustration_count {
+ 0 => 0,
+ 1..=2 => 1,
+ 3..=4 => 2,
+ _ => 3,
+ };
+ span.set_attribute(KeyValue::new(legacy::FRUSTRATION_SEVERITY, severity as i64));
+ }
+
+ // signals.repetition.count - stagnation (repetition + dragging).
+ if report.interaction.stagnation.count > 0 {
+ span.set_attribute(KeyValue::new(
+ legacy::REPETITION_COUNT,
+ report.interaction.stagnation.count as i64,
+ ));
+ }
+
+ // signals.escalation.requested - any escalation/quit signal.
+ let escalated = report.interaction.disengagement.signals.iter().any(|s| {
+ matches!(
+ s.signal_type,
+ SignalType::DisengagementEscalation | SignalType::DisengagementQuit
+ )
+ });
+ if escalated {
+ span.set_attribute(KeyValue::new(legacy::ESCALATION_REQUESTED, true));
+ }
+
+ // signals.positive_feedback.count - satisfaction signals.
+ if report.interaction.satisfaction.count > 0 {
+ span.set_attribute(KeyValue::new(
+ legacy::POSITIVE_FEEDBACK_COUNT,
+ report.interaction.satisfaction.count as i64,
+ ));
+ }
+}
+
+fn emit_signal_events(span: &SpanRef<'_>, report: &SignalReport) {
+ for sig in report.iter_signals() {
+ let event_name = format!("signal.{}", sig.signal_type.as_str());
+        let mut attrs: Vec<KeyValue> = vec![
+ KeyValue::new("signal.type", sig.signal_type.as_str().to_string()),
+ KeyValue::new("signal.message_index", sig.message_index as i64),
+ KeyValue::new("signal.confidence", sig.confidence as f64),
+ ];
+ if !sig.snippet.is_empty() {
+ attrs.push(KeyValue::new("signal.snippet", sig.snippet.clone()));
+ }
+ if !sig.metadata.is_null() {
+ attrs.push(KeyValue::new("signal.metadata", sig.metadata.to_string()));
+ }
+ span.add_event(event_name, attrs);
+ }
+}
+
+fn is_concerning(report: &SignalReport) -> bool {
+ use crate::signals::schemas::InteractionQuality;
+ if matches!(
+ report.overall_quality,
+ InteractionQuality::Poor | InteractionQuality::Severe
+ ) {
+ return true;
+ }
+ if report.interaction.disengagement.count > 0 {
+ return true;
+ }
+ if report.interaction.stagnation.count > 2 {
+ return true;
+ }
+ if report.execution.failure.count > 0 || report.execution.loops.count > 0 {
+ return true;
+ }
+ false
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::signals::schemas::{
+ EnvironmentSignals, ExecutionSignals, InteractionQuality, InteractionSignals, SignalGroup,
+ SignalInstance, SignalReport, SignalType, TurnMetrics,
+ };
+
+ fn report_with_escalation() -> SignalReport {
+ let mut diseng = SignalGroup::new("disengagement");
+ diseng.add_signal(SignalInstance::new(
+ SignalType::DisengagementEscalation,
+ 3,
+ "get me a human",
+ ));
+ SignalReport {
+ interaction: InteractionSignals {
+ disengagement: diseng,
+ ..InteractionSignals::default()
+ },
+ execution: ExecutionSignals::default(),
+ environment: EnvironmentSignals::default(),
+ overall_quality: InteractionQuality::Severe,
+ quality_score: 0.0,
+ turn_metrics: TurnMetrics {
+ total_turns: 3,
+ user_turns: 2,
+ assistant_turns: 1,
+ is_dragging: false,
+ efficiency_score: 1.0,
+ },
+ summary: String::new(),
+ }
+ }
+
+ #[test]
+ fn is_concerning_flags_disengagement() {
+ let r = report_with_escalation();
+ assert!(is_concerning(&r));
+ }
+
+ #[test]
+ fn count_of_returns_per_type_count() {
+ let r = report_with_escalation();
+ assert_eq!(count_of(&r, SignalType::DisengagementEscalation), 1);
+ assert_eq!(count_of(&r, SignalType::DisengagementNegativeStance), 0);
+ }
+}
diff --git a/crates/brightstaff/src/signals/schemas.rs b/crates/brightstaff/src/signals/schemas.rs
new file mode 100644
index 00000000..47ea0836
--- /dev/null
+++ b/crates/brightstaff/src/signals/schemas.rs
@@ -0,0 +1,431 @@
+//! Data shapes for the signal analyzer.
+//!
+//! Mirrors `signals/schemas.py` from the reference implementation. Where the
+//! Python library exposes a `Dict[str, SignalGroup]` partitioned by category,
+//! the Rust port uses strongly-typed sub-structs (`InteractionSignals`,
+//! `ExecutionSignals`, `EnvironmentSignals`) for the same partitioning.
+
+use serde::{Deserialize, Serialize};
+
+/// Hierarchical signal type. The 25 leaf variants mirror the paper taxonomy
+/// and the Python reference's `SignalType` string enum.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
+pub enum SignalType {
+ // Interaction > Misalignment
+ MisalignmentCorrection,
+ MisalignmentRephrase,
+ MisalignmentClarification,
+
+ // Interaction > Stagnation
+ StagnationDragging,
+ StagnationRepetition,
+
+ // Interaction > Disengagement
+ DisengagementEscalation,
+ DisengagementQuit,
+ DisengagementNegativeStance,
+
+ // Interaction > Satisfaction
+ SatisfactionGratitude,
+ SatisfactionConfirmation,
+ SatisfactionSuccess,
+
+ // Execution > Failure
+ ExecutionFailureInvalidArgs,
+ ExecutionFailureBadQuery,
+ ExecutionFailureToolNotFound,
+ ExecutionFailureAuthMisuse,
+ ExecutionFailureStateError,
+
+ // Execution > Loops
+ ExecutionLoopsRetry,
+ ExecutionLoopsParameterDrift,
+ ExecutionLoopsOscillation,
+
+ // Environment > Exhaustion
+ EnvironmentExhaustionApiError,
+ EnvironmentExhaustionTimeout,
+ EnvironmentExhaustionRateLimit,
+ EnvironmentExhaustionNetwork,
+ EnvironmentExhaustionMalformed,
+ EnvironmentExhaustionContextOverflow,
+}
+
+impl SignalType {
+ /// Dotted hierarchical string identifier, e.g.
+ /// `"interaction.misalignment.correction"`. Matches the Python reference's
+ /// `SignalType` enum *value* strings byte-for-byte.
+ pub fn as_str(&self) -> &'static str {
+ match self {
+ SignalType::MisalignmentCorrection => "interaction.misalignment.correction",
+ SignalType::MisalignmentRephrase => "interaction.misalignment.rephrase",
+ SignalType::MisalignmentClarification => "interaction.misalignment.clarification",
+ SignalType::StagnationDragging => "interaction.stagnation.dragging",
+ SignalType::StagnationRepetition => "interaction.stagnation.repetition",
+ SignalType::DisengagementEscalation => "interaction.disengagement.escalation",
+ SignalType::DisengagementQuit => "interaction.disengagement.quit",
+ SignalType::DisengagementNegativeStance => "interaction.disengagement.negative_stance",
+ SignalType::SatisfactionGratitude => "interaction.satisfaction.gratitude",
+ SignalType::SatisfactionConfirmation => "interaction.satisfaction.confirmation",
+ SignalType::SatisfactionSuccess => "interaction.satisfaction.success",
+ SignalType::ExecutionFailureInvalidArgs => "execution.failure.invalid_args",
+ SignalType::ExecutionFailureBadQuery => "execution.failure.bad_query",
+ SignalType::ExecutionFailureToolNotFound => "execution.failure.tool_not_found",
+ SignalType::ExecutionFailureAuthMisuse => "execution.failure.auth_misuse",
+ SignalType::ExecutionFailureStateError => "execution.failure.state_error",
+ SignalType::ExecutionLoopsRetry => "execution.loops.retry",
+ SignalType::ExecutionLoopsParameterDrift => "execution.loops.parameter_drift",
+ SignalType::ExecutionLoopsOscillation => "execution.loops.oscillation",
+ SignalType::EnvironmentExhaustionApiError => "environment.exhaustion.api_error",
+ SignalType::EnvironmentExhaustionTimeout => "environment.exhaustion.timeout",
+ SignalType::EnvironmentExhaustionRateLimit => "environment.exhaustion.rate_limit",
+ SignalType::EnvironmentExhaustionNetwork => "environment.exhaustion.network",
+ SignalType::EnvironmentExhaustionMalformed => {
+ "environment.exhaustion.malformed_response"
+ }
+ SignalType::EnvironmentExhaustionContextOverflow => {
+ "environment.exhaustion.context_overflow"
+ }
+ }
+ }
+
+ pub fn layer(&self) -> SignalLayer {
+ match self {
+ SignalType::MisalignmentCorrection
+ | SignalType::MisalignmentRephrase
+ | SignalType::MisalignmentClarification
+ | SignalType::StagnationDragging
+ | SignalType::StagnationRepetition
+ | SignalType::DisengagementEscalation
+ | SignalType::DisengagementQuit
+ | SignalType::DisengagementNegativeStance
+ | SignalType::SatisfactionGratitude
+ | SignalType::SatisfactionConfirmation
+ | SignalType::SatisfactionSuccess => SignalLayer::Interaction,
+ SignalType::ExecutionFailureInvalidArgs
+ | SignalType::ExecutionFailureBadQuery
+ | SignalType::ExecutionFailureToolNotFound
+ | SignalType::ExecutionFailureAuthMisuse
+ | SignalType::ExecutionFailureStateError
+ | SignalType::ExecutionLoopsRetry
+ | SignalType::ExecutionLoopsParameterDrift
+ | SignalType::ExecutionLoopsOscillation => SignalLayer::Execution,
+ SignalType::EnvironmentExhaustionApiError
+ | SignalType::EnvironmentExhaustionTimeout
+ | SignalType::EnvironmentExhaustionRateLimit
+ | SignalType::EnvironmentExhaustionNetwork
+ | SignalType::EnvironmentExhaustionMalformed
+ | SignalType::EnvironmentExhaustionContextOverflow => SignalLayer::Environment,
+ }
+ }
+
+ /// Category name within the layer (e.g. `"misalignment"`, `"failure"`).
+ pub fn category(&self) -> &'static str {
+ // Strip the layer prefix and take everything before the next dot.
+ let s = self.as_str();
+ let after_layer = s.split_once('.').map(|(_, rest)| rest).unwrap_or(s);
+ after_layer
+ .split_once('.')
+ .map(|(c, _)| c)
+ .unwrap_or(after_layer)
+ }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
+pub enum SignalLayer {
+ Interaction,
+ Execution,
+ Environment,
+}
+
+impl SignalLayer {
+ pub fn as_str(&self) -> &'static str {
+ match self {
+ SignalLayer::Interaction => "interaction",
+ SignalLayer::Execution => "execution",
+ SignalLayer::Environment => "environment",
+ }
+ }
+}
+
+/// Overall quality assessment for an agent interaction session.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+pub enum InteractionQuality {
+ Excellent,
+ Good,
+ Neutral,
+ Poor,
+ Severe,
+}
+
+impl InteractionQuality {
+ pub fn as_str(&self) -> &'static str {
+ match self {
+ InteractionQuality::Excellent => "excellent",
+ InteractionQuality::Good => "good",
+ InteractionQuality::Neutral => "neutral",
+ InteractionQuality::Poor => "poor",
+ InteractionQuality::Severe => "severe",
+ }
+ }
+}
+
+/// A single detected signal instance.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SignalInstance {
+ pub signal_type: SignalType,
+    /// Absolute index into the original conversation's message `Vec`.
+ pub message_index: usize,
+ pub snippet: String,
+ pub confidence: f32,
+ /// Free-form metadata payload mirroring the Python `Dict[str, Any]`.
+ /// Stored as a JSON object so we can faithfully reproduce the reference's
+ /// flexible per-detector metadata.
+ #[serde(default)]
+ pub metadata: serde_json::Value,
+}
+
+impl SignalInstance {
+    pub fn new(
+        signal_type: SignalType,
+        message_index: usize,
+        snippet: impl Into<String>,
+    ) -> Self {
+ Self {
+ signal_type,
+ message_index,
+ snippet: snippet.into(),
+ confidence: 1.0,
+ metadata: serde_json::Value::Object(serde_json::Map::new()),
+ }
+ }
+
+ pub fn with_confidence(mut self, c: f32) -> Self {
+ self.confidence = c;
+ self
+ }
+
+ pub fn with_metadata(mut self, m: serde_json::Value) -> Self {
+ self.metadata = m;
+ self
+ }
+}
+
+/// Aggregated signals for a specific category.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SignalGroup {
+ pub category: String,
+ pub count: usize,
+    pub signals: Vec<SignalInstance>,
+ /// Severity level (0-3: none, mild, moderate, severe).
+ pub severity: u8,
+}
+
+impl SignalGroup {
+    pub fn new(category: impl Into<String>) -> Self {
+ Self {
+ category: category.into(),
+ count: 0,
+ signals: Vec::new(),
+ severity: 0,
+ }
+ }
+
+ pub fn add_signal(&mut self, signal: SignalInstance) {
+ self.signals.push(signal);
+ self.count = self.signals.len();
+ self.update_severity();
+ }
+
+ fn update_severity(&mut self) {
+ self.severity = match self.count {
+ 0 => 0,
+ 1..=2 => 1,
+ 3..=4 => 2,
+ _ => 3,
+ };
+ }
+}
+
+/// Turn count and efficiency metrics, used by stagnation.dragging.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct TurnMetrics {
+ pub total_turns: usize,
+ pub user_turns: usize,
+ pub assistant_turns: usize,
+ pub is_dragging: bool,
+ pub efficiency_score: f32,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct InteractionSignals {
+ pub misalignment: SignalGroup,
+ pub stagnation: SignalGroup,
+ pub disengagement: SignalGroup,
+ pub satisfaction: SignalGroup,
+}
+
+impl Default for InteractionSignals {
+ fn default() -> Self {
+ Self {
+ misalignment: SignalGroup::new("misalignment"),
+ stagnation: SignalGroup::new("stagnation"),
+ disengagement: SignalGroup::new("disengagement"),
+ satisfaction: SignalGroup::new("satisfaction"),
+ }
+ }
+}
+
+impl InteractionSignals {
+ /// Ratio of misalignment instances to user turns. Used as a quality
+ /// scoring input and as a threshold for the "high misalignment rate"
+ /// summary callout. Mirrors `misalignment.count / max(user_turns, 1)`
+ /// from the Python reference's `_assess_quality` and `_generate_summary`.
+ pub fn misalignment_ratio(&self, user_turns: usize) -> f32 {
+ let denom = user_turns.max(1) as f32;
+ self.misalignment.count as f32 / denom
+ }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ExecutionSignals {
+ pub failure: SignalGroup,
+ pub loops: SignalGroup,
+}
+
+impl Default for ExecutionSignals {
+ fn default() -> Self {
+ Self {
+ failure: SignalGroup::new("failure"),
+ loops: SignalGroup::new("loops"),
+ }
+ }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct EnvironmentSignals {
+ pub exhaustion: SignalGroup,
+}
+
+impl Default for EnvironmentSignals {
+ fn default() -> Self {
+ Self {
+ exhaustion: SignalGroup::new("exhaustion"),
+ }
+ }
+}
+
+/// Complete signal analysis report for a conversation.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SignalReport {
+ pub interaction: InteractionSignals,
+ pub execution: ExecutionSignals,
+ pub environment: EnvironmentSignals,
+ pub overall_quality: InteractionQuality,
+ pub quality_score: f32,
+ pub turn_metrics: TurnMetrics,
+ pub summary: String,
+}
+
+impl Default for SignalReport {
+ fn default() -> Self {
+ Self {
+ interaction: InteractionSignals::default(),
+ execution: ExecutionSignals::default(),
+ environment: EnvironmentSignals::default(),
+ overall_quality: InteractionQuality::Neutral,
+ quality_score: 50.0,
+ turn_metrics: TurnMetrics::default(),
+ summary: String::new(),
+ }
+ }
+}
+
+impl SignalReport {
+ /// Iterate over every `SignalInstance` across all layers and groups.
+    pub fn iter_signals(&self) -> impl Iterator<Item = &SignalInstance> {
+ self.interaction
+ .misalignment
+ .signals
+ .iter()
+ .chain(self.interaction.stagnation.signals.iter())
+ .chain(self.interaction.disengagement.signals.iter())
+ .chain(self.interaction.satisfaction.signals.iter())
+ .chain(self.execution.failure.signals.iter())
+ .chain(self.execution.loops.signals.iter())
+ .chain(self.environment.exhaustion.signals.iter())
+ }
+
+ pub fn has_signal_type(&self, t: SignalType) -> bool {
+ self.iter_signals().any(|s| s.signal_type == t)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn signal_type_strings_match_paper_taxonomy() {
+ assert_eq!(
+ SignalType::MisalignmentCorrection.as_str(),
+ "interaction.misalignment.correction"
+ );
+ assert_eq!(
+ SignalType::ExecutionFailureInvalidArgs.as_str(),
+ "execution.failure.invalid_args"
+ );
+ assert_eq!(
+ SignalType::EnvironmentExhaustionMalformed.as_str(),
+ "environment.exhaustion.malformed_response"
+ );
+ }
+
+ #[test]
+ fn signal_type_layer_and_category() {
+ assert_eq!(
+ SignalType::MisalignmentRephrase.layer(),
+ SignalLayer::Interaction
+ );
+ assert_eq!(SignalType::MisalignmentRephrase.category(), "misalignment");
+ assert_eq!(
+ SignalType::ExecutionLoopsRetry.layer(),
+ SignalLayer::Execution
+ );
+ assert_eq!(SignalType::ExecutionLoopsRetry.category(), "loops");
+ assert_eq!(
+ SignalType::EnvironmentExhaustionTimeout.layer(),
+ SignalLayer::Environment
+ );
+ assert_eq!(
+ SignalType::EnvironmentExhaustionTimeout.category(),
+ "exhaustion"
+ );
+ }
+
+ #[test]
+ fn signal_group_severity_buckets_match_python() {
+ let mut g = SignalGroup::new("misalignment");
+ assert_eq!(g.severity, 0);
+ for n in 1..=2 {
+ g.add_signal(SignalInstance::new(
+ SignalType::MisalignmentCorrection,
+ n,
+ "x",
+ ));
+ }
+ assert_eq!(g.severity, 1);
+ for n in 3..=4 {
+ g.add_signal(SignalInstance::new(
+ SignalType::MisalignmentCorrection,
+ n,
+ "x",
+ ));
+ }
+ assert_eq!(g.severity, 2);
+ for n in 5..=6 {
+ g.add_signal(SignalInstance::new(
+ SignalType::MisalignmentCorrection,
+ n,
+ "x",
+ ));
+ }
+ assert_eq!(g.severity, 3);
+ }
+}
diff --git a/crates/brightstaff/src/signals/text_processing.rs b/crates/brightstaff/src/signals/text_processing.rs
new file mode 100644
index 00000000..a1d463cc
--- /dev/null
+++ b/crates/brightstaff/src/signals/text_processing.rs
@@ -0,0 +1,401 @@
+//! Text normalization and similarity primitives.
+//!
+//! Direct Rust port of `signals/text_processing.py` from the reference. The
+//! shapes (`NormalizedMessage`, `NormalizedPattern`) and similarity formulas
+//! match the Python implementation exactly so that pattern matching produces
+//! the same results on the same inputs.
+
+use std::collections::{HashMap, HashSet};
+
+/// Size of character n-grams used for fuzzy similarity (3 = trigrams).
+pub const NGRAM_SIZE: usize = 3;
+
+const PUNCT_TRIM: &[char] = &[
+ '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=',
+ '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~',
+];
+
+/// Pre-processed message with normalized text and tokens for efficient matching.
+#[derive(Debug, Clone, Default)]
+pub struct NormalizedMessage {
+ pub raw: String,
+    pub tokens: Vec<String>,
+    pub token_set: HashSet<String>,
+    pub bigram_set: HashSet<String>,
+    pub char_ngram_set: HashSet<String>,
+    pub token_frequency: HashMap<String, usize>,
+}
+
+impl NormalizedMessage {
+ /// Create a normalized message from raw text. Mirrors
+ /// `NormalizedMessage.from_text` in the reference, including the
+ /// head-20%/tail-80% truncation strategy when text exceeds `max_length`.
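+    ///
+    /// Illustrative numbers: with `max_length = 100`, a longer text keeps its
+    /// first 20 chars (`max_length / 5`) and its last 79, joined by a space.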
+ pub fn from_text(text: &str, max_length: usize) -> Self {
+ let char_count = text.chars().count();
+
+ let raw: String = if char_count <= max_length {
+ text.to_string()
+ } else {
+ let head_len = max_length / 5;
+ // Reserve one char for the joining space.
+ let tail_len = max_length.saturating_sub(head_len + 1);
+ let head: String = text.chars().take(head_len).collect();
+ let tail: String = text
+ .chars()
+ .skip(char_count.saturating_sub(tail_len))
+ .collect();
+ format!("{} {}", head, tail)
+ };
+
+ // Normalize unicode punctuation to ASCII equivalents.
+ let normalized_unicode = raw
+ .replace(['\u{2019}', '\u{2018}'], "'")
+ .replace(['\u{201c}', '\u{201d}'], "\"")
+ .replace(['\u{2013}', '\u{2014}'], "-");
+
+ // Lowercase + collapse whitespace (matches Python's `" ".join(s.split())`).
+ let normalized: String = normalized_unicode
+ .to_lowercase()
+ .split_whitespace()
+            .collect::<Vec<&str>>()
+ .join(" ");
+
+        let mut tokens: Vec<String> = Vec::new();
+ for word in normalized.split_whitespace() {
+ let stripped: String = word.trim_matches(PUNCT_TRIM).to_string();
+ if !stripped.is_empty() {
+ tokens.push(stripped);
+ }
+ }
+
+        let token_set: HashSet<String> = tokens.iter().cloned().collect();
+
+        let mut bigram_set: HashSet<String> = HashSet::new();
+ for i in 0..tokens.len().saturating_sub(1) {
+ bigram_set.insert(format!("{} {}", tokens[i], tokens[i + 1]));
+ }
+
+ let tokens_text = tokens.join(" ");
+ let char_ngram_set = char_ngrams(&tokens_text, NGRAM_SIZE);
+
+        let mut token_frequency: HashMap<String, usize> = HashMap::new();
+ for t in &tokens {
+ *token_frequency.entry(t.clone()).or_insert(0) += 1;
+ }
+
+ Self {
+ raw,
+ tokens,
+ token_set,
+ bigram_set,
+ char_ngram_set,
+ token_frequency,
+ }
+ }
+
+ pub fn contains_token(&self, token: &str) -> bool {
+ self.token_set.contains(token)
+ }
+
+ pub fn contains_phrase(&self, phrase: &str) -> bool {
+ let phrase_tokens: Vec<&str> = phrase.split_whitespace().collect();
+ if phrase_tokens.is_empty() {
+ return false;
+ }
+ if phrase_tokens.len() == 1 {
+ return self.contains_token(phrase_tokens[0]);
+ }
+ if phrase_tokens.len() > self.tokens.len() {
+ return false;
+ }
+ let n = phrase_tokens.len();
+ for i in 0..=self.tokens.len() - n {
+ if self.tokens[i..i + n]
+ .iter()
+ .zip(phrase_tokens.iter())
+ .all(|(a, b)| a == b)
+ {
+ return true;
+ }
+ }
+ false
+ }
+
+ /// Character n-gram (Jaccard) similarity vs another normalized message.
+ pub fn ngram_similarity_with_message(&self, other: &NormalizedMessage) -> f32 {
+ jaccard(&self.char_ngram_set, &other.char_ngram_set)
+ }
+
+ /// Character n-gram (Jaccard) similarity vs a raw pattern string.
+ pub fn ngram_similarity_with_pattern(&self, pattern: &str) -> f32 {
+ let normalized = strip_non_word_chars(&pattern.to_lowercase());
+ let pattern_ngrams = char_ngrams(&normalized, NGRAM_SIZE);
+ jaccard(&self.char_ngram_set, &pattern_ngrams)
+ }
+
+ /// Fraction of pattern's ngrams contained in this message's ngram set.
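+ /// For instance, if the pattern yields four n-grams and two of them appear
+ /// in this message's set, containment is 0.5.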
+ pub fn char_ngram_containment(&self, pattern: &str) -> f32 {
+ let normalized = strip_non_word_chars(&pattern.to_lowercase());
+ let pattern_ngrams = char_ngrams(&normalized, NGRAM_SIZE);
+ if pattern_ngrams.is_empty() {
+ return 0.0;
+ }
+ let contained = pattern_ngrams
+ .iter()
+ .filter(|ng| self.char_ngram_set.contains(*ng))
+ .count();
+ contained as f32 / pattern_ngrams.len() as f32
+ }
+
+ /// Token-frequency cosine similarity vs a raw pattern string.
+ pub fn token_cosine_similarity(&self, pattern: &str) -> f32 {
+ let mut pattern_freq: HashMap<String, u32> = HashMap::new();
+ for word in pattern.to_lowercase().split_whitespace() {
+ let stripped = word.trim_matches(PUNCT_TRIM);
+ if !stripped.is_empty() {
+ *pattern_freq.entry(stripped.to_string()).or_insert(0) += 1;
+ }
+ }
+ cosine_freq(&self.token_frequency, &pattern_freq)
+ }
+
+ /// Layered match against a pre-normalized pattern. Mirrors
+ /// `matches_normalized_pattern` from the reference: exact phrase ->
+ /// char-ngram Jaccard -> token cosine.
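+ ///
+ /// A sketch of the fuzzy fallback (thresholds here are illustrative, not
+ /// the shipped defaults):
+ ///
+ /// ```ignore
+ /// let msg = NormalizedMessage::from_text("No I ment", 2000);
+ /// let pat = NormalizedPattern::from_text("no i meant");
+ /// // The typo defeats the exact-phrase layer; char n-gram Jaccard matches.
+ /// assert!(msg.matches_normalized_pattern(&pat, 0.4, 0.6));
+ /// ```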
+ pub fn matches_normalized_pattern(
+ &self,
+ pattern: &NormalizedPattern,
+ char_ngram_threshold: f32,
+ token_cosine_threshold: f32,
+ ) -> bool {
+ // Layer 0: exact phrase match using pre-tokenized message.
+ let plen = pattern.tokens.len();
+ let slen = self.tokens.len();
+ if plen > 0 && plen <= slen {
+ for i in 0..=slen - plen {
+ if self.tokens[i..i + plen] == pattern.tokens[..] {
+ return true;
+ }
+ }
+ }
+
+ // Layer 1: character n-gram Jaccard similarity.
+ if !self.char_ngram_set.is_empty() && !pattern.char_ngram_set.is_empty() {
+ let inter = self
+ .char_ngram_set
+ .intersection(&pattern.char_ngram_set)
+ .count();
+ let union = self.char_ngram_set.union(&pattern.char_ngram_set).count();
+ if union > 0 {
+ let sim = inter as f32 / union as f32;
+ if sim >= char_ngram_threshold {
+ return true;
+ }
+ }
+ }
+
+ // Layer 2: token frequency cosine similarity.
+ if !self.token_frequency.is_empty() && !pattern.token_frequency.is_empty() {
+ let sim = cosine_freq(&self.token_frequency, &pattern.token_frequency);
+ if sim >= token_cosine_threshold {
+ return true;
+ }
+ }
+
+ false
+ }
+}
+
+/// Pre-processed pattern with normalized text and pre-computed n-grams/tokens.
+#[derive(Debug, Clone, Default)]
+pub struct NormalizedPattern {
+ pub raw: String,
+ pub tokens: Vec<String>,
+ pub char_ngram_set: HashSet<String>,
+ pub token_frequency: HashMap<String, u32>,
+}
+
+impl NormalizedPattern {
+ /// Normalize a raw pattern: lowercase, unify smart punctuation, then
+ /// pre-compute tokens, char n-grams, and token frequencies.
+ pub fn from_text(pattern: &str) -> Self {
+ let normalized = pattern
+ .to_lowercase()
+ .replace(['\u{2019}', '\u{2018}'], "'")
+ .replace(['\u{201c}', '\u{201d}'], "\"")
+ .replace(['\u{2013}', '\u{2014}'], "-");
+ let normalized: String = normalized.split_whitespace().collect::<Vec<&str>>().join(" ");
+
+ // Tokenize the same way as NormalizedMessage (trim boundary punctuation,
+ // keep internal punctuation).
+ let mut tokens: Vec<String> = Vec::new();
+ for word in normalized.split_whitespace() {
+ let stripped = word.trim_matches(PUNCT_TRIM);
+ if !stripped.is_empty() {
+ tokens.push(stripped.to_string());
+ }
+ }
+
+ // For ngrams + cosine, strip ALL punctuation (matches Python's
+ // `re.sub(r"[^\w\s]", "", normalized)`).
+ let normalized_for_ngrams = strip_non_word_chars(&normalized);
+ let char_ngram_set = char_ngrams(&normalized_for_ngrams, NGRAM_SIZE);
+
+ let tokens_no_punct: Vec<&str> = normalized_for_ngrams.split_whitespace().collect();
+ let mut token_frequency: HashMap<String, u32> = HashMap::new();
+ for t in &tokens_no_punct {
+ *token_frequency.entry((*t).to_string()).or_insert(0) += 1;
+ }
+
+ Self {
+ raw: pattern.to_string(),
+ tokens,
+ char_ngram_set,
+ token_frequency,
+ }
+ }
+}
+
+/// Convenience: normalize a list of raw pattern strings into `NormalizedPattern`s.
+pub fn normalize_patterns(patterns: &[&str]) -> Vec<NormalizedPattern> {
+ patterns
+ .iter()
+ .map(|p| NormalizedPattern::from_text(p))
+ .collect()
+}
+
+// ---------------------------------------------------------------------------
+// Similarity primitives
+// ---------------------------------------------------------------------------
+
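+/// Sliding character n-grams over `s`; e.g. `char_ngrams("abcd", 3)` yields
+/// {"abc", "bcd"}.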
+fn char_ngrams(s: &str, n: usize) -> HashSet<String> {
+ // Python iterates by character index, not byte; mirror that with .chars().
+ let chars: Vec<char> = s.chars().collect();
+ let mut out: HashSet<String> = HashSet::new();
+ if chars.len() < n {
+ return out;
+ }
+ for i in 0..=chars.len() - n {
+ out.insert(chars[i..i + n].iter().collect());
+ }
+ out
+}
+
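+/// Jaccard index: intersection size over union size. For example,
+/// {"abc", "bcd"} vs {"bcd", "cde"} share one of three distinct elements,
+/// giving 1/3.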
+fn jaccard(a: &HashSet<String>, b: &HashSet<String>) -> f32 {
+ if a.is_empty() && b.is_empty() {
+ return 1.0;
+ }
+ if a.is_empty() || b.is_empty() {
+ return 0.0;
+ }
+ let inter = a.intersection(b).count();
+ let union = a.union(b).count();
+ if union == 0 {
+ 0.0
+ } else {
+ inter as f32 / union as f32
+ }
+}
+
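+/// Cosine similarity over token counts: dot(a, b) / (norm(a) * norm(b)).
+/// Disjoint vocabularies give 0.0; identical frequency maps give 1.0.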
+fn cosine_freq(a: &HashMap<String, u32>, b: &HashMap<String, u32>) -> f32 {
+ if a.is_empty() && b.is_empty() {
+ return 1.0;
+ }
+ if a.is_empty() || b.is_empty() {
+ return 0.0;
+ }
+ let mut dot: f64 = 0.0;
+ let mut n1_sq: f64 = 0.0;
+ let mut n2_sq: f64 = 0.0;
+ for (token, &freq2) in b {
+ let freq1 = *a.get(token).unwrap_or(&0);
+ dot += (freq1 * freq2) as f64;
+ n2_sq += (freq2 * freq2) as f64;
+ }
+ for &freq1 in a.values() {
+ n1_sq += (freq1 * freq1) as f64;
+ }
+ let n1 = n1_sq.sqrt();
+ let n2 = n2_sq.sqrt();
+ if n1 == 0.0 || n2 == 0.0 {
+ 0.0
+ } else {
+ (dot / (n1 * n2)) as f32
+ }
+}
+
+/// Python equivalent: `re.sub(r"[^\w\s]", "", text)` followed by whitespace
+/// collapse. Python's `\w` is `[A-Za-z0-9_]` plus unicode word characters; we
+/// use Rust's `char::is_alphanumeric()` plus `_` for an equivalent definition.
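+/// e.g. `strip_non_word_chars("don't  stop!")` -> "dont stop".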
+fn strip_non_word_chars(text: &str) -> String {
+ let mut out = String::with_capacity(text.len());
+ for c in text.chars() {
+ if c.is_alphanumeric() || c == '_' || c.is_whitespace() {
+ out.push(c);
+ }
+ }
+ out.split_whitespace().collect::<Vec<&str>>().join(" ")
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn normalize_lowercases_and_strips_punctuation() {
+ let m = NormalizedMessage::from_text("Hello, World!", 2000);
+ assert_eq!(m.tokens, vec!["hello".to_string(), "world".to_string()]);
+ }
+
+ #[test]
+ fn normalizes_smart_quotes() {
+ let m = NormalizedMessage::from_text("don\u{2019}t", 2000);
+ assert!(m.tokens.contains(&"don't".to_string()));
+ }
+
+ #[test]
+ fn truncates_long_text_with_head_tail() {
+ let long = "a".repeat(3000);
+ let m = NormalizedMessage::from_text(&long, 2000);
+ // raw should be ~ 2000 chars (head + space + tail)
+ assert!(m.raw.chars().count() <= 2001);
+ assert!(m.raw.starts_with("aa"));
+ assert!(m.raw.ends_with("aa"));
+ }
+
+ #[test]
+ fn contains_phrase_matches_consecutive_tokens() {
+ let m = NormalizedMessage::from_text("I think this is great work", 2000);
+ assert!(m.contains_phrase("this is great"));
+ assert!(!m.contains_phrase("great this"));
+ }
+
+ #[test]
+ fn matches_pattern_via_exact_phrase() {
+ let m = NormalizedMessage::from_text("No, I meant the second one", 2000);
+ let p = NormalizedPattern::from_text("no i meant");
+ assert!(m.matches_normalized_pattern(&p, 0.65, 0.6));
+ }
+
+ #[test]
+ fn matches_pattern_via_char_ngram_fuzziness() {
+ // Typo in "meant" -> "ment" so layer 0 (exact phrase) cannot match,
+ // forcing the matcher to fall back to layer 1 (char n-gram Jaccard).
+ let m = NormalizedMessage::from_text("No I ment", 2000);
+ let p = NormalizedPattern::from_text("no i meant");
+ assert!(m.matches_normalized_pattern(&p, 0.4, 0.6));
+ }
+
+ #[test]
+ fn jaccard_identical_sets_is_one() {
+ let a: HashSet<String> = ["abc", "bcd"].iter().map(|s| s.to_string()).collect();
+ assert!((jaccard(&a, &a) - 1.0).abs() < 1e-6);
+ }
+
+ #[test]
+ fn cosine_freq_orthogonal_is_zero() {
+ let mut a: HashMap<String, u32> = HashMap::new();
+ a.insert("hello".to_string(), 1);
+ let mut b: HashMap<String, u32> = HashMap::new();
+ b.insert("world".to_string(), 1);
+ assert_eq!(cosine_freq(&a, &b), 0.0);
+ }
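+
+ // Extra coverage for the containment and cosine helpers; inputs are
+ // illustrative, chosen so the expected values are easy to verify by hand.
+ #[test]
+ fn char_ngram_containment_full_overlap_is_one() {
+ let m = NormalizedMessage::from_text("hello world xyz", 2000);
+ // Every n-gram of "hello world" is a substring of the message text.
+ assert!((m.char_ngram_containment("hello world") - 1.0).abs() < 1e-6);
+ }
+
+ #[test]
+ fn token_cosine_identical_text_is_one() {
+ let m = NormalizedMessage::from_text("alpha beta", 2000);
+ assert!((m.token_cosine_similarity("alpha beta") - 1.0).abs() < 1e-6);
+ }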
+}
diff --git a/crates/brightstaff/src/streaming.rs b/crates/brightstaff/src/streaming.rs
index f7af8ae0..26af8672 100644
--- a/crates/brightstaff/src/streaming.rs
+++ b/crates/brightstaff/src/streaming.rs
@@ -16,10 +16,134 @@ use tracing_opentelemetry::OpenTelemetrySpanExt;
use crate::handlers::agents::pipeline::{PipelineError, PipelineProcessor};
const STREAM_BUFFER_SIZE: usize = 16;
-use crate::signals::{InteractionQuality, SignalAnalyzer, TextBasedSignalAnalyzer, FLAG_MARKER};
-use crate::tracing::{llm, set_service_name, signals as signal_constants};
+/// Cap on accumulated response bytes kept for usage extraction.
+/// Most chat responses are well under this cap; for pathological ones the
+/// buffer stops accumulating, without affecting pass-through streaming to the client.
+const USAGE_BUFFER_MAX: usize = 2 * 1024 * 1024;
+use crate::metrics as bs_metrics;
+use crate::metrics::labels as metric_labels;
+use crate::signals::otel::emit_signals_to_span;
+use crate::signals::{SignalAnalyzer, FLAG_MARKER};
+use crate::tracing::{llm, set_service_name};
use hermesllm::apis::openai::Message;
+/// Parsed usage + resolved-model details from a provider response.
+#[derive(Debug, Default, Clone)]
+struct ExtractedUsage {
+ prompt_tokens: Option<i64>,
+ completion_tokens: Option<i64>,
+ total_tokens: Option<i64>,
+ cached_input_tokens: Option<i64>,
+ cache_creation_tokens: Option<i64>,
+ reasoning_tokens: Option<i64>,
+ /// The model the upstream actually used. For router aliases (e.g.
+ /// `router:software-engineering`), this differs from the request model.
+ resolved_model: Option<String>,
+}
+
+impl ExtractedUsage {
+ fn is_empty(&self) -> bool {
+ self.prompt_tokens.is_none()
+ && self.completion_tokens.is_none()
+ && self.total_tokens.is_none()
+ && self.resolved_model.is_none()
+ }
+
+ fn from_json(value: &serde_json::Value) -> Self {
+ let mut out = Self::default();
+ if let Some(model) = value.get("model").and_then(|v| v.as_str()) {
+ if !model.is_empty() {
+ out.resolved_model = Some(model.to_string());
+ }
+ }
+ if let Some(u) = value.get("usage") {
+ // OpenAI-shape usage
+ out.prompt_tokens = u.get("prompt_tokens").and_then(|v| v.as_i64());
+ out.completion_tokens = u.get("completion_tokens").and_then(|v| v.as_i64());
+ out.total_tokens = u.get("total_tokens").and_then(|v| v.as_i64());
+ out.cached_input_tokens = u
+ .get("prompt_tokens_details")
+ .and_then(|d| d.get("cached_tokens"))
+ .and_then(|v| v.as_i64());
+ out.reasoning_tokens = u
+ .get("completion_tokens_details")
+ .and_then(|d| d.get("reasoning_tokens"))
+ .and_then(|v| v.as_i64());
+
+ // Anthropic- and Gemini-shape fallbacks
+ if out.prompt_tokens.is_none() {
+ out.prompt_tokens = u.get("input_tokens").and_then(|v| v.as_i64());
+ }
+ if out.completion_tokens.is_none() {
+ out.completion_tokens = u.get("output_tokens").and_then(|v| v.as_i64());
+ }
+ if out.total_tokens.is_none() {
+ if let (Some(p), Some(c)) = (out.prompt_tokens, out.completion_tokens) {
+ out.total_tokens = Some(p + c);
+ }
+ }
+ if out.cached_input_tokens.is_none() {
+ out.cached_input_tokens = u.get("cache_read_input_tokens").and_then(|v| v.as_i64());
+ }
+ if out.cached_input_tokens.is_none() {
+ out.cached_input_tokens =
+ u.get("cached_content_token_count").and_then(|v| v.as_i64());
+ }
+ out.cache_creation_tokens = u
+ .get("cache_creation_input_tokens")
+ .and_then(|v| v.as_i64());
+ if out.reasoning_tokens.is_none() {
+ out.reasoning_tokens = u.get("thoughts_token_count").and_then(|v| v.as_i64());
+ }
+ }
+ out
+ }
+}
+
+/// Try to pull usage out of an accumulated response body.
+/// Handles both a single JSON object (non-streaming) and SSE streams where the
+/// final `data: {...}` event carries the `usage` field.
+fn extract_usage_from_bytes(buf: &[u8]) -> ExtractedUsage {
+ if buf.is_empty() {
+ return ExtractedUsage::default();
+ }
+
+ // Fast path: full-body JSON (non-streaming).
+ if let Ok(value) = serde_json::from_slice::<serde_json::Value>(buf) {
+ let u = ExtractedUsage::from_json(&value);
+ if !u.is_empty() {
+ return u;
+ }
+ }
+
+ // SSE path: scan from the end for a `data:` line containing a usage object.
+ let text = match std::str::from_utf8(buf) {
+ Ok(t) => t,
+ Err(_) => return ExtractedUsage::default(),
+ };
+ for line in text.lines().rev() {
+ let trimmed = line.trim_start();
+ let payload = match trimmed.strip_prefix("data:") {
+ Some(p) => p.trim_start(),
+ None => continue,
+ };
+ if payload == "[DONE]" || payload.is_empty() {
+ continue;
+ }
+ if !payload.contains("\"usage\"") {
+ continue;
+ }
+ if let Ok(value) = serde_json::from_str::<serde_json::Value>(payload) {
+ let u = ExtractedUsage::from_json(&value);
+ if !u.is_empty() {
+ return u;
+ }
+ }
+ }
+
+ ExtractedUsage::default()
+}
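+
+// A minimal sanity check for the SSE fallback above. The payload shape is
+// illustrative (OpenAI-style final chunk), not captured from a live provider.
+#[cfg(test)]
+mod usage_extraction_tests {
+ use super::*;
+
+ #[test]
+ fn extracts_usage_from_final_sse_event() {
+ let body = b"data: {\"id\":\"x\"}\n\ndata: {\"model\":\"gpt-4o\",\"usage\":{\"prompt_tokens\":10,\"completion_tokens\":5,\"total_tokens\":15}}\n\ndata: [DONE]\n";
+ let u = extract_usage_from_bytes(body);
+ assert_eq!(u.prompt_tokens, Some(10));
+ assert_eq!(u.completion_tokens, Some(5));
+ assert_eq!(u.total_tokens, Some(15));
+ assert_eq!(u.resolved_model.as_deref(), Some("gpt-4o"));
+ }
+}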
+
/// Trait for processing streaming chunks
/// Implementors can inject custom logic during streaming (e.g., hallucination detection, logging)
pub trait StreamProcessor: Send + 'static {
@@ -51,6 +175,18 @@ impl StreamProcessor for Box<dyn StreamProcessor> {
}
}
+/// Optional Prometheus-metric context for an LLM upstream call. When present,
+/// [`ObservableStreamProcessor`] emits `brightstaff_llm_*` metrics at
+/// first-byte / complete / error callbacks.
+#[derive(Debug, Clone)]
+pub struct LlmMetricsCtx {
+ pub provider: String,
+ pub model: String,
+ /// HTTP status of the upstream response. Used to pick `status_class` and
+ /// `error_class` on `on_complete`.
+ pub upstream_status: u16,
+}
+
/// A processor that tracks streaming metrics
pub struct ObservableStreamProcessor {
service_name: String,
@@ -60,6 +196,12 @@ pub struct ObservableStreamProcessor {
start_time: Instant,
time_to_first_token: Option<Duration>,
messages: Option<Vec<Message>>,
+ /// Accumulated response bytes used only for best-effort usage extraction
+ /// on `on_complete`. Capped at `USAGE_BUFFER_MAX`; excess chunks are dropped
+ /// from the buffer (they still pass through to the client).
+ response_buffer: Vec<u8>,
+ llm_metrics: Option<LlmMetricsCtx>,
+ metrics_recorded: bool,
}
impl ObservableStreamProcessor {
@@ -93,21 +235,42 @@ impl ObservableStreamProcessor {
start_time,
time_to_first_token: None,
messages,
+ response_buffer: Vec::new(),
+ llm_metrics: None,
+ metrics_recorded: false,
}
}
+
+ /// Attach LLM upstream metric context so the processor emits
+ /// `brightstaff_llm_*` metrics on first-byte / complete / error.
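+ ///
+ /// A usage sketch (constructor arguments elided; values are illustrative):
+ ///
+ /// ```ignore
+ /// let processor = ObservableStreamProcessor::new(/* ... */)
+ /// .with_llm_metrics(LlmMetricsCtx {
+ /// provider: "openai".to_string(),
+ /// model: "gpt-4o".to_string(),
+ /// upstream_status: 200,
+ /// });
+ /// ```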
+ pub fn with_llm_metrics(mut self, ctx: LlmMetricsCtx) -> Self {
+ self.llm_metrics = Some(ctx);
+ self
+ }
}
impl StreamProcessor for ObservableStreamProcessor {
fn process_chunk(&mut self, chunk: Bytes) -> Result