From 6131589277d564acf12dfef0ca1b880e65bf8fc1 Mon Sep 17 00:00:00 2001 From: Musa Date: Thu, 12 Feb 2026 15:34:47 -0800 Subject: [PATCH] feat: add trace listener process management and foreground mode --- cli/planoai/trace_cmd.py | 408 ++++++++++-- cli/test/source/failure.json | 1133 ++++++++++++++++++++++++++++++++++ cli/test/source/success.json | 803 ++++++++++++++++++++++++ cli/test/test_trace_cmd.py | 392 ++++++++++++ cli/uv.lock | 2 +- 5 files changed, 2676 insertions(+), 62 deletions(-) create mode 100644 cli/test/source/failure.json create mode 100644 cli/test/source/success.json create mode 100644 cli/test/test_trace_cmd.py diff --git a/cli/planoai/trace_cmd.py b/cli/planoai/trace_cmd.py index dd8194ef..6452b38e 100644 --- a/cli/planoai/trace_cmd.py +++ b/cli/planoai/trace_cmd.py @@ -1,7 +1,10 @@ import json import os import re +import signal import string +import subprocess +import sys import threading import time from collections import OrderedDict @@ -26,6 +29,7 @@ from planoai.consts import PLANO_COLOR DEFAULT_GRPC_PORT = 4317 MAX_TRACES = 50 MAX_SPANS_PER_TRACE = 500 +TRACE_LISTENER_PID_FILE = os.path.expanduser("~/.plano_trace_listener.pid") @dataclass @@ -46,6 +50,60 @@ class TraceSummary: return dt.astimezone().strftime("%Y-%m-%d %H:%M:%S") +def _is_port_in_use(port: int) -> bool: + """Check whether a local TCP listener is accepting connections on a port.""" + import socket + + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(0.2) + return s.connect_ex(("127.0.0.1", port)) == 0 + + +def _save_listener_pid(pid: int) -> None: + """Save the listener process PID to a file.""" + with open(TRACE_LISTENER_PID_FILE, "w") as f: + f.write(str(pid)) + + +def _get_listener_pid() -> int | None: + """Get the saved listener process PID, if it exists and is running.""" + if not os.path.exists(TRACE_LISTENER_PID_FILE): + return None + try: + with open(TRACE_LISTENER_PID_FILE, "r") as f: + pid = int(f.read().strip()) + # Check if process is still running + os.kill(pid, 0) # Doesn't actually kill, just checks if process exists + return pid + except (ValueError, ProcessLookupError, OSError): + # PID file is invalid or process doesn't exist + if os.path.exists(TRACE_LISTENER_PID_FILE): + os.remove(TRACE_LISTENER_PID_FILE) + return None + + +def _stop_background_listener() -> bool: + """Stop the background listener process if it's running.""" + pid = _get_listener_pid() + if pid is None: + return False + try: + os.kill(pid, signal.SIGTERM) + time.sleep(0.5) # Give it time to shut down gracefully + try: + os.kill(pid, 0) # Check if still running + os.kill(pid, signal.SIGKILL) # Force kill if still alive + except ProcessLookupError: + pass # Already dead + if os.path.exists(TRACE_LISTENER_PID_FILE): + os.remove(TRACE_LISTENER_PID_FILE) + return True + except ProcessLookupError: + if os.path.exists(TRACE_LISTENER_PID_FILE): + os.remove(TRACE_LISTENER_PID_FILE) + return False + + def _parse_filter_patterns(filter_patterns: tuple[str, ...]) -> list[str]: parts: list[str] = [] for raw in filter_patterns: @@ -428,6 +486,10 @@ class _OTLPTraceServicer(trace_service_pb2_grpc.TraceServiceServicer): _console = Console(stderr=True) + # we have to stream events otherwise the payload is too slow to process + def __init__(self, stream_events: bool = True) -> None: + self._stream_events = stream_events + def Export(self, request, context): # noqa: N802 for resource_spans in request.resource_spans: service_name = "unknown" @@ -458,14 +520,15 @@ class _OTLPTraceServicer(trace_service_pb2_grpc.TraceServiceServicer): dur_s = dur_ns / 1_000_000_000 dur_str = f"{dur_s:.3f}".rstrip("0").rstrip(".") dur_str = f"{dur_str}s" - self._console.print( - f"[dim]{span_start}[/dim], " - f"trace=[yellow]{short_id}[/yellow], " - f"span=[yellow]{short_span}[/yellow], " - f"[bold {_service_color(service_name)}]{service_name}[/bold {_service_color(service_name)}] " - f"[cyan]{span.name}[/cyan] " - f"[dim]({dur_str})[/dim]" - ) + if self._stream_events: + self._console.print( + f"[dim]{span_start}[/dim], " + f"trace=[yellow]{short_id}[/yellow], " + f"span=[yellow]{short_span}[/yellow], " + f"[bold {_service_color(service_name)}]{service_name}[/bold {_service_color(service_name)}] " + f"[cyan]{span.name}[/cyan] " + f"[dim]({dur_str})[/dim]" + ) return trace_service_pb2.ExportTraceServiceResponse() @@ -488,7 +551,9 @@ class _TraceQueryHandler(grpc.GenericRpcHandler): return json.dumps({"traces": traces}, separators=(",", ":")).encode("utf-8") -def _create_trace_server(host: str, grpc_port: int) -> grpc.Server: +def _create_trace_server( + host: str, grpc_port: int, stream_events: bool = False +) -> grpc.Server: """Create, bind, and start an OTLP/gRPC trace-collection server. Returns the running ``grpc.Server``. The caller is responsible @@ -499,31 +564,113 @@ def _create_trace_server(host: str, grpc_port: int) -> grpc.Server: handlers=[_TraceQueryHandler()], ) trace_service_pb2_grpc.add_TraceServiceServicer_to_server( - _OTLPTraceServicer(), grpc_server + _OTLPTraceServicer(stream_events=stream_events), grpc_server ) grpc_server.add_insecure_port(f"{host}:{grpc_port}") grpc_server.start() return grpc_server -def _start_trace_listener(host: str, grpc_port: int) -> None: - """Start the OTLP/gRPC listener and block until interrupted.""" - console = Console() - grpc_server = _create_trace_server(host, grpc_port) +def _start_trace_listener_foreground( + host: str, grpc_port: int, debug: bool = False +) -> None: + """Start the OTLP/gRPC listener in foreground (blocking). + + This is the actual server implementation that blocks until interrupted. + """ + console = Console() + grpc_server = _create_trace_server(host, grpc_port, stream_events=debug) + + if debug: + console.print() + console.print( + f"[bold {PLANO_COLOR}]Trace listener started[/bold {PLANO_COLOR}]" + ) + console.print( + f"[green]●[/green] gRPC (OTLP receiver) on [cyan]{host}:{grpc_port}[/cyan]" + ) + console.print( + "[dim]Debug mode: streaming trace events. Press Ctrl+C to stop.[/dim]" + ) + console.print() + + # Save PID for background mode + _save_listener_pid(os.getpid()) - console.print() - console.print(f"[bold {PLANO_COLOR}]Listening for traces...[/bold {PLANO_COLOR}]") - console.print( - f"[green]●[/green] gRPC (OTLP receiver) on [cyan]{host}:{grpc_port}[/cyan]" - ) - console.print("[dim]Press Ctrl+C to stop.[/dim]") - console.print() try: grpc_server.wait_for_termination() except KeyboardInterrupt: - pass + if debug: + console.print("\n[dim]Stopping trace listener...[/dim]") finally: grpc_server.stop(grace=2) + if os.path.exists(TRACE_LISTENER_PID_FILE): + os.remove(TRACE_LISTENER_PID_FILE) + + +def _start_trace_listener(host: str, grpc_port: int, debug: bool = False) -> None: + """Start the OTLP/gRPC listener. + + Args: + host: Host to bind to + grpc_port: Port to bind to + debug: If True, runs in foreground with event streaming. If False, daemonizes. + """ + console = Console() + + # Check if port is already in use + if _is_port_in_use(grpc_port): + existing_pid = _get_listener_pid() + if existing_pid: + console.print( + f"[yellow]⚠[/yellow] Trace listener already running on port [cyan]{grpc_port}[/cyan] (PID: {existing_pid})" + ) + else: + console.print( + f"[red]✗[/red] Port [cyan]{grpc_port}[/cyan] is already in use by another process" + ) + console.print(f"\n[dim]Check what's using the port:[/dim]") + console.print(f" [cyan]lsof -i :{grpc_port}[/cyan]") + return + + if debug: + # Run in foreground with debug output + _start_trace_listener_foreground(host, grpc_port, debug=True) + else: + # Fork to background + pid = os.fork() + if pid > 0: + # Parent process + time.sleep(0.5) # Give child time to start + if _is_port_in_use(grpc_port): + console.print() + console.print( + f"[bold {PLANO_COLOR}]Trace listener started[/bold {PLANO_COLOR}]" + ) + console.print( + f"[green]●[/green] gRPC (OTLP receiver) on [cyan]{host}:{grpc_port}[/cyan]" + ) + console.print(f"[dim]Process ID: {pid}[/dim]") + console.print( + "[dim]Use [cyan]planoai trace[/cyan] to view collected traces.[/dim]" + ) + console.print( + "[dim]Use [cyan]planoai trace listen --debug[/cyan] to see live events.[/dim]" + ) + console.print() + else: + console.print(f"[red]✗[/red] Failed to start trace listener") + return + else: + # Child process - become daemon + os.setsid() # Create new session + devnull = os.open(os.devnull, os.O_RDWR) + os.dup2(devnull, 0) + os.dup2(devnull, 1) + os.dup2(devnull, 2) + if devnull > 2: + os.close(devnull) + _start_trace_listener_foreground(host, grpc_port, debug=False) def start_trace_listener_background( @@ -534,7 +681,7 @@ def start_trace_listener_background( Returns the running ``grpc.Server`` so the caller can call ``server.stop()`` later. """ - return _create_trace_server(host, grpc_port) + return _create_trace_server(host, grpc_port, stream_events=False) def _span_time_ns(span: dict[str, Any], key: str) -> int: @@ -562,13 +709,13 @@ def _trace_summary(trace: dict[str, Any]) -> TraceSummary: def _service_color(service: str) -> str: service = service.lower() if "inbound" in service: - return "white" + return "#4860fa" if "outbound" in service: - return "white" + return "#57d9a9" if "orchestrator" in service: return PLANO_COLOR if "routing" in service: - return "magenta" + return "#e3a2fa" if "agent" in service: return "cyan" if "llm" in service: @@ -576,6 +723,79 @@ def _service_color(service: str) -> str: return "white" +def _error_symbol(status_code: str) -> str: + code = int(status_code) if status_code.isdigit() else 0 + + if code >= 500: + return "💥" # Server error - something broke + elif code == 429: + return "🚦" # Rate limited + elif code == 404: + return "🔍" # Not found + elif code == 403: + return "🚫" # Forbidden + elif code == 401: + return "🔐" # Unauthorized + elif code >= 400: + return "⚠️" # Client error + else: + return "❓" # Generic error + + +def _error_description(status_code: str) -> str: + """Return a developer-friendly description of the error.""" + code = int(status_code) if status_code.isdigit() else 0 + + if code == 500: + return "Internal Server Error" + elif code == 502: + return "Bad Gateway" + elif code == 503: + return "Service Unavailable" + elif code == 504: + return "Gateway Timeout" + elif code >= 500: + return "Server Error" + elif code == 429: + return "Rate Limit Exceeded" + elif code == 404: + return "Not Found" + elif code == 403: + return "Forbidden - Access Denied" + elif code == 401: + return "Unauthorized - Auth Required" + elif code == 400: + return "Bad Request" + elif code >= 400: + return "Client Error" + else: + return "Error" + + +def _detect_error(span: dict[str, Any]) -> tuple[bool, str, str]: + """Detect if span has an error and return (has_error, status_code, error_msg). + + Returns: + tuple: (has_error, status_code, error_description) + """ + attrs = _attrs(span) + status_code = attrs.get("http.status_code", "") + + # Check for non-2xx status codes + if status_code and status_code.isdigit(): + code = int(status_code) + if code >= 400: + return True, status_code, _error_description(status_code) + + # Check for explicit error attributes + if "error.message" in attrs: + return True, status_code or "unknown", attrs["error.message"] + if "exception.message" in attrs: + return True, status_code or "unknown", attrs["exception.message"] + + return False, "", "" + + # Attributes to show for inbound/outbound spans when not verbose (trimmed view). _INBOUND_OUTBOUND_ATTR_KEYS = ( "http.method", @@ -599,10 +819,20 @@ def _trim_attrs_for_display( def _sorted_attr_items(attrs: dict[str, str]) -> list[tuple[str, str]]: + # Error attributes always come first + error_priority = [ + "http.status_code", + "error.type", + "error.message", + "error.stack", + "exception.type", + "exception.message", + ] + + # Then regular priority attributes priority = [ "http.method", "http.target", - "http.status_code", "guid:x-request-id", "request_size", "response_size", @@ -619,7 +849,10 @@ def _sorted_attr_items(attrs: dict[str, str]) -> list[tuple[str, str]]: "llm.duration_ms", "llm.response_bytes", ] - prioritized = [(k, attrs[k]) for k in priority if k in attrs] + + # Combine error priority with regular priority + full_priority = error_priority + priority + prioritized = [(k, attrs[k]) for k in full_priority if k in attrs] prioritized_keys = {k for k, _ in prioritized} remaining = [(k, v) for k, v in attrs.items() if k not in prioritized_keys] remaining.sort(key=lambda item: item[0]) @@ -627,8 +860,14 @@ def _sorted_attr_items(attrs: dict[str, str]) -> list[tuple[str, str]]: def _display_attr_value(key: str, value: str) -> str: - if key == "http.status_code" and value != "200": - return f"{value} ⚠️" + if key == "http.status_code": + if value.isdigit(): + code = int(value) + if code >= 400: + return f"{value} {_error_symbol(value)}" + elif code >= 200 and code < 300: + return f"{value}" + return value return value @@ -648,7 +887,7 @@ def _build_tree(trace: dict[str, Any], console: Console, verbose: bool = False) ) spans.sort(key=lambda s: _span_time_ns(s, "startTimeUnixNano")) - tree = Tree("", guide_style="dim") + tree = Tree("", guide_style="dim #5b5a5c bold") for span in spans: service = span.get("service", "plano(unknown)") @@ -656,22 +895,52 @@ def _build_tree(trace: dict[str, Any], console: Console, verbose: bool = False) offset_ms = max( 0, (_span_time_ns(span, "startTimeUnixNano") - start_ns) / 1_000_000 ) - color = _service_color(service) - label = Text(f"{offset_ms:.0f}ms ", style="yellow") - label.append(service, style=f"bold {color}") - if name: - label.append(f" {name}", style="dim white") + + # Check for errors in this span + has_error, error_code, error_desc = _detect_error(span) + + if has_error: + # Create error banner above the span + error_banner = Text() + error_banner.append(error_desc, style="bright_red") + tree.add(error_banner) + + # Style the span label in light red + label = Text(f"{offset_ms:.0f}ms ", style="#ff6b6b") + label.append(service, style="bold #ff6b6b") + if name: + label.append(f" {name}", style="#ff6b6b italic") + else: + # Normal styling + color = _service_color(service) + label = Text(f"{offset_ms:.0f}ms ", style="#949c99") + label.append(service, style=f"bold {color}") + if name: + label.append(f" {name}", style="dim white bold italic") node = tree.add(label) attrs = _trim_attrs_for_display(_attrs(span), service, verbose) sorted_items = list(_sorted_attr_items(attrs)) for idx, (key, value) in enumerate(sorted_items): attr_line = Text() - attr_line.append(f"{key}: ", style="white") - attr_line.append( - _display_attr_value(key, str(value)), - style=f"{PLANO_COLOR}", - ) + # attribute key + attr_line.append(f"{key}: ", style="#a4a9aa") + # attribute value + if key == "http.status_code" and value.isdigit(): + val_int = int(value) + val_style = "bold red" if val_int >= 400 else "green" + attr_line.append(_display_attr_value(key, str(value)), style=val_style) + elif key in [ + "error.message", + "exception.message", + "error.type", + "exception.type", + ]: + attr_line.append(_display_attr_value(key, str(value)), style="red") + else: + attr_line.append( + _display_attr_value(key, str(value)), style=f"{PLANO_COLOR} bold" + ) if idx == len(sorted_items) - 1: attr_line.append("\n") node.add(attr_line) @@ -882,7 +1151,7 @@ def _run_trace_show( _build_tree(trace_obj, console, verbose=verbose) -@click.group(invoke_without_command=True) +@click.command() @click.argument("target", required=False) @click.option( "--filter", @@ -908,6 +1177,19 @@ def _run_trace_show( @click.option("--limit", type=int, default=None, help="Limit results.") @click.option("--since", default=None, help="Look back window (e.g. 5m, 2h, 1d).") @click.option("--json", "json_out", is_flag=True, help="Output raw JSON.") +@click.option("--host", default="0.0.0.0", show_default=True) +@click.option( + "--port", + type=int, + default=DEFAULT_GRPC_PORT, + show_default=True, + help="gRPC port for receiving OTLP traces when target is 'listen'.", +) +@click.option( + "--debug", + is_flag=True, + help="When used with target 'listen', keep terminal active and stream trace events.", +) @click.option( "--verbose", "-v", @@ -925,12 +1207,14 @@ def trace( limit, since, json_out, + host, + port, + debug, verbose, ): """Trace requests from the local OTLP listener.""" - if ctx.invoked_subcommand: - return - if target == "listen" and not any( + # Handle operational shortcuts when invoked as target values. + has_show_options = any( [ filter_patterns, where_filters, @@ -941,9 +1225,25 @@ def trace( json_out, verbose, ] - ): - _start_trace_listener("0.0.0.0", DEFAULT_GRPC_PORT) + ) + + if target == "listen" and not has_show_options: + _start_trace_listener(host, port, debug=debug) return + + if debug and target != "listen": + raise click.ClickException("--debug is only valid with target 'listen'.") + if (host != "0.0.0.0" or port != DEFAULT_GRPC_PORT) and target != "listen": + raise click.ClickException("--host/--port are only valid with target 'listen'.") + + if target in ("stop", "down") and not has_show_options: + console = Console() + if _stop_background_listener(): + console.print(f"[green]✓[/green] Trace listener stopped") + else: + console.print(f"[dim]No background trace listener running[/dim]") + return + _run_trace_show( target, filter_patterns, @@ -955,17 +1255,3 @@ def trace( json_out, verbose, ) - - -@trace.command("listen") -@click.option("--host", default="0.0.0.0", show_default=True) -@click.option( - "--port", - type=int, - default=DEFAULT_GRPC_PORT, - show_default=True, - help="gRPC port for receiving OTLP traces.", -) -def trace_listen(host: str, port: int) -> None: - """Listen for OTLP/gRPC traces.""" - _start_trace_listener(host, port) diff --git a/cli/test/source/failure.json b/cli/test/source/failure.json new file mode 100644 index 00000000..c9d309d5 --- /dev/null +++ b/cli/test/source/failure.json @@ -0,0 +1,1133 @@ +{ + "traces": [ + { + "trace_id": "f7a31829c4b5d6e8a9f0b1c2d3e4f5a6", + "spans": [ + { + "traceId": "f7a31829c4b5d6e8a9f0b1c2d3e4f5a6", + "spanId": "2e7269ca30eb05fa", + "parentSpanId": "d6e7de4dfc43c662", + "name": "POST archfc.katanemo.dev/v1/chat/completions", + "startTimeUnixNano": "1770937800292451000", + "endTimeUnixNano": "1770937800552403000", + "service": "plano(outbound)", + "attributes": [ + { + "key": "node_id", + "value": { + "stringValue": "" + } + }, + { + "key": "zone", + "value": { + "stringValue": "" + } + }, + { + "key": "guid:x-request-id", + "value": { + "stringValue": "1f2bde55-52fb-a792-a055-g3g2cfd76gbg" + } + }, + { + "key": "http.url", + "value": { + "stringValue": "https://archfc.katanemo.dev/v1/chat/completions" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "downstream_cluster", + "value": { + "stringValue": "-" + } + }, + { + "key": "user_agent", + "value": { + "stringValue": "-" + } + }, + { + "key": "http.protocol", + "value": { + "stringValue": "HTTP/1.1" + } + }, + { + "key": "peer.address", + "value": { + "stringValue": "127.0.0.1" + } + }, + { + "key": "request_size", + "value": { + "stringValue": "3293" + } + }, + { + "key": "response_size", + "value": { + "stringValue": "0" + } + }, + { + "key": "component", + "value": { + "stringValue": "proxy" + } + }, + { + "key": "upstream_cluster", + "value": { + "stringValue": "arch" + } + }, + { + "key": "upstream_cluster.name", + "value": { + "stringValue": "arch" + } + }, + { + "key": "http.status_code", + "value": { + "stringValue": "503" + } + }, + { + "key": "response_flags", + "value": { + "stringValue": "UF,URX" + } + } + ] + }, + { + "traceId": "f7a31829c4b5d6e8a9f0b1c2d3e4f5a6", + "spanId": "5345g8a4b88b51d9", + "parentSpanId": "556g979d6d47395f", + "name": "routing", + "startTimeUnixNano": "1770937800576995630", + "endTimeUnixNano": "1770937800577104880", + "service": "plano(routing)", + "attributes": [ + { + "key": "component", + "value": { + "stringValue": "routing" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "http.target", + "value": { + "stringValue": "/v1/chat/completions" + } + }, + { + "key": "model.requested", + "value": { + "stringValue": "openai/gpt-4o-mini" + } + }, + { + "key": "model.alias_resolved", + "value": { + "stringValue": "openai/gpt-4o-mini" + } + }, + { + "key": "service.name.override", + "value": { + "stringValue": "plano(routing)" + } + }, + { + "key": "routing.determination_ms", + "value": { + "intValue": "0" + } + }, + { + "key": "route.selected_model", + "value": { + "stringValue": "none" + } + } + ] + }, + { + "traceId": "f7a31829c4b5d6e8a9f0b1c2d3e4f5a6", + "spanId": "556g979d6d47395f", + "parentSpanId": "9422e3356e969f82", + "name": "POST /v1/chat/completions openai/gpt-4o-mini", + "startTimeUnixNano": "1770937800576869630", + "endTimeUnixNano": "1770937800878245214", + "service": "plano(llm)", + "attributes": [ + { + "key": "component", + "value": { + "stringValue": "llm" + } + }, + { + "key": "request_id", + "value": { + "stringValue": "1f2bde55-52fb-a792-a055-g3g2cfd76gbg" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "http.path", + "value": { + "stringValue": "/v1/chat/completions" + } + }, + { + "key": "service.name.override", + "value": { + "stringValue": "plano(llm)" + } + }, + { + "key": "llm.temperature", + "value": { + "stringValue": "0.1" + } + }, + { + "key": "llm.user_message_preview", + "value": { + "stringValue": "What's the weather in Seattle?" + } + }, + { + "key": "llm.model", + "value": { + "stringValue": "openai/gpt-4o-mini" + } + }, + { + "key": "service.name.override", + "value": { + "stringValue": "plano(llm)" + } + }, + { + "key": "llm.error", + "value": { + "stringValue": "upstream_connection_failure" + } + }, + { + "key": "signals.quality", + "value": { + "stringValue": "Error" + } + } + ] + }, + { + "traceId": "f7a31829c4b5d6e8a9f0b1c2d3e4f5a6", + "spanId": "eb459c08901b7d0c", + "parentSpanId": "", + "name": "POST /v1/chat/completions", + "startTimeUnixNano": "1770937800183402000", + "endTimeUnixNano": "1770937800981234000", + "service": "plano(inbound)", + "attributes": [ + { + "key": "node_id", + "value": { + "stringValue": "" + } + }, + { + "key": "zone", + "value": { + "stringValue": "" + } + }, + { + "key": "guid:x-request-id", + "value": { + "stringValue": "1f2bde55-52fb-a792-a055-g3g2cfd76gbg" + } + }, + { + "key": "http.url", + "value": { + "stringValue": "https://localhost/v1/chat/completions" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "downstream_cluster", + "value": { + "stringValue": "-" + } + }, + { + "key": "user_agent", + "value": { + "stringValue": "Python/3.11 aiohttp/3.13.2" + } + }, + { + "key": "http.protocol", + "value": { + "stringValue": "HTTP/1.1" + } + }, + { + "key": "peer.address", + "value": { + "stringValue": "172.18.0.1" + } + }, + { + "key": "request_size", + "value": { + "stringValue": "125" + } + }, + { + "key": "response_size", + "value": { + "stringValue": "156" + } + }, + { + "key": "component", + "value": { + "stringValue": "proxy" + } + }, + { + "key": "upstream_cluster", + "value": { + "stringValue": "bright_staff" + } + }, + { + "key": "upstream_cluster.name", + "value": { + "stringValue": "bright_staff" + } + }, + { + "key": "http.status_code", + "value": { + "stringValue": "503" + } + }, + { + "key": "response_flags", + "value": { + "stringValue": "UF" + } + } + ] + }, + { + "traceId": "f7a31829c4b5d6e8a9f0b1c2d3e4f5a6", + "spanId": "9422e3356e969f82", + "parentSpanId": "d6e7de4dfc43c662", + "name": "weather_agent /v1/chat/completions", + "startTimeUnixNano": "1770937800553490130", + "endTimeUnixNano": "1770937800879123299", + "service": "plano(agent)", + "attributes": [ + { + "key": "agent_id", + "value": { + "stringValue": "weather_agent" + } + }, + { + "key": "message_count", + "value": { + "stringValue": "1" + } + }, + { + "key": "service.name.override", + "value": { + "stringValue": "plano(agent)" + } + }, + { + "key": "agent.error", + "value": { + "stringValue": "llm_call_failed" + } + } + ] + }, + { + "traceId": "f7a31829c4b5d6e8a9f0b1c2d3e4f5a6", + "spanId": "d6e7de4dfc43c662", + "parentSpanId": "eb459c08901b7d0c", + "name": "travel_booking_service", + "startTimeUnixNano": "1770937800188669630", + "endTimeUnixNano": "1770937800979234091", + "service": "plano(orchestrator)", + "attributes": [ + { + "key": "component", + "value": { + "stringValue": "orchestrator" + } + }, + { + "key": "request_id", + "value": { + "stringValue": "1f2bde55-52fb-a792-a055-g3g2cfd76gbg" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "http.path", + "value": { + "stringValue": "/agents/v1/chat/completions" + } + }, + { + "key": "service.name.override", + "value": { + "stringValue": "plano(orchestrator)" + } + }, + { + "key": "selection.listener", + "value": { + "stringValue": "travel_booking_service" + } + }, + { + "key": "selection.agent_count", + "value": { + "intValue": "1" + } + }, + { + "key": "selection.agents", + "value": { + "stringValue": "weather_agent" + } + }, + { + "key": "selection.determination_ms", + "value": { + "stringValue": "264.48" + } + } + ] + } + ] + }, + { + "trace_id": "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6", + "spans": [ + { + "traceId": "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6", + "spanId": "3f8379db41fc16gb", + "parentSpanId": "e7f8ef5efd54d773", + "name": "POST archfc.katanemo.dev/v1/chat/completions", + "startTimeUnixNano": "1770937900292451000", + "endTimeUnixNano": "1770937900552403000", + "service": "plano(outbound)", + "attributes": [ + { + "key": "node_id", + "value": { + "stringValue": "" + } + }, + { + "key": "zone", + "value": { + "stringValue": "" + } + }, + { + "key": "guid:x-request-id", + "value": { + "stringValue": "2g3cef66-63gc-b8a3-b166-h4h3dge87hch" + } + }, + { + "key": "http.url", + "value": { + "stringValue": "https://archfc.katanemo.dev/v1/chat/completions" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "downstream_cluster", + "value": { + "stringValue": "-" + } + }, + { + "key": "user_agent", + "value": { + "stringValue": "-" + } + }, + { + "key": "http.protocol", + "value": { + "stringValue": "HTTP/1.1" + } + }, + { + "key": "peer.address", + "value": { + "stringValue": "127.0.0.1" + } + }, + { + "key": "request_size", + "value": { + "stringValue": "3293" + } + }, + { + "key": "response_size", + "value": { + "stringValue": "341" + } + }, + { + "key": "component", + "value": { + "stringValue": "proxy" + } + }, + { + "key": "upstream_cluster", + "value": { + "stringValue": "arch" + } + }, + { + "key": "upstream_cluster.name", + "value": { + "stringValue": "arch" + } + }, + { + "key": "http.status_code", + "value": { + "stringValue": "200" + } + }, + { + "key": "response_flags", + "value": { + "stringValue": "-" + } + } + ] + }, + { + "traceId": "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6", + "spanId": "6456h9b5c99c62ea", + "parentSpanId": "667ha8ae7e58406g", + "name": "routing", + "startTimeUnixNano": "1770937900576995630", + "endTimeUnixNano": "1770937900577104880", + "service": "plano(routing)", + "attributes": [ + { + "key": "component", + "value": { + "stringValue": "routing" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "http.target", + "value": { + "stringValue": "/v1/chat/completions" + } + }, + { + "key": "model.requested", + "value": { + "stringValue": "openai/gpt-4o-mini" + } + }, + { + "key": "model.alias_resolved", + "value": { + "stringValue": "openai/gpt-4o-mini" + } + }, + { + "key": "service.name.override", + "value": { + "stringValue": "plano(routing)" + } + }, + { + "key": "routing.determination_ms", + "value": { + "intValue": "0" + } + }, + { + "key": "route.selected_model", + "value": { + "stringValue": "none" + } + } + ] + }, + { + "traceId": "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6", + "spanId": "667ha8ae7e58406g", + "parentSpanId": "a533f4467f07ag93", + "name": "POST /v1/chat/completions openai/gpt-4o-mini", + "startTimeUnixNano": "1770937900576869630", + "endTimeUnixNano": "1770937900789123214", + "service": "plano(llm)", + "attributes": [ + { + "key": "component", + "value": { + "stringValue": "llm" + } + }, + { + "key": "request_id", + "value": { + "stringValue": "2g3cef66-63gc-b8a3-b166-h4h3dge87hch" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "http.path", + "value": { + "stringValue": "/v1/chat/completions" + } + }, + { + "key": "service.name.override", + "value": { + "stringValue": "plano(llm)" + } + }, + { + "key": "llm.temperature", + "value": { + "stringValue": "0.1" + } + }, + { + "key": "llm.user_message_preview", + "value": { + "stringValue": "What's the weather in Seattle?" + } + }, + { + "key": "llm.model", + "value": { + "stringValue": "openai/gpt-4o-mini" + } + }, + { + "key": "service.name.override", + "value": { + "stringValue": "plano(llm)" + } + }, + { + "key": "llm.time_to_first_token", + "value": { + "intValue": "201" + } + }, + { + "key": "signals.quality", + "value": { + "stringValue": "Good" + } + } + ] + }, + { + "traceId": "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6", + "spanId": "fc560d19a12c8e1d", + "parentSpanId": "", + "name": "POST /v1/chat/completions", + "startTimeUnixNano": "1770937900183402000", + "endTimeUnixNano": "1770937900892345000", + "service": "plano(inbound)", + "attributes": [ + { + "key": "node_id", + "value": { + "stringValue": "" + } + }, + { + "key": "zone", + "value": { + "stringValue": "" + } + }, + { + "key": "guid:x-request-id", + "value": { + "stringValue": "2g3cef66-63gc-b8a3-b166-h4h3dge87hch" + } + }, + { + "key": "http.url", + "value": { + "stringValue": "https://localhost/v1/chat/completions" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "downstream_cluster", + "value": { + "stringValue": "-" + } + }, + { + "key": "user_agent", + "value": { + "stringValue": "Python/3.11 aiohttp/3.13.2" + } + }, + { + "key": "http.protocol", + "value": { + "stringValue": "HTTP/1.1" + } + }, + { + "key": "peer.address", + "value": { + "stringValue": "172.18.0.1" + } + }, + { + "key": "request_size", + "value": { + "stringValue": "125" + } + }, + { + "key": "response_size", + "value": { + "stringValue": "234" + } + }, + { + "key": "component", + "value": { + "stringValue": "proxy" + } + }, + { + "key": "upstream_cluster", + "value": { + "stringValue": "bright_staff" + } + }, + { + "key": "upstream_cluster.name", + "value": { + "stringValue": "bright_staff" + } + }, + { + "key": "http.status_code", + "value": { + "stringValue": "429" + } + }, + { + "key": "response_flags", + "value": { + "stringValue": "RL" + } + } + ] + }, + { + "traceId": "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6", + "spanId": "71617cb8b71d62ab", + "parentSpanId": "667ha8ae7e58406g", + "name": "POST api.openai.com/v1/chat/completions", + "startTimeUnixNano": "1770937900589205000", + "endTimeUnixNano": "1770937900787456000", + "service": "plano(outbound)", + "attributes": [ + { + "key": "node_id", + "value": { + "stringValue": "" + } + }, + { + "key": "zone", + "value": { + "stringValue": "" + } + }, + { + "key": "guid:x-request-id", + "value": { + "stringValue": "2g3cef66-63gc-b8a3-b166-h4h3dge87hch" + } + }, + { + "key": "http.url", + "value": { + "stringValue": "https://api.openai.com/v1/chat/completions" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "downstream_cluster", + "value": { + "stringValue": "-" + } + }, + { + "key": "user_agent", + "value": { + "stringValue": "AsyncOpenAI/Python 2.17.0" + } + }, + { + "key": "http.protocol", + "value": { + "stringValue": "HTTP/1.1" + } + }, + { + "key": "peer.address", + "value": { + "stringValue": "127.0.0.1" + } + }, + { + "key": "request_size", + "value": { + "stringValue": "930" + } + }, + { + "key": "response_size", + "value": { + "stringValue": "346" + } + }, + { + "key": "component", + "value": { + "stringValue": "proxy" + } + }, + { + "key": "upstream_cluster", + "value": { + "stringValue": "openai" + } + }, + { + "key": "upstream_cluster.name", + "value": { + "stringValue": "openai" + } + }, + { + "key": "http.status_code", + "value": { + "stringValue": "200" + } + }, + { + "key": "response_flags", + "value": { + "stringValue": "-" + } + } + ] + }, + { + "traceId": "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6", + "spanId": "a533f4467f07ag93", + "parentSpanId": "e7f8ef5efd54d773", + "name": "weather_agent /v1/chat/completions", + "startTimeUnixNano": "1770937900553490130", + "endTimeUnixNano": "1770937900790234299", + "service": "plano(agent)", + "attributes": [ + { + "key": "agent_id", + "value": { + "stringValue": "weather_agent" + } + }, + { + "key": "message_count", + "value": { + "stringValue": "1" + } + }, + { + "key": "service.name.override", + "value": { + "stringValue": "plano(agent)" + } + } + ] + }, + { + "traceId": "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6", + "spanId": "e7f8ef5efd54d773", + "parentSpanId": "fc560d19a12c8e1d", + "name": "travel_booking_service", + "startTimeUnixNano": "1770937900188669630", + "endTimeUnixNano": "1770937900790456091", + "service": "plano(orchestrator)", + "attributes": [ + { + "key": "component", + "value": { + "stringValue": "orchestrator" + } + }, + { + "key": "request_id", + "value": { + "stringValue": "2g3cef66-63gc-b8a3-b166-h4h3dge87hch" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "http.path", + "value": { + "stringValue": "/agents/v1/chat/completions" + } + }, + { + "key": "service.name.override", + "value": { + "stringValue": "plano(orchestrator)" + } + }, + { + "key": "selection.listener", + "value": { + "stringValue": "travel_booking_service" + } + }, + { + "key": "selection.agent_count", + "value": { + "intValue": "1" + } + }, + { + "key": "selection.agents", + "value": { + "stringValue": "weather_agent" + } + }, + { + "key": "selection.determination_ms", + "value": { + "stringValue": "264.48" + } + }, + { + "key": "orchestrator.rate_limited", + "value": { + "stringValue": "true" + } + } + ] + } + ] + }, + { + "trace_id": "b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7", + "spans": [ + { + "traceId": "b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7", + "spanId": "gd671e2ab23d9f2e", + "parentSpanId": "", + "name": "POST /v1/chat/completions", + "startTimeUnixNano": "1770938000183402000", + "endTimeUnixNano": "1770938000245678000", + "service": "plano(inbound)", + "attributes": [ + { + "key": "node_id", + "value": { + "stringValue": "" + } + }, + { + "key": "zone", + "value": { + "stringValue": "" + } + }, + { + "key": "guid:x-request-id", + "value": { + "stringValue": "3h4dfg77-74hd-c9b4-c277-i5i4ehf98idi" + } + }, + { + "key": "http.url", + "value": { + "stringValue": "https://localhost/v1/chat/completions" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "downstream_cluster", + "value": { + "stringValue": "-" + } + }, + { + "key": "user_agent", + "value": { + "stringValue": "Python/3.11 aiohttp/3.13.2" + } + }, + { + "key": "http.protocol", + "value": { + "stringValue": "HTTP/1.1" + } + }, + { + "key": "peer.address", + "value": { + "stringValue": "172.18.0.1" + } + }, + { + "key": "request_size", + "value": { + "stringValue": "125" + } + }, + { + "key": "response_size", + "value": { + "stringValue": "187" + } + }, + { + "key": "component", + "value": { + "stringValue": "proxy" + } + }, + { + "key": "upstream_cluster", + "value": { + "stringValue": "bright_staff" + } + }, + { + "key": "upstream_cluster.name", + "value": { + "stringValue": "bright_staff" + } + }, + { + "key": "http.status_code", + "value": { + "stringValue": "500" + } + }, + { + "key": "response_flags", + "value": { + "stringValue": "UH" + } + } + ] + } + ] + } + ] + } diff --git a/cli/test/source/success.json b/cli/test/source/success.json new file mode 100644 index 00000000..506de2e8 --- /dev/null +++ b/cli/test/source/success.json @@ -0,0 +1,803 @@ +{ + "traces": [ + { + "trace_id": "86f21585168a31a23578d77096cc143b", + "spans": [ + { + "traceId": "86f21585168a31a23578d77096cc143b", + "spanId": "1d6159b920daf4e9", + "parentSpanId": "c5d6cd3cfb32b551", + "name": "POST archfc.katanemo.dev/v1/chat/completions", + "startTimeUnixNano": "1770937700292451000", + "endTimeUnixNano": "1770937700552403000", + "service": "plano(outbound)", + "attributes": [ + { + "key": "node_id", + "value": { + "stringValue": "" + } + }, + { + "key": "zone", + "value": { + "stringValue": "" + } + }, + { + "key": "guid:x-request-id", + "value": { + "stringValue": "0e1acd44-41ea-9681-9944-f2f1bec65faf" + } + }, + { + "key": "http.url", + "value": { + "stringValue": "https://archfc.katanemo.dev/v1/chat/completions" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "downstream_cluster", + "value": { + "stringValue": "-" + } + }, + { + "key": "user_agent", + "value": { + "stringValue": "-" + } + }, + { + "key": "http.protocol", + "value": { + "stringValue": "HTTP/1.1" + } + }, + { + "key": "peer.address", + "value": { + "stringValue": "127.0.0.1" + } + }, + { + "key": "request_size", + "value": { + "stringValue": "3293" + } + }, + { + "key": "response_size", + "value": { + "stringValue": "341" + } + }, + { + "key": "component", + "value": { + "stringValue": "proxy" + } + }, + { + "key": "upstream_cluster", + "value": { + "stringValue": "arch" + } + }, + { + "key": "upstream_cluster.name", + "value": { + "stringValue": "arch" + } + }, + { + "key": "http.status_code", + "value": { + "stringValue": "200" + } + }, + { + "key": "response_flags", + "value": { + "stringValue": "-" + } + } + ] + }, + { + "traceId": "86f21585168a31a23578d77096cc143b", + "spanId": "4234f793a77a40c8", + "parentSpanId": "445f868c5c36294e", + "name": "routing", + "startTimeUnixNano": "1770937700576995630", + "endTimeUnixNano": "1770937700577104880", + "service": "plano(routing)", + "attributes": [ + { + "key": "component", + "value": { + "stringValue": "routing" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "http.target", + "value": { + "stringValue": "/v1/chat/completions" + } + }, + { + "key": "model.requested", + "value": { + "stringValue": "openai/gpt-4o-mini" + } + }, + { + "key": "model.alias_resolved", + "value": { + "stringValue": "openai/gpt-4o-mini" + } + }, + { + "key": "service.name.override", + "value": { + "stringValue": "plano(routing)" + } + }, + { + "key": "routing.determination_ms", + "value": { + "intValue": "0" + } + }, + { + "key": "route.selected_model", + "value": { + "stringValue": "none" + } + } + ] + }, + { + "traceId": "86f21585168a31a23578d77096cc143b", + "spanId": "445f868c5c36294e", + "parentSpanId": "8311d2245d859e71", + "name": "POST /v1/chat/completions openai/gpt-4o-mini", + "startTimeUnixNano": "1770937700576869630", + "endTimeUnixNano": "1770937701151370214", + "service": "plano(llm)", + "attributes": [ + { + "key": "component", + "value": { + "stringValue": "llm" + } + }, + { + "key": "request_id", + "value": { + "stringValue": "0e1acd44-41ea-9681-9944-f2f1bec65faf" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "http.path", + "value": { + "stringValue": "/v1/chat/completions" + } + }, + { + "key": "service.name.override", + "value": { + "stringValue": "plano(llm)" + } + }, + { + "key": "llm.temperature", + "value": { + "stringValue": "0.1" + } + }, + { + "key": "llm.user_message_preview", + "value": { + "stringValue": "What’s the weather in Seattle?" + } + }, + { + "key": "llm.model", + "value": { + "stringValue": "openai/gpt-4o-mini" + } + }, + { + "key": "service.name.override", + "value": { + "stringValue": "plano(llm)" + } + }, + { + "key": "llm.time_to_first_token", + "value": { + "intValue": "572" + } + }, + { + "key": "signals.quality", + "value": { + "stringValue": "Good" + } + } + ] + }, + { + "traceId": "86f21585168a31a23578d77096cc143b", + "spanId": "da348b97890a6c9b", + "parentSpanId": "", + "name": "POST /v1/chat/completions", + "startTimeUnixNano": "1770937700183402000", + "endTimeUnixNano": "1770937704394122000", + "service": "plano(inbound)", + "attributes": [ + { + "key": "node_id", + "value": { + "stringValue": "" + } + }, + { + "key": "zone", + "value": { + "stringValue": "" + } + }, + { + "key": "guid:x-request-id", + "value": { + "stringValue": "0e1acd44-41ea-9681-9944-f2f1bec65faf" + } + }, + { + "key": "http.url", + "value": { + "stringValue": "https://localhost/v1/chat/completions" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "downstream_cluster", + "value": { + "stringValue": "-" + } + }, + { + "key": "user_agent", + "value": { + "stringValue": "Python/3.11 aiohttp/3.13.2" + } + }, + { + "key": "http.protocol", + "value": { + "stringValue": "HTTP/1.1" + } + }, + { + "key": "peer.address", + "value": { + "stringValue": "172.18.0.1" + } + }, + { + "key": "request_size", + "value": { + "stringValue": "125" + } + }, + { + "key": "response_size", + "value": { + "stringValue": "34401" + } + }, + { + "key": "component", + "value": { + "stringValue": "proxy" + } + }, + { + "key": "upstream_cluster", + "value": { + "stringValue": "bright_staff" + } + }, + { + "key": "upstream_cluster.name", + "value": { + "stringValue": "bright_staff" + } + }, + { + "key": "http.status_code", + "value": { + "stringValue": "200" + } + }, + { + "key": "response_flags", + "value": { + "stringValue": "-" + } + } + ] + }, + { + "traceId": "86f21585168a31a23578d77096cc143b", + "spanId": "79a116cf7d63602a", + "parentSpanId": "8b6345129425cf4a", + "name": "POST api.openai.com/v1/chat/completions", + "startTimeUnixNano": "1770937702607128000", + "endTimeUnixNano": "1770937704391625000", + "service": "plano(outbound)", + "attributes": [ + { + "key": "node_id", + "value": { + "stringValue": "" + } + }, + { + "key": "zone", + "value": { + "stringValue": "" + } + }, + { + "key": "guid:x-request-id", + "value": { + "stringValue": "0e1acd44-41ea-9681-9944-f2f1bec65faf" + } + }, + { + "key": "http.url", + "value": { + "stringValue": "https://api.openai.com/v1/chat/completions" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "downstream_cluster", + "value": { + "stringValue": "-" + } + }, + { + "key": "user_agent", + "value": { + "stringValue": "AsyncOpenAI/Python 2.17.0" + } + }, + { + "key": "http.protocol", + "value": { + "stringValue": "HTTP/1.1" + } + }, + { + "key": "peer.address", + "value": { + "stringValue": "127.0.0.1" + } + }, + { + "key": "request_size", + "value": { + "stringValue": "1927" + } + }, + { + "key": "response_size", + "value": { + "stringValue": "20646" + } + }, + { + "key": "component", + "value": { + "stringValue": "proxy" + } + }, + { + "key": "upstream_cluster", + "value": { + "stringValue": "openai" + } + }, + { + "key": "upstream_cluster.name", + "value": { + "stringValue": "openai" + } + }, + { + "key": "http.status_code", + "value": { + "stringValue": "200" + } + }, + { + "key": "response_flags", + "value": { + "stringValue": "-" + } + } + ] + }, + { + "traceId": "86f21585168a31a23578d77096cc143b", + "spanId": "60508ba7960d51bc", + "parentSpanId": "445f868c5c36294e", + "name": "POST api.openai.com/v1/chat/completions", + "startTimeUnixNano": "1770937700589205000", + "endTimeUnixNano": "1770937701149191000", + "service": "plano(outbound)", + "attributes": [ + { + "key": "node_id", + "value": { + "stringValue": "" + } + }, + { + "key": "zone", + "value": { + "stringValue": "" + } + }, + { + "key": "guid:x-request-id", + "value": { + "stringValue": "0e1acd44-41ea-9681-9944-f2f1bec65faf" + } + }, + { + "key": "http.url", + "value": { + "stringValue": "https://api.openai.com/v1/chat/completions" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "downstream_cluster", + "value": { + "stringValue": "-" + } + }, + { + "key": "user_agent", + "value": { + "stringValue": "AsyncOpenAI/Python 2.17.0" + } + }, + { + "key": "http.protocol", + "value": { + "stringValue": "HTTP/1.1" + } + }, + { + "key": "peer.address", + "value": { + "stringValue": "127.0.0.1" + } + }, + { + "key": "request_size", + "value": { + "stringValue": "930" + } + }, + { + "key": "response_size", + "value": { + "stringValue": "346" + } + }, + { + "key": "component", + "value": { + "stringValue": "proxy" + } + }, + { + "key": "upstream_cluster", + "value": { + "stringValue": "openai" + } + }, + { + "key": "upstream_cluster.name", + "value": { + "stringValue": "openai" + } + }, + { + "key": "http.status_code", + "value": { + "stringValue": "200" + } + }, + { + "key": "response_flags", + "value": { + "stringValue": "-" + } + } + ] + }, + { + "traceId": "86f21585168a31a23578d77096cc143b", + "spanId": "8311d2245d859e71", + "parentSpanId": "c5d6cd3cfb32b551", + "name": "weather_agent /v1/chat/completions", + "startTimeUnixNano": "1770937700553490130", + "endTimeUnixNano": "1770937704393946299", + "service": "plano(agent)", + "attributes": [ + { + "key": "agent_id", + "value": { + "stringValue": "weather_agent" + } + }, + { + "key": "message_count", + "value": { + "stringValue": "1" + } + }, + { + "key": "service.name.override", + "value": { + "stringValue": "plano(agent)" + } + } + ] + }, + { + "traceId": "86f21585168a31a23578d77096cc143b", + "spanId": "9eb8a70a8c167f85", + "parentSpanId": "8b6345129425cf4a", + "name": "routing", + "startTimeUnixNano": "1770937702591610381", + "endTimeUnixNano": "1770937702592150423", + "service": "plano(routing)", + "attributes": [ + { + "key": "component", + "value": { + "stringValue": "routing" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "http.target", + "value": { + "stringValue": "/v1/chat/completions" + } + }, + { + "key": "model.requested", + "value": { + "stringValue": "openai/gpt-5.2" + } + }, + { + "key": "model.alias_resolved", + "value": { + "stringValue": "openai/gpt-5.2" + } + }, + { + "key": "service.name.override", + "value": { + "stringValue": "plano(routing)" + } + }, + { + "key": "routing.determination_ms", + "value": { + "intValue": "0" + } + }, + { + "key": "route.selected_model", + "value": { + "stringValue": "none" + } + } + ] + }, + { + "traceId": "86f21585168a31a23578d77096cc143b", + "spanId": "c5d6cd3cfb32b551", + "parentSpanId": "da348b97890a6c9b", + "name": "travel_booking_service", + "startTimeUnixNano": "1770937700188669630", + "endTimeUnixNano": "1770937704393949091", + "service": "plano(orchestrator)", + "attributes": [ + { + "key": "component", + "value": { + "stringValue": "orchestrator" + } + }, + { + "key": "request_id", + "value": { + "stringValue": "0e1acd44-41ea-9681-9944-f2f1bec65faf" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "http.path", + "value": { + "stringValue": "/agents/v1/chat/completions" + } + }, + { + "key": "service.name.override", + "value": { + "stringValue": "plano(orchestrator)" + } + }, + { + "key": "selection.listener", + "value": { + "stringValue": "travel_booking_service" + } + }, + { + "key": "selection.agent_count", + "value": { + "intValue": "1" + } + }, + { + "key": "selection.agents", + "value": { + "stringValue": "weather_agent" + } + }, + { + "key": "selection.determination_ms", + "value": { + "stringValue": "264.48" + } + } + ] + }, + { + "traceId": "86f21585168a31a23578d77096cc143b", + "spanId": "8b6345129425cf4a", + "parentSpanId": "8311d2245d859e71", + "name": "POST /v1/chat/completions openai/gpt-5.2", + "startTimeUnixNano": "1770937702591499256", + "endTimeUnixNano": "1770937704393043174", + "service": "plano(llm)", + "attributes": [ + { + "key": "component", + "value": { + "stringValue": "llm" + } + }, + { + "key": "request_id", + "value": { + "stringValue": "0e1acd44-41ea-9681-9944-f2f1bec65faf" + } + }, + { + "key": "http.method", + "value": { + "stringValue": "POST" + } + }, + { + "key": "http.path", + "value": { + "stringValue": "/v1/chat/completions" + } + }, + { + "key": "service.name.override", + "value": { + "stringValue": "plano(llm)" + } + }, + { + "key": "llm.temperature", + "value": { + "stringValue": "0.7" + } + }, + { + "key": "llm.user_message_preview", + "value": { + "stringValue": "What’s the weather in Seattle?\n\nWeather data for S..." + } + }, + { + "key": "llm.model", + "value": { + "stringValue": "openai/gpt-5.2" + } + }, + { + "key": "service.name.override", + "value": { + "stringValue": "plano(llm)" + } + }, + { + "key": "llm.time_to_first_token", + "value": { + "intValue": "506" + } + }, + { + "key": "signals.quality", + "value": { + "stringValue": "Good" + } + } + ] + } + ] + } + ] + } diff --git a/cli/test/test_trace_cmd.py b/cli/test/test_trace_cmd.py new file mode 100644 index 00000000..d05d1c91 --- /dev/null +++ b/cli/test/test_trace_cmd.py @@ -0,0 +1,392 @@ +import copy +import json +from pathlib import Path + +import pytest +from click.testing import CliRunner + +from planoai.trace_cmd import trace +import planoai.trace_cmd as trace_cmd + + +def _load_success_traces() -> list[dict]: + source_path = Path(__file__).parent / "source" / "success.json" + payload = json.loads(source_path.read_text(encoding="utf-8")) + return payload["traces"] + + +def _load_failure_traces() -> list[dict]: + source_path = Path(__file__).parent / "source" / "failure.json" + payload = json.loads(source_path.read_text(encoding="utf-8")) + return payload["traces"] + + +def _build_trace_set() -> list[dict]: + traces = copy.deepcopy(_load_success_traces()) + primary = traces[0] + + secondary = copy.deepcopy(primary) + secondary["trace_id"] = "1234567890abcdef1234567890abcdef" + for span in secondary.get("spans", []): + span["traceId"] = secondary["trace_id"] + if span.get("startTimeUnixNano", "").isdigit(): + span["startTimeUnixNano"] = str( + int(span["startTimeUnixNano"]) - 1_000_000_000 + ) + if span.get("endTimeUnixNano", "").isdigit(): + span["endTimeUnixNano"] = str(int(span["endTimeUnixNano"]) - 1_000_000_000) + + return [primary, secondary] + + +def _json_from_output(output: str) -> dict: + start = output.find("{") + if start == -1: + raise AssertionError(f"No JSON object found in output:\n{output}") + return json.loads(output[start:]) + + +@pytest.fixture +def runner() -> CliRunner: + return CliRunner() + + +@pytest.fixture +def traces() -> list[dict]: + return _build_trace_set() + + +@pytest.fixture +def failure_traces() -> list[dict]: + return copy.deepcopy(_load_failure_traces()) + + +def test_trace_listen_starts_listener_with_debug_and_custom_bind_after_target( + runner, monkeypatch +): + seen = {} + + def fake_start(host: str, port: int, debug: bool) -> None: + seen["host"] = host + seen["port"] = port + seen["debug"] = debug + + monkeypatch.setattr(trace_cmd, "_start_trace_listener", fake_start) + + result = runner.invoke( + trace, ["listen", "--host", "127.0.0.1", "--port", "9876", "--debug"] + ) + + assert result.exit_code == 0, result.output + assert seen == {"host": "127.0.0.1", "port": 9876, "debug": True} + + +def test_trace_listen_starts_listener_with_debug_before_target(runner, monkeypatch): + seen = {} + + def fake_start(host: str, port: int, debug: bool) -> None: + seen["host"] = host + seen["port"] = port + seen["debug"] = debug + + monkeypatch.setattr(trace_cmd, "_start_trace_listener", fake_start) + + result = runner.invoke(trace, ["--debug", "listen"]) + + assert result.exit_code == 0, result.output + assert seen == {"host": "0.0.0.0", "port": 4317, "debug": True} + + +def test_trace_down_prints_success_when_listener_stopped(runner, monkeypatch): + monkeypatch.setattr(trace_cmd, "_stop_background_listener", lambda: True) + + result = runner.invoke(trace, ["down"]) + + assert result.exit_code == 0, result.output + assert "Trace listener stopped" in result.output + + +def test_trace_down_prints_no_listener_when_not_running(runner, monkeypatch): + monkeypatch.setattr(trace_cmd, "_stop_background_listener", lambda: False) + + result = runner.invoke(trace, ["down"]) + + assert result.exit_code == 0, result.output + assert "No background trace listener running" in result.output + + +def test_trace_debug_requires_listen_target(runner): + result = runner.invoke(trace, ["--debug", "any"]) + + assert result.exit_code != 0 + assert "--debug is only valid with target 'listen'." in result.output + + +def test_trace_host_port_requires_listen_target(runner): + result = runner.invoke(trace, ["--host", "127.0.0.1", "any"]) + + assert result.exit_code != 0 + assert "--host/--port are only valid with target 'listen'." in result.output + + +def test_trace_default_target_uses_last_and_builds_first_trace( + runner, monkeypatch, traces +): + monkeypatch.setattr(trace_cmd, "_fetch_traces_raw", lambda: copy.deepcopy(traces)) + seen = {} + + def fake_build_tree(trace_obj, _console, verbose=False): + seen["trace_id"] = trace_obj["trace_id"] + seen["verbose"] = verbose + + monkeypatch.setattr(trace_cmd, "_build_tree", fake_build_tree) + + result = runner.invoke(trace, []) + + assert result.exit_code == 0, result.output + assert seen["trace_id"] == traces[0]["trace_id"] + assert seen["verbose"] is False + + +def test_trace_list_any_prints_short_trace_ids(runner, monkeypatch, traces): + monkeypatch.setattr(trace_cmd, "_fetch_traces_raw", lambda: copy.deepcopy(traces)) + + result = runner.invoke(trace, ["--list", "any"]) + + assert result.exit_code == 0, result.output + assert "Trace IDs:" in result.output + assert traces[0]["trace_id"][:8] in result.output + assert traces[1]["trace_id"][:8] in result.output + + +def test_trace_list_target_conflict_errors(runner, traces, monkeypatch): + monkeypatch.setattr(trace_cmd, "_fetch_traces_raw", lambda: copy.deepcopy(traces)) + + result = runner.invoke(trace, ["--list", traces[0]["trace_id"]]) + + assert result.exit_code != 0 + assert "Target and --list cannot be used together." in result.output + + +def test_trace_json_list_with_limit_outputs_trace_ids(runner, monkeypatch, traces): + monkeypatch.setattr(trace_cmd, "_fetch_traces_raw", lambda: copy.deepcopy(traces)) + + result = runner.invoke(trace, ["--list", "any", "--json", "--limit", "1"]) + + assert result.exit_code == 0, result.output + payload = _json_from_output(result.output) + assert payload == {"trace_ids": [traces[0]["trace_id"]]} + + +def test_trace_json_for_short_target_returns_one_trace(runner, monkeypatch, traces): + monkeypatch.setattr(trace_cmd, "_fetch_traces_raw", lambda: copy.deepcopy(traces)) + short_target = traces[0]["trace_id"][:8] + + result = runner.invoke(trace, [short_target, "--json"]) + + assert result.exit_code == 0, result.output + payload = _json_from_output(result.output) + assert len(payload["traces"]) == 1 + assert payload["traces"][0]["trace_id"] == traces[0]["trace_id"] + + +@pytest.mark.parametrize( + ("target", "message"), + [ + ("abc", "Trace ID must be 8 or 32 hex characters."), + ("00000000", "Short trace ID must be 8 hex characters."), + ("0" * 32, "Trace ID must be 32 hex characters."), + ], +) +def test_trace_target_validation_errors(runner, target, message): + result = runner.invoke(trace, [target]) + assert result.exit_code != 0 + assert message in result.output + + +def test_trace_where_invalid_format_errors(runner): + result = runner.invoke(trace, ["any", "--where", "bad-format"]) + + assert result.exit_code != 0 + assert "Invalid --where filter(s): bad-format. Use key=value." in result.output + + +def test_trace_where_unknown_key_errors(runner, monkeypatch, traces): + monkeypatch.setattr(trace_cmd, "_fetch_traces_raw", lambda: copy.deepcopy(traces)) + + result = runner.invoke(trace, ["any", "--where", "not.a.real.key=value"]) + + assert result.exit_code != 0 + assert "Unknown --where key(s): not.a.real.key" in result.output + + +def test_trace_where_filters_to_matching_trace(runner, monkeypatch, traces): + monkeypatch.setattr(trace_cmd, "_fetch_traces_raw", lambda: copy.deepcopy(traces)) + + result = runner.invoke( + trace, ["any", "--where", "agent_id=weather_agent", "--json"] + ) + + assert result.exit_code == 0, result.output + payload = _json_from_output(result.output) + assert [trace_item["trace_id"] for trace_item in payload["traces"]] == [ + traces[0]["trace_id"], + traces[1]["trace_id"], + ] + + +def test_trace_where_and_filters_can_exclude_all(runner, monkeypatch, traces): + monkeypatch.setattr(trace_cmd, "_fetch_traces_raw", lambda: copy.deepcopy(traces)) + + result = runner.invoke( + trace, + [ + "any", + "--where", + "agent_id=weather_agent", + "--where", + "http.status_code=500", + "--json", + ], + ) + + assert result.exit_code == 0, result.output + payload = _json_from_output(result.output) + assert payload == {"traces": []} + + +def test_trace_filter_restricts_attributes_by_pattern(runner, monkeypatch, traces): + monkeypatch.setattr(trace_cmd, "_fetch_traces_raw", lambda: copy.deepcopy(traces)) + + result = runner.invoke(trace, ["any", "--filter", "http.*", "--json"]) + + assert result.exit_code == 0, result.output + payload = _json_from_output(result.output) + for trace_item in payload["traces"]: + for span in trace_item["spans"]: + for attr in span.get("attributes", []): + assert attr["key"].startswith("http.") + + +def test_trace_filter_unmatched_warns_and_returns_unfiltered( + runner, monkeypatch, traces +): + monkeypatch.setattr(trace_cmd, "_fetch_traces_raw", lambda: copy.deepcopy(traces)) + + result = runner.invoke(trace, ["any", "--filter", "not-found-*", "--json"]) + + assert result.exit_code == 0, result.output + assert ( + "Filter key(s) not found: not-found-*. Returning unfiltered traces." + in result.output + ) + payload = _json_from_output(result.output) + assert len(payload["traces"]) == len(traces) + + +def test_trace_since_can_filter_out_old_traces(runner, monkeypatch, traces): + monkeypatch.setattr(trace_cmd, "_fetch_traces_raw", lambda: copy.deepcopy(traces)) + monkeypatch.setattr(trace_cmd.time, "time", lambda: 1_999_999_999.0) + + result = runner.invoke(trace, ["any", "--since", "1m", "--json"]) + + assert result.exit_code == 0, result.output + payload = _json_from_output(result.output) + assert payload == {"traces": []} + + +def test_trace_negative_limit_errors(runner): + result = runner.invoke(trace, ["any", "--limit", "-1"]) + + assert result.exit_code != 0 + assert "Limit must be greater than or equal to 0." in result.output + + +def test_trace_empty_data_prints_no_traces_found(runner, monkeypatch): + monkeypatch.setattr(trace_cmd, "_fetch_traces_raw", lambda: []) + + result = runner.invoke(trace, []) + + assert result.exit_code == 0, result.output + assert "No traces found." in result.output + + +def test_trace_invalid_filter_token_errors(runner): + result = runner.invoke(trace, ["any", "--filter", "http.method,"]) + + assert result.exit_code != 0 + assert "Filter contains empty tokens." in result.output + + +def test_trace_failure_json_any_contains_all_fixture_trace_ids( + runner, monkeypatch, failure_traces +): + monkeypatch.setattr( + trace_cmd, "_fetch_traces_raw", lambda: copy.deepcopy(failure_traces) + ) + + result = runner.invoke(trace, ["any", "--json"]) + + assert result.exit_code == 0, result.output + payload = _json_from_output(result.output) + assert [item["trace_id"] for item in payload["traces"]] == [ + "f7a31829c4b5d6e8a9f0b1c2d3e4f5a6", + "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6", + "b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7", + ] + + +@pytest.mark.parametrize( + ("status_code", "expected_trace_ids"), + [ + ("503", ["f7a31829c4b5d6e8a9f0b1c2d3e4f5a6"]), + ("429", ["a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6"]), + ("500", ["b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7"]), + ], +) +def test_trace_failure_where_status_filters_expected_traces( + runner, monkeypatch, failure_traces, status_code, expected_trace_ids +): + monkeypatch.setattr( + trace_cmd, "_fetch_traces_raw", lambda: copy.deepcopy(failure_traces) + ) + + result = runner.invoke( + trace, ["any", "--where", f"http.status_code={status_code}", "--json"] + ) + + assert result.exit_code == 0, result.output + payload = _json_from_output(result.output) + assert [item["trace_id"] for item in payload["traces"]] == expected_trace_ids + + +def test_trace_failure_default_render_shows_service_unavailable_banner( + runner, monkeypatch, failure_traces +): + monkeypatch.setattr( + trace_cmd, "_fetch_traces_raw", lambda: copy.deepcopy(failure_traces) + ) + + result = runner.invoke(trace, []) + + assert result.exit_code == 0, result.output + assert "Service Unavailable" in result.output + assert "503" in result.output + + +def test_trace_failure_filter_keeps_http_status_code_attributes( + runner, monkeypatch, failure_traces +): + monkeypatch.setattr( + trace_cmd, "_fetch_traces_raw", lambda: copy.deepcopy(failure_traces) + ) + + result = runner.invoke(trace, ["any", "--filter", "http.status_code", "--json"]) + + assert result.exit_code == 0, result.output + payload = _json_from_output(result.output) + assert payload["traces"], "Expected traces in failure fixture" + for trace_item in payload["traces"]: + for span in trace_item["spans"]: + keys = [attr["key"] for attr in span.get("attributes", [])] + assert set(keys).issubset({"http.status_code"}) diff --git a/cli/uv.lock b/cli/uv.lock index d7e6b3a0..f8f72721 100644 --- a/cli/uv.lock +++ b/cli/uv.lock @@ -337,7 +337,7 @@ wheels = [ [[package]] name = "planoai" -version = "0.4.4" +version = "0.4.6" source = { editable = "." } dependencies = [ { name = "click" },