From 46de89590b2a682e26a4a8e432111bc459311a7c Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Mon, 9 Feb 2026 13:33:27 -0800 Subject: [PATCH] use standard tracing and logging in brightstaff (#721) --- .github/workflows/rust_tests.yml | 3 - Dockerfile | 2 +- archgw.code-workspace | 3 +- cli/planoai/config_generator.py | 27 + cli/planoai/consts.py | 1 + cli/planoai/docker_cli.py | 7 + cli/planoai/main.py | 3 +- config/arch_config_schema.yaml | 2 + config/docker-compose.dev.yaml | 2 +- config/envoy.template.yaml | 113 +---- crates/.vscode/launch.json | 5 +- crates/Cargo.lock | 177 +++---- crates/Cargo.toml | 4 - crates/brightstaff/Cargo.toml | 15 +- .../src/handlers/agent_chat_completions.rs | 403 ++++++++------- .../src/handlers/agent_selector.rs | 15 +- .../src/handlers/function_calling.rs | 52 +- .../src/handlers/integration_tests.rs | 10 +- crates/brightstaff/src/handlers/llm.rs | 302 +++++------- .../src/handlers/pipeline_processor.rs | 460 ++++-------------- .../src/handlers/response_handler.rs | 53 +- .../brightstaff/src/handlers/router_chat.rs | 122 +---- crates/brightstaff/src/handlers/utils.rs | 342 ++++++------- crates/brightstaff/src/main.rs | 46 +- crates/brightstaff/src/router/llm_router.rs | 26 +- .../src/router/orchestrator_model_v1.rs | 30 +- .../src/router/plano_orchestrator.rs | 46 +- .../brightstaff/src/router/router_model_v1.rs | 30 +- .../src/state/response_state_processor.rs | 81 ++- crates/brightstaff/src/tracing/mod.rs | 30 ++ .../src/tracing/service_name_exporter.rs | 187 +++++++ crates/brightstaff/src/utils/tracing.rs | 119 ++++- crates/common/Cargo.toml | 6 - crates/common/src/configuration.rs | 2 + crates/common/src/consts.rs | 1 - crates/common/src/traces/collector.rs | 292 ----------- crates/common/src/traces/mod.rs | 9 - .../src/traces/tests/mock_otel_collector.rs | 96 ---- crates/common/src/traces/tests/mod.rs | 4 - .../traces/tests/trace_integration_test.rs | 342 ------------- crates/llm_gateway/src/filter_context.rs | 9 +- crates/llm_gateway/src/stream_context.rs | 247 ++++++---- demos/use_cases/http_filter/README.md | 6 +- .../use_cases/http_filter/docker-compose.yaml | 17 +- .../use_cases/llm_routing/docker-compose.yaml | 31 +- demos/use_cases/mcp_filter/README.md | 6 +- .../use_cases/mcp_filter/docker-compose.yaml | 19 +- .../README.md | 10 +- .../config.yaml | 1 + .../docker-compose.yaml | 21 +- .../preference_based_routing/README.md | 4 +- .../docker-compose.yaml | 51 +- .../arch_config_full_reference_rendered.yaml | 1 + tests/e2e/docker-compose.yaml | 2 +- tests/rest/tracing.rest | 31 -- 55 files changed, 1494 insertions(+), 2432 deletions(-) create mode 100644 crates/brightstaff/src/tracing/service_name_exporter.rs delete mode 100644 crates/common/src/traces/collector.rs delete mode 100644 crates/common/src/traces/tests/mock_otel_collector.rs delete mode 100644 crates/common/src/traces/tests/mod.rs delete mode 100644 crates/common/src/traces/tests/trace_integration_test.rs delete mode 100644 tests/rest/tracing.rest diff --git a/.github/workflows/rust_tests.yml b/.github/workflows/rust_tests.yml index e871745c..d72e7af4 100644 --- a/.github/workflows/rust_tests.yml +++ b/.github/workflows/rust_tests.yml @@ -29,6 +29,3 @@ jobs: - name: Run unit tests run: cargo test --lib - - - name: Run trace integration tests - run: cargo test -p common --features trace-collection traces::tests::trace_integration_test diff --git a/Dockerfile b/Dockerfile index a9c7887f..2bd15377 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,7 +40,7 @@ COPY crates/brightstaff/src brightstaff/src RUN find common hermesllm brightstaff -name "*.rs" -exec touch {} + RUN cargo build --release -p brightstaff -FROM docker.io/envoyproxy/envoy:v1.36.4 AS envoy +FROM docker.io/envoyproxy/envoy:v1.37.0 AS envoy FROM python:3.13.6-slim AS arch diff --git a/archgw.code-workspace b/archgw.code-workspace index bd42f64a..366191ef 100644 --- a/archgw.code-workspace +++ b/archgw.code-workspace @@ -25,8 +25,7 @@ "[python]": { "editor.defaultFormatter": "ms-python.black-formatter", "editor.formatOnSave": true - }, - "rust-analyzer.cargo.features": ["trace-collection"] + } }, "extensions": { "recommendations": [ diff --git a/cli/planoai/config_generator.py b/cli/planoai/config_generator.py index 2157b714..27768546 100644 --- a/cli/planoai/config_generator.py +++ b/cli/planoai/config_generator.py @@ -6,6 +6,7 @@ import yaml from jsonschema import validate from urllib.parse import urlparse from copy import deepcopy +from planoai.consts import DEFAULT_OTEL_TRACING_GRPC_ENDPOINT SUPPORTED_PROVIDERS_WITH_BASE_URL = [ @@ -158,6 +159,32 @@ def validate_and_render_schema(): arch_tracing = config_yaml.get("tracing", {}) + # Resolution order: config yaml > OTEL_TRACING_GRPC_ENDPOINT env var > hardcoded default + opentracing_grpc_endpoint = arch_tracing.get( + "opentracing_grpc_endpoint", + os.environ.get( + "OTEL_TRACING_GRPC_ENDPOINT", DEFAULT_OTEL_TRACING_GRPC_ENDPOINT + ), + ) + # resolve env vars in opentracing_grpc_endpoint if present + if opentracing_grpc_endpoint and "$" in opentracing_grpc_endpoint: + opentracing_grpc_endpoint = os.path.expandvars(opentracing_grpc_endpoint) + print( + f"Resolved opentracing_grpc_endpoint to {opentracing_grpc_endpoint} after expanding environment variables" + ) + arch_tracing["opentracing_grpc_endpoint"] = opentracing_grpc_endpoint + # ensure that opentracing_grpc_endpoint is a valid URL if present and start with http and must not have any path + if opentracing_grpc_endpoint: + urlparse_result = urlparse(opentracing_grpc_endpoint) + if urlparse_result.scheme != "http": + raise Exception( + f"Invalid opentracing_grpc_endpoint {opentracing_grpc_endpoint}, scheme must be http" + ) + if urlparse_result.path and urlparse_result.path != "/": + raise Exception( + f"Invalid opentracing_grpc_endpoint {opentracing_grpc_endpoint}, path must be empty" + ) + llms_with_endpoint = [] llms_with_endpoint_cluster_names = set() updated_model_providers = [] diff --git a/cli/planoai/consts.py b/cli/planoai/consts.py index 52c92130..e51466f3 100644 --- a/cli/planoai/consts.py +++ b/cli/planoai/consts.py @@ -3,3 +3,4 @@ import os SERVICE_NAME_ARCHGW = "plano" PLANO_DOCKER_NAME = "plano" PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.4") +DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://host.docker.internal:4317" diff --git a/cli/planoai/docker_cli.py b/cli/planoai/docker_cli.py index 518606a6..599d57a1 100644 --- a/cli/planoai/docker_cli.py +++ b/cli/planoai/docker_cli.py @@ -116,11 +116,18 @@ def stream_gateway_logs(follow, service="plano"): def docker_validate_plano_schema(arch_config_file): + import os + + env = os.environ.copy() + env.pop("PATH", None) + env_args = [item for key, value in env.items() for item in ["-e", f"{key}={value}"]] + result = subprocess.run( [ "docker", "run", "--rm", + *env_args, "-v", f"{arch_config_file}:/app/arch_config.yaml:ro", "--entrypoint", diff --git a/cli/planoai/main.py b/cli/planoai/main.py index a11945b7..3ba28d52 100644 --- a/cli/planoai/main.py +++ b/cli/planoai/main.py @@ -27,6 +27,7 @@ from planoai.core import ( start_cli_agent, ) from planoai.consts import ( + DEFAULT_OTEL_TRACING_GRPC_ENDPOINT, PLANO_DOCKER_IMAGE, PLANO_DOCKER_NAME, SERVICE_NAME_ARCHGW, @@ -156,7 +157,7 @@ def up(file, path, foreground): # Set the ARCH_CONFIG_FILE environment variable env_stage = { - "OTEL_TRACING_HTTP_ENDPOINT": "http://host.docker.internal:4318/v1/traces", + "OTEL_TRACING_GRPC_ENDPOINT": DEFAULT_OTEL_TRACING_GRPC_ENDPOINT, } env = os.environ.copy() # Remove PATH variable if present diff --git a/config/arch_config_schema.yaml b/config/arch_config_schema.yaml index 003bb9b4..0f3cefb7 100644 --- a/config/arch_config_schema.yaml +++ b/config/arch_config_schema.yaml @@ -382,6 +382,8 @@ properties: type: integer trace_arch_internal: type: boolean + opentracing_grpc_endpoint: + type: string additionalProperties: false mode: type: string diff --git a/config/docker-compose.dev.yaml b/config/docker-compose.dev.yaml index c683cba3..2e061939 100644 --- a/config/docker-compose.dev.yaml +++ b/config/docker-compose.dev.yaml @@ -21,4 +21,4 @@ services: environment: - OPENAI_API_KEY=${OPENAI_API_KEY:?error} - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error} - - OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces + - OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317 diff --git a/config/envoy.template.yaml b/config/envoy.template.yaml index ed9254ea..6fa623ac 100644 --- a/config/envoy.template.yaml +++ b/config/envoy.template.yaml @@ -54,6 +54,7 @@ static_resources: service_name: plano(inbound) random_sampling: value: {{ arch_tracing.random_sampling }} + operation: "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%" {% endif %} stat_prefix: plano(inbound) codec_type: AUTO @@ -206,7 +207,7 @@ static_resources: - name: outbound_api_traffic address: socket_address: - address: 127.0.0.1 + address: 0.0.0.0 port_value: 11000 traffic_direction: OUTBOUND filter_chains: @@ -303,6 +304,7 @@ static_resources: service_name: plano(inbound) random_sampling: value: {{ arch_tracing.random_sampling }} + operation: "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%" {% endif %} stat_prefix: {{ listener.name | replace(" ", "_") }}_traffic codec_type: AUTO @@ -388,21 +390,6 @@ static_resources: - name: envoy.filters.network.http_connection_manager typed_config: "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager - {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %} - generate_request_id: true - tracing: - provider: - name: envoy.tracers.opentelemetry - typed_config: - "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig - grpc_service: - envoy_grpc: - cluster_name: opentelemetry_collector - timeout: 0.250s - service_name: plano(outbound) - random_sampling: - value: {{ arch_tracing.random_sampling }} - {% endif %} stat_prefix: egress_traffic codec_type: AUTO scheme_header_transformation: @@ -470,50 +457,6 @@ static_resources: typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router -{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %} - - name: otel_collector_proxy - address: - socket_address: - address: 127.0.0.1 - port_value: 9903 - traffic_direction: OUTBOUND - filter_chains: - - filters: - - name: envoy.filters.network.http_connection_manager - typed_config: - "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager - stat_prefix: otel_proxy - codec_type: AUTO - # access_log: - # - name: envoy.access_loggers.file - # typed_config: - # "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog - # path: "/var/log/access_otel.log" - # format: | - # [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%" - route_config: - name: otel_route - virtual_hosts: - - name: otel_backend - domains: ["*"] - routes: - - match: - prefix: "/v1/traces" - route: - cluster: opentelemetry_collector_http - timeout: 5s - retry_policy: - retry_on: "5xx,connect-failure,refused-stream,reset" - num_retries: 3 - per_try_timeout: 2s - host_selection_retry_max_attempts: 5 - retriable_status_codes: [500, 502, 503, 504] - http_filters: - - name: envoy.filters.http.router - typed_config: - "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router -{% endif %} - - name: egress_traffic_llm address: socket_address: @@ -524,6 +467,22 @@ static_resources: - name: envoy.filters.network.http_connection_manager typed_config: "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %} + generate_request_id: true + tracing: + provider: + name: envoy.tracers.opentelemetry + typed_config: + "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig + grpc_service: + envoy_grpc: + cluster_name: opentelemetry_collector + timeout: 0.250s + service_name: plano(outbound) + random_sampling: + value: {{ arch_tracing.random_sampling }} + operation: "%REQ(:METHOD)% %REQ(:AUTHORITY)%%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%" + {% endif %} stat_prefix: egress_traffic codec_type: AUTO scheme_header_transformation: @@ -1071,35 +1030,7 @@ static_resources: - endpoint: address: socket_address: - address: host.docker.internal - port_value: 4317 - - name: opentelemetry_collector_http - type: STRICT_DNS - dns_lookup_family: V4_ONLY - lb_policy: ROUND_ROBIN - typed_extension_protocol_options: - load_assignment: - cluster_name: opentelemetry_collector_http - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: host.docker.internal - port_value: 4318 - # Circuit breaker configuration to prevent overwhelming OTEL collector - circuit_breakers: - thresholds: - - priority: DEFAULT - max_connections: 100 - max_pending_requests: 100 - max_requests: 100 - max_retries: 3 - # Health checking and outlier detection - outlier_detection: - consecutive_5xx: 5 - interval: 10s - base_ejection_time: 30s - max_ejection_percent: 50 - enforcing_consecutive_5xx: 100 + {% set _otel_endpoint = arch_tracing.opentracing_grpc_endpoint | default('host.docker.internal:4317') | replace("http://", "") | replace("https://", "") %} + address: {{ _otel_endpoint.split(":") | first }} + port_value: {{ _otel_endpoint.split(":") | last }} {% endif %} diff --git a/crates/.vscode/launch.json b/crates/.vscode/launch.json index 56a29b46..1dc48199 100644 --- a/crates/.vscode/launch.json +++ b/crates/.vscode/launch.json @@ -13,7 +13,10 @@ "env": { "RUST_LOG": "debug", "RUST_BACKTRACE": "1", - "ARCH_CONFIG_PATH_RENDERED": "../demos/use_cases/preference_based_routing/arch_config_rendered.yaml" + "ARCH_CONFIG_PATH_RENDERED": "../demos/use_cases/multi_agent_with_crewai_langchain/config.yaml_rendered", + // "ARCH_CONFIG_PATH_RENDERED": "../demos/use_cases/preference_based_routing/config.yaml_rendered", + "OTEL_TRACING_GRPC_ENDPOINT": "http://localhost:4317", + "OTEL_TRACING_ENABLED": "true" }, "preLaunchTask": "rust: cargo build" } diff --git a/crates/Cargo.lock b/crates/Cargo.lock index 1dab5467..f2744ad2 100644 --- a/crates/Cargo.lock +++ b/crates/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "acap" @@ -195,7 +195,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper", "tokio", - "tower 0.5.2", + "tower", "tower-layer", "tower-service", "tracing", @@ -354,7 +354,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" dependencies = [ "memchr", - "regex-automata 0.4.9", + "regex-automata", "serde", ] @@ -445,7 +445,6 @@ dependencies = [ "pretty_assertions", "proxy-wasm", "rand 0.8.5", - "reqwest", "serde", "serde_json", "serde_with", @@ -454,7 +453,6 @@ dependencies = [ "thiserror 1.0.69", "tiktoken-rs", "tokio", - "tracing", "url", "urlencoding", ] @@ -946,12 +944,6 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" -[[package]] -name = "glob" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" - [[package]] name = "governor" version = "0.6.3" @@ -1573,11 +1565,11 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "matchers" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" dependencies = [ - "regex-automata 0.1.10", + "regex-automata", ] [[package]] @@ -1719,12 +1711,11 @@ checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21" [[package]] name = "nu-ansi-term" -version = "0.46.0" +version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "overload", - "winapi", + "windows-sys 0.59.0", ] [[package]] @@ -1812,9 +1803,9 @@ dependencies = [ [[package]] name = "opentelemetry" -version = "0.29.1" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e87237e2775f74896f9ad219d26a2081751187eb7c9f5c58dde20a23b95d16c" +checksum = "b84bcd6ae87133e903af7ef497404dda70c60d0ea14895fc8a5e6722754fc2a0" dependencies = [ "futures-core", "futures-sink", @@ -1826,25 +1817,23 @@ dependencies = [ [[package]] name = "opentelemetry-http" -version = "0.29.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46d7ab32b827b5b495bd90fa95a6cb65ccc293555dcc3199ae2937d2d237c8ed" +checksum = "d7a6d09a73194e6b66df7c8f1b680f156d916a1a942abf2de06823dd02b7855d" dependencies = [ "async-trait", "bytes", "http 1.3.1", "opentelemetry", "reqwest", - "tracing", ] [[package]] name = "opentelemetry-otlp" -version = "0.29.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d899720fe06916ccba71c01d04ecd77312734e2de3467fd30d9d580c8ce85656" +checksum = "7a2366db2dca4d2ad033cad11e6ee42844fd727007af5ad04a1730f4cb8163bf" dependencies = [ - "futures-core", "http 1.3.1", "opentelemetry", "opentelemetry-http", @@ -1860,44 +1849,43 @@ dependencies = [ [[package]] name = "opentelemetry-proto" -version = "0.29.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c40da242381435e18570d5b9d50aca2a4f4f4d8e146231adb4e7768023309b3" +checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f" dependencies = [ "opentelemetry", "opentelemetry_sdk", "prost", "tonic", + "tonic-prost", ] [[package]] name = "opentelemetry-stdout" -version = "0.29.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7e27d446dabd68610ef0b77d07b102ecde827a4596ea9c01a4d3811e945b286" +checksum = "bc8887887e169414f637b18751487cce4e095be787d23fad13c454e2fb1b3811" dependencies = [ "chrono", - "futures-util", "opentelemetry", "opentelemetry_sdk", ] [[package]] name = "opentelemetry_sdk" -version = "0.29.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afdefb21d1d47394abc1ba6c57363ab141be19e27cc70d0e422b7f303e4d290b" +checksum = "e14ae4f5991976fd48df6d843de219ca6d31b01daaab2dad5af2badeded372bd" dependencies = [ "futures-channel", "futures-executor", "futures-util", - "glob", "opentelemetry", "percent-encoding", "rand 0.9.2", - "serde_json", "thiserror 2.0.12", - "tracing", + "tokio", + "tokio-stream", ] [[package]] @@ -1906,12 +1894,6 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" -[[package]] -name = "overload" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" - [[package]] name = "parking_lot" version = "0.12.4" @@ -2101,9 +2083,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.5" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" dependencies = [ "bytes", "prost-derive", @@ -2111,9 +2093,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.5" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", "itertools", @@ -2298,17 +2280,8 @@ checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.9", - "regex-syntax 0.8.5", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" -dependencies = [ - "regex-syntax 0.6.29", + "regex-automata", + "regex-syntax", ] [[package]] @@ -2319,15 +2292,9 @@ checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.5", + "regex-syntax", ] -[[package]] -name = "regex-syntax" -version = "0.6.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - [[package]] name = "regex-syntax" version = "0.8.5" @@ -2375,7 +2342,7 @@ dependencies = [ "tokio-native-tls", "tokio-rustls 0.26.2", "tokio-util", - "tower 0.5.2", + "tower", "tower-http", "tower-service", "url", @@ -3205,9 +3172,9 @@ dependencies = [ [[package]] name = "tonic" -version = "0.12.3" +version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +checksum = "eb7613188ce9f7df5bfe185db26c5814347d110db17920415cf2fbcad85e7203" dependencies = [ "async-trait", "base64 0.22.1", @@ -3220,33 +3187,24 @@ dependencies = [ "hyper-util", "percent-encoding", "pin-project", - "prost", + "sync_wrapper", "tokio", "tokio-stream", - "tower 0.4.13", + "tower", "tower-layer", "tower-service", "tracing", ] [[package]] -name = "tower" -version = "0.4.13" +name = "tonic-prost" +version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67" dependencies = [ - "futures-core", - "futures-util", - "indexmap 1.9.3", - "pin-project", - "pin-project-lite", - "rand 0.8.5", - "slab", - "tokio", - "tokio-util", - "tower-layer", - "tower-service", - "tracing", + "bytes", + "prost", + "tonic", ] [[package]] @@ -3257,9 +3215,12 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" dependencies = [ "futures-core", "futures-util", + "indexmap 2.9.0", "pin-project-lite", + "slab", "sync_wrapper", "tokio", + "tokio-util", "tower-layer", "tower-service", "tracing", @@ -3278,7 +3239,7 @@ dependencies = [ "http-body 1.0.1", "iri-string", "pin-project-lite", - "tower 0.5.2", + "tower", "tower-layer", "tower-service", ] @@ -3297,9 +3258,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "log", "pin-project-lite", @@ -3309,9 +3270,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.28" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", @@ -3320,9 +3281,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.33" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", "valuable", @@ -3341,14 +3302,12 @@ dependencies = [ [[package]] name = "tracing-opentelemetry" -version = "0.30.0" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd8e764bd6f5813fd8bebc3117875190c5b0415be8f7f8059bffb6ecd979c444" +checksum = "1ac28f2d093c6c477eaa76b23525478f38de514fa9aeb1285738d4b97a9552fc" dependencies = [ "js-sys", - "once_cell", "opentelemetry", - "opentelemetry_sdk", "smallvec", "tracing", "tracing-core", @@ -3359,14 +3318,14 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.19" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" dependencies = [ "matchers", "nu-ansi-term", "once_cell", - "regex", + "regex-automata", "sharded-slab", "smallvec", "thread_local", @@ -3684,28 +3643,6 @@ dependencies = [ "web-sys", ] -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-core" version = "0.61.2" diff --git a/crates/Cargo.toml b/crates/Cargo.toml index c22e252e..5cd6b29c 100644 --- a/crates/Cargo.toml +++ b/crates/Cargo.toml @@ -1,7 +1,3 @@ [workspace] resolver = "2" members = ["llm_gateway", "prompt_gateway", "common", "brightstaff", "hermesllm"] - -[workspace.metadata.rust-analyzer] -# Enable features for better IDE support -cargo.features = ["trace-collection"] diff --git a/crates/brightstaff/Cargo.toml b/crates/brightstaff/Cargo.toml index f48a8d68..5d986ffa 100644 --- a/crates/brightstaff/Cargo.toml +++ b/crates/brightstaff/Cargo.toml @@ -8,7 +8,7 @@ async-openai = "0.30.1" async-trait = "0.1" bytes = "1.10.1" chrono = "0.4" -common = { version = "0.1.0", path = "../common", features = ["trace-collection"] } +common = { version = "0.1.0", path = "../common" } eventsource-client = "0.15.0" eventsource-stream = "0.2.3" flate2 = "1.0" @@ -19,11 +19,11 @@ http-body = "1.0.1" http-body-util = "0.1.3" hyper = { version = "1.6.0", features = ["full"] } hyper-util = "0.1.11" -opentelemetry = "0.29.1" -opentelemetry-http = "0.29.0" -opentelemetry-otlp = {version="0.29.0", features=["trace", "tonic", "grpc-tonic"]} -opentelemetry-stdout = "0.29.0" -opentelemetry_sdk = "0.29.0" +opentelemetry = "0.31" +opentelemetry-http = "0.31" +opentelemetry-otlp = {version="0.31", features=["trace", "grpc-tonic"]} +opentelemetry-stdout = "0.31" +opentelemetry_sdk = { version = "0.31", features = ["rt-tokio"] } pretty_assertions = "1.4.1" rand = "0.9.2" reqwest = { version = "0.12.15", features = ["stream"] } @@ -38,6 +38,7 @@ tokio-postgres = { version = "0.7", features = ["with-serde_json-1"] } tokio-stream = "0.1" time = { version = "0.3", features = ["formatting", "macros"] } tracing = "0.1" +tracing-opentelemetry = "0.32.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } uuid = { version = "1.0", features = ["v4", "serde"] } @@ -45,5 +46,5 @@ uuid = { version = "1.0", features = ["v4", "serde"] } mockito = "1.0" tokio-stream = "0.1.17" tracing = "0.1.41" -tracing-opentelemetry = "0.30.0" +tracing-opentelemetry = "0.32.1" tracing-subscriber = { version = "0.3.19", features = ["env-filter", "fmt", "time"] } diff --git a/crates/brightstaff/src/handlers/agent_chat_completions.rs b/crates/brightstaff/src/handlers/agent_chat_completions.rs index 5ced34c0..adfdce02 100644 --- a/crates/brightstaff/src/handlers/agent_chat_completions.rs +++ b/crates/brightstaff/src/handlers/agent_chat_completions.rs @@ -1,9 +1,7 @@ use std::sync::Arc; -use std::time::{Instant, SystemTime}; +use std::time::Instant; use bytes::Bytes; -use common::consts::TRACE_PARENT_HEADER; -use common::traces::{generate_random_span_id, parse_traceparent, SpanBuilder, SpanKind}; use hermesllm::apis::OpenAIMessage; use hermesllm::clients::SupportedAPIsFromClient; use hermesllm::providers::request::ProviderRequest; @@ -11,14 +9,15 @@ use hermesllm::ProviderRequestType; use http_body_util::combinators::BoxBody; use http_body_util::BodyExt; use hyper::{Request, Response}; +use opentelemetry::trace::get_active_span; use serde::ser::Error as SerError; -use tracing::{debug, info, warn}; +use tracing::{debug, info, info_span, warn, Instrument}; use super::agent_selector::{AgentSelectionError, AgentSelector}; use super::pipeline_processor::{PipelineError, PipelineProcessor}; use super::response_handler::ResponseHandler; use crate::router::plano_orchestrator::OrchestratorService; -use crate::tracing::{http, operation_component, OperationNameBuilder}; +use crate::tracing::{operation_component, set_service_name}; /// Main errors for agent chat completions #[derive(Debug, thiserror::Error)] @@ -41,92 +40,122 @@ pub async fn agent_chat( _: String, agents_list: Arc>>>, listeners: Arc>>, - trace_collector: Arc, ) -> Result>, hyper::Error> { - match handle_agent_chat( - request, - orchestrator_service, - agents_list, - listeners, - trace_collector, - ) - .await + // Extract request_id from headers or generate a new one + let request_id: String = match request + .headers() + .get(common::consts::REQUEST_ID_HEADER) + .and_then(|h| h.to_str().ok()) + .map(|s| s.to_string()) { - Ok(response) => Ok(response), - Err(err) => { - // Check if this is a client error from the pipeline that should be cascaded - if let AgentFilterChainError::Pipeline(PipelineError::ClientError { - agent, - status, - body, - }) = &err - { - warn!( - "Client error from agent '{}' (HTTP {}): {}", - agent, status, body - ); + Some(id) => id, + None => uuid::Uuid::new_v4().to_string(), + }; - // Create error response with the original status code and body + // Create a span with request_id that will be included in all log lines + let request_span = info_span!( + "(orchestrator)", + component = "orchestrator", + request_id = %request_id, + http.method = %request.method(), + http.path = %request.uri().path() + ); + + // Execute the handler inside the span + async { + // Set service name for orchestrator operations + set_service_name(operation_component::ORCHESTRATOR); + + match handle_agent_chat_inner( + request, + orchestrator_service, + agents_list, + listeners, + request_id, + ) + .await + { + Ok(response) => Ok(response), + Err(err) => { + // Check if this is a client error from the pipeline that should be cascaded + if let AgentFilterChainError::Pipeline(PipelineError::ClientError { + agent, + status, + body, + }) = &err + { + warn!( + agent = %agent, + status = %status, + body = %body, + "client error from agent" + ); + + // Create error response with the original status code and body + let error_json = serde_json::json!({ + "error": "ClientError", + "agent": agent, + "status": status, + "agent_response": body + }); + + let json_string = error_json.to_string(); + let mut response = + Response::new(ResponseHandler::create_full_body(json_string)); + *response.status_mut() = hyper::StatusCode::from_u16(*status) + .unwrap_or(hyper::StatusCode::BAD_REQUEST); + response.headers_mut().insert( + hyper::header::CONTENT_TYPE, + "application/json".parse().unwrap(), + ); + return Ok(response); + } + + // Print detailed error information with full error chain for other errors + let mut error_chain = Vec::new(); + let mut current_error: &dyn std::error::Error = &err; + + // Collect the full error chain + loop { + error_chain.push(current_error.to_string()); + match current_error.source() { + Some(source) => current_error = source, + None => break, + } + } + + // Log the complete error chain + warn!(error_chain = ?error_chain, "agent chat error chain"); + warn!(root_error = ?err, "root error"); + + // Create structured error response as JSON let error_json = serde_json::json!({ - "error": "ClientError", - "agent": agent, - "status": status, - "agent_response": body + "error": { + "type": "AgentFilterChainError", + "message": err.to_string(), + "error_chain": error_chain, + "debug_info": format!("{:?}", err) + } }); - let json_string = error_json.to_string(); - let mut response = Response::new(ResponseHandler::create_full_body(json_string)); - *response.status_mut() = - hyper::StatusCode::from_u16(*status).unwrap_or(hyper::StatusCode::BAD_REQUEST); - response.headers_mut().insert( - hyper::header::CONTENT_TYPE, - "application/json".parse().unwrap(), - ); - return Ok(response); + // Log the error for debugging + info!(error = %error_json, "structured error info"); + + // Return JSON error response + Ok(ResponseHandler::create_json_error_response(&error_json)) } - - // Print detailed error information with full error chain for other errors - let mut error_chain = Vec::new(); - let mut current_error: &dyn std::error::Error = &err; - - // Collect the full error chain - loop { - error_chain.push(current_error.to_string()); - match current_error.source() { - Some(source) => current_error = source, - None => break, - } - } - - // Log the complete error chain - warn!("Agent chat error chain: {:#?}", error_chain); - warn!("Root error: {:?}", err); - - // Create structured error response as JSON - let error_json = serde_json::json!({ - "error": { - "type": "AgentFilterChainError", - "message": err.to_string(), - "error_chain": error_chain, - "debug_info": format!("{:?}", err) - } - }); - - // Log the error for debugging - info!("Structured error info: {}", error_json); - - // Return JSON error response - Ok(ResponseHandler::create_json_error_response(&error_json)) } } + .instrument(request_span) + .await } -async fn handle_agent_chat( +async fn handle_agent_chat_inner( request: Request, orchestrator_service: Arc, agents_list: Arc>>>, listeners: Arc>>, - trace_collector: Arc, + request_id: String, ) -> Result>, AgentFilterChainError> { // Initialize services let agent_selector = AgentSelector::new(orchestrator_service); @@ -140,14 +169,18 @@ async fn handle_agent_chat( .and_then(|name| name.to_str().ok()); // Find the appropriate listener - let listener = { + let listener: common::configuration::Listener = { let listeners = listeners.read().await; agent_selector .find_listener(listener_name, &listeners) .await? }; - info!("Handling request for listener: {}", listener.name); + get_active_span(|span| { + span.update_name(listener.name.to_string()); + }); + + info!(listener = %listener.name, "handling request"); // Parse request body let request_path = request @@ -162,12 +195,8 @@ async fn handle_agent_chat( let mut headers = request.headers().clone(); headers.remove(common::consts::ENVOY_ORIGINAL_PATH_HEADER); + // Set the request_id in headers if not already present if !headers.contains_key(common::consts::REQUEST_ID_HEADER) { - let request_id = uuid::Uuid::new_v4().to_string(); - info!( - "Request id not found in headers, generated new request id: {}", - request_id - ); headers.insert( common::consts::REQUEST_ID_HEADER, hyper::header::HeaderValue::from_str(&request_id).unwrap(), @@ -180,8 +209,8 @@ async fn handle_agent_chat( let chat_request_bytes = request.collect().await?.to_bytes(); debug!( - "Received request body (raw utf8): {}", - String::from_utf8_lossy(&chat_request_bytes) + body = %String::from_utf8_lossy(&chat_request_bytes), + "received request body" ); // Determine the API type from the endpoint @@ -195,7 +224,7 @@ async fn handle_agent_chat( let client_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &api_type)) { Ok(request) => request, Err(err) => { - warn!("Failed to parse request as ProviderRequestType: {}", err); + warn!("failed to parse request as ProviderRequestType: {}", err); let err_msg = format!("Failed to parse request: {}", err); return Err(AgentFilterChainError::RequestParsing( serde_json::Error::custom(err_msg), @@ -205,12 +234,6 @@ async fn handle_agent_chat( let message: Vec = client_request.get_messages(); - // Extract trace parent for routing - let traceparent = request_headers - .iter() - .find(|(key, _)| key.as_str() == TRACE_PARENT_HEADER) - .map(|(_, value)| value.to_str().unwrap_or_default().to_string()); - let request_id = request_headers .get(common::consts::REQUEST_ID_HEADER) .and_then(|val| val.to_str().ok()) @@ -223,87 +246,58 @@ async fn handle_agent_chat( agent_selector.create_agent_map(agents) }; - // Parse trace parent to get trace_id and parent_span_id - let (trace_id, parent_span_id) = if let Some(ref tp) = traceparent { - parse_traceparent(tp) - } else { - (String::new(), None) - }; - // Select appropriate agents using arch orchestrator llm model - let selection_span_id = generate_random_span_id(); - let selection_start_time = SystemTime::now(); - let selection_start_instant = Instant::now(); - + let selection_start = Instant::now(); let selected_agents = agent_selector - .select_agents(&message, &listener, traceparent.clone(), request_id.clone()) + .select_agents(&message, &listener, request_id.clone()) .await?; - // Record agent selection span - let selection_end_time = SystemTime::now(); - let selection_elapsed = selection_start_instant.elapsed(); - let selection_operation_name = OperationNameBuilder::new() - .with_method("POST") - .with_path("/agents/select") - .with_target(&listener.name) - .build(); - - let mut selection_span_builder = SpanBuilder::new(&selection_operation_name) - .with_span_id(selection_span_id) - .with_kind(SpanKind::Internal) - .with_start_time(selection_start_time) - .with_end_time(selection_end_time) - .with_attribute(http::METHOD, "POST") - .with_attribute(http::TARGET, "/agents/select") - .with_attribute("selection.listener", listener.name.clone()) - .with_attribute("selection.agent_count", selected_agents.len().to_string()) - .with_attribute( + // Record selection attributes on the current orchestrator span + let selection_elapsed_ms = selection_start.elapsed().as_secs_f64() * 1000.0; + get_active_span(|span| { + span.set_attribute(opentelemetry::KeyValue::new( + "selection.listener", + listener.name.clone(), + )); + span.set_attribute(opentelemetry::KeyValue::new( + "selection.agent_count", + selected_agents.len() as i64, + )); + span.set_attribute(opentelemetry::KeyValue::new( "selection.agents", selected_agents .iter() .map(|a| a.id.as_str()) .collect::>() .join(","), - ) - .with_attribute( - "duration_ms", - format!("{:.2}", selection_elapsed.as_secs_f64() * 1000.0), - ); + )); + span.set_attribute(opentelemetry::KeyValue::new( + "selection.determination_ms", + format!("{:.2}", selection_elapsed_ms), + )); + }); - if !trace_id.is_empty() { - selection_span_builder = selection_span_builder.with_trace_id(trace_id.clone()); - } - if let Some(parent_id) = parent_span_id.clone() { - selection_span_builder = selection_span_builder.with_parent_span_id(parent_id); - } - - let selection_span = selection_span_builder.build(); - trace_collector.record_span(operation_component::ORCHESTRATOR, selection_span); - - info!("Selected {} agent(s) for execution", selected_agents.len()); + info!( + count = selected_agents.len(), + "selected agents for execution" + ); // Execute agents sequentially, passing output from one to the next let mut current_messages = message.clone(); let agent_count = selected_agents.len(); for (agent_index, selected_agent) in selected_agents.iter().enumerate() { + // Get agent name + let agent_name = selected_agent.id.clone(); let is_last_agent = agent_index == agent_count - 1; debug!( - "Processing agent {}/{}: {}", - agent_index + 1, - agent_count, - selected_agent.id + agent_index = agent_index + 1, + total = agent_count, + agent = %agent_name, + "processing agent" ); - // Record the start time for agent span - let agent_start_time = SystemTime::now(); - let agent_start_instant = Instant::now(); - let span_id = generate_random_span_id(); - - // Get agent name - let agent_name = selected_agent.id.clone(); - // Process the filter chain let chat_history = pipeline_processor .process_filter_chain( @@ -311,88 +305,71 @@ async fn handle_agent_chat( selected_agent, &agent_map, &request_headers, - Some(&trace_collector), - trace_id.clone(), - span_id.clone(), ) .await?; // Get agent details and invoke let agent = agent_map.get(&agent_name).unwrap(); - debug!("Invoking agent: {}", agent_name); + debug!(agent = %agent_name, "invoking agent"); - let llm_response = pipeline_processor - .invoke_agent( - &chat_history, - client_request.clone(), - agent, - &request_headers, - trace_id.clone(), - span_id.clone(), - ) - .await?; + let agent_span = info_span!( + "agent", + agent_id = %agent_name, + message_count = chat_history.len(), + ); - // Record agent span - let agent_end_time = SystemTime::now(); - let agent_elapsed = agent_start_instant.elapsed(); - let full_path = format!("/agents{}", request_path); - let operation_name = OperationNameBuilder::new() - .with_method("POST") - .with_path(&full_path) - .with_target(&agent_name) - .build(); + let llm_response = async { + set_service_name(operation_component::AGENT); + get_active_span(|span| { + span.update_name(format!("{} /v1/chat/completions", agent_name)); + }); - let mut span_builder = SpanBuilder::new(&operation_name) - .with_span_id(span_id) - .with_kind(SpanKind::Internal) - .with_start_time(agent_start_time) - .with_end_time(agent_end_time) - .with_attribute(http::METHOD, "POST") - .with_attribute(http::TARGET, full_path) - .with_attribute("agent.name", agent_name.clone()) - .with_attribute( - "agent.sequence", - format!("{}/{}", agent_index + 1, agent_count), - ) - .with_attribute( - "duration_ms", - format!("{:.2}", agent_elapsed.as_secs_f64() * 1000.0), - ); - - if !trace_id.is_empty() { - span_builder = span_builder.with_trace_id(trace_id.clone()); + pipeline_processor + .invoke_agent( + &chat_history, + client_request.clone(), + agent, + &request_headers, + ) + .await } - if let Some(parent_id) = parent_span_id.clone() { - span_builder = span_builder.with_parent_span_id(parent_id); - } - - let span = span_builder.build(); - trace_collector.record_span(operation_component::AGENT, span); + .instrument(agent_span.clone()) + .await?; // If this is the last agent, return the streaming response if is_last_agent { info!( - "Completed agent chain, returning response from last agent: {}", - agent_name + agent = %agent_name, + "completed agent chain, returning response" ); - return response_handler - .create_streaming_response(llm_response) - .await - .map_err(AgentFilterChainError::from); + // Capture the orchestrator span (parent of the agent span) so it + // stays open for the full streaming duration alongside the agent span. + let orchestrator_span = tracing::Span::current(); + return async { + response_handler + .create_streaming_response( + llm_response, + tracing::Span::current(), // agent span (inner) + orchestrator_span, // orchestrator span (outer) + ) + .await + .map_err(AgentFilterChainError::from) + } + .instrument(agent_span) + .await; } // For intermediate agents, collect the full response and pass to next agent - debug!( - "Collecting response from intermediate agent: {}", - agent_name - ); - let response_text = response_handler.collect_full_response(llm_response).await?; + debug!(agent = %agent_name, "collecting response from intermediate agent"); + let response_text = async { response_handler.collect_full_response(llm_response).await } + .instrument(agent_span) + .await?; info!( - "Agent {} completed, passing {} character response to next agent", - agent_name, - response_text.len() + agent = %agent_name, + response_len = response_text.len(), + "agent completed, passing response to next agent" ); // remove last message and add new one at the end diff --git a/crates/brightstaff/src/handlers/agent_selector.rs b/crates/brightstaff/src/handlers/agent_selector.rs index 727dc6c0..faa734ee 100644 --- a/crates/brightstaff/src/handlers/agent_selector.rs +++ b/crates/brightstaff/src/handlers/agent_selector.rs @@ -75,7 +75,7 @@ impl AgentSelector { .cloned() .or_else(|| { warn!( - "No default agent found, routing request to first agent: {}", + "no default agent found, routing request to first agent: {}", agents[0].id ); Some(agents[0].clone()) @@ -108,7 +108,6 @@ impl AgentSelector { &self, messages: &[Message], listener: &Listener, - trace_parent: Option, request_id: Option, ) -> Result, AgentSelectionError> { let agents = listener @@ -118,7 +117,7 @@ impl AgentSelector { // If only one agent, skip orchestration if agents.len() == 1 { - debug!("Only one agent available, skipping orchestration"); + debug!("only one agent available, skipping orchestration"); return Ok(vec![agents[0].clone()]); } @@ -132,15 +131,15 @@ impl AgentSelector { match self .orchestrator_service - .determine_orchestration(messages, trace_parent, Some(usage_preferences), request_id) + .determine_orchestration(messages, Some(usage_preferences), request_id) .await { Ok(Some(routes)) => { - debug!("Determined {} agent(s) via orchestration", routes.len()); + debug!(count = routes.len(), "determined agents via orchestration"); let mut selected_agents = Vec::new(); for (route_name, agent_name) in routes { - debug!("Processing route: {}, agent: {}", route_name, agent_name); + debug!(route = %route_name, agent = %agent_name, "processing route"); let selected_agent = agents .iter() .find(|a| a.id == agent_name) @@ -155,14 +154,14 @@ impl AgentSelector { } if selected_agents.is_empty() { - debug!("No agents determined using orchestration, using default agent"); + debug!("no agents determined via orchestration, using default"); Ok(vec![self.get_default_agent(agents, &listener.name)?]) } else { Ok(selected_agents) } } Ok(None) => { - debug!("No agents determined using orchestration, using default agent"); + debug!("no agents determined using orchestration, using default agent"); Ok(vec![self.get_default_agent(agents, &listener.name)?]) } Err(err) => Err(AgentSelectionError::OrchestrationError(err.to_string())), diff --git a/crates/brightstaff/src/handlers/function_calling.rs b/crates/brightstaff/src/handlers/function_calling.rs index 7ba15e2d..0d6c6d3c 100644 --- a/crates/brightstaff/src/handlers/function_calling.rs +++ b/crates/brightstaff/src/handlers/function_calling.rs @@ -944,7 +944,7 @@ impl ArchFunctionHandler { ) -> Result { use tracing::{error, info}; - info!("[Arch-Function] - ChatCompletion"); + info!("processing chat completion request"); let messages = self.process_messages( &request.messages, @@ -955,9 +955,9 @@ impl ArchFunctionHandler { )?; info!( - "[request to arch-fc]: model: {}, messages count: {}", - self.model_name, - messages.len() + model = %self.model_name, + message_count = messages.len(), + "sending request to arch-fc" ); let use_agent_orchestrator = request @@ -991,7 +991,7 @@ impl ArchFunctionHandler { } } } - info!("[Agent Orchestrator]: response received"); + info!("agent orchestrator response received"); } else if let Some(tools) = request.tools.as_ref() { let mut hallucination_state = HallucinationState::new(tools); let mut has_tool_calls = None; @@ -1040,7 +1040,10 @@ impl ArchFunctionHandler { } if has_tool_calls == Some(true) && has_hallucination { - info!("[Hallucination]: {}", hallucination_state.error_message); + info!( + "detected hallucination: {}", + hallucination_state.error_message + ); let clarify_messages = self.prefill_message(messages.clone(), &self.clarify_prefix); let clarify_request = self.create_request_with_extra_body(clarify_messages, false); @@ -1075,8 +1078,8 @@ impl ArchFunctionHandler { let response_dict = self.parse_model_response(&model_response); info!( - "[arch-fc]: raw model response: {}", - response_dict.raw_response + raw_response = %response_dict.raw_response, + "arch-fc model response" ); // General model response (no intent matched - should route to default target) @@ -1126,7 +1129,7 @@ impl ArchFunctionHandler { if verification.is_valid { info!( - "[Tool calls]: {:?}", + "tool calls extracted: {:?}", response_dict .tool_calls .iter() @@ -1143,7 +1146,7 @@ impl ArchFunctionHandler { tool_calls: Some(response_dict.tool_calls.clone()), } } else { - error!("Invalid tool call - {}", verification.error_message); + error!(error = %verification.error_message, "invalid tool call"); ResponseMessage { role: Role::Assistant, content: Some(String::new()), @@ -1155,7 +1158,7 @@ impl ArchFunctionHandler { } } } else { - error!("Tool calls present but no tools provided in request"); + error!("tool calls present but no tools provided in request"); ResponseMessage { role: Role::Assistant, content: Some(String::new()), @@ -1168,7 +1171,7 @@ impl ArchFunctionHandler { } } else { info!( - "[Tool calls]: {:?}", + "tool calls extracted: {:?}", response_dict .tool_calls .iter() @@ -1187,8 +1190,8 @@ impl ArchFunctionHandler { } } else { error!( - "Invalid tool calls in response: {}", - response_dict.error_message + error = %response_dict.error_message, + "invalid tool calls in response" ); ResponseMessage { role: Role::Assistant, @@ -1201,7 +1204,7 @@ impl ArchFunctionHandler { } } } else { - error!("Invalid model response - {}", model_response); + error!(response = %model_response, "invalid model response"); ResponseMessage { role: Role::Assistant, content: Some(String::new()), @@ -1244,7 +1247,7 @@ impl ArchFunctionHandler { metadata: Some(metadata), }; - info!("[response arch-fc]: {:?}", chat_completion_response); + info!(response = ?chat_completion_response, "arch-fc response"); Ok(chat_completion_response) } @@ -1331,7 +1334,7 @@ pub async fn function_calling_chat_handler( let mut body_json: Value = match serde_json::from_slice(&whole_body) { Ok(json) => json, Err(e) => { - error!("Failed to parse request body as JSON: {}", e); + error!(error = %e, "failed to parse request body as json"); let mut response = Response::new(full( serde_json::json!({ "error": format!("Invalid request body: {}", e) @@ -1355,13 +1358,13 @@ pub async fn function_calling_chat_handler( let chat_request: ChatCompletionsRequest = match serde_json::from_value(body_json) { Ok(req) => { info!( - "[request body]: {}", - serde_json::to_string(&req).unwrap_or_default() + request_body = %serde_json::to_string(&req).unwrap_or_default(), + "received request" ); req } Err(e) => { - error!("Failed to parse request body: {}", e); + error!(error = %e, "failed to parse request body"); let mut response = Response::new(full( serde_json::json!({ "error": format!("Invalid request body: {}", e) @@ -1384,7 +1387,10 @@ pub async fn function_calling_chat_handler( .and_then(|v| v.as_bool()) .unwrap_or(false); - info!("Use agent orchestrator: {}", use_agent_orchestrator); + info!( + use_agent_orchestrator = use_agent_orchestrator, + "handler mode" + ); // Create the appropriate handler let handler_name = if use_agent_orchestrator { @@ -1415,7 +1421,7 @@ pub async fn function_calling_chat_handler( match final_response { Ok(response_data) => { let response_json = serde_json::to_string(&response_data).unwrap_or_else(|e| { - error!("Failed to serialize response: {}", e); + error!(error = %e, "failed to serialize response"); serde_json::json!({"error": "Failed to serialize response"}).to_string() }); @@ -1428,7 +1434,7 @@ pub async fn function_calling_chat_handler( Ok(response) } Err(e) => { - error!("[{}] - Error in function calling: {}", handler_name, e); + error!(handler = handler_name, error = %e, "error in function calling"); let error_response = serde_json::json!({ "error": format!("[{}] - Error in function calling: {}", handler_name, e) diff --git a/crates/brightstaff/src/handlers/integration_tests.rs b/crates/brightstaff/src/handlers/integration_tests.rs index 9239f94a..70eaacd7 100644 --- a/crates/brightstaff/src/handlers/integration_tests.rs +++ b/crates/brightstaff/src/handlers/integration_tests.rs @@ -112,15 +112,7 @@ mod tests { let headers = HeaderMap::new(); let result = pipeline_processor - .process_filter_chain( - &request.messages, - &test_pipeline, - &agent_map, - &headers, - None, - String::new(), - String::new(), - ) + .process_filter_chain(&request.messages, &test_pipeline, &agent_map, &headers) .await; println!("Pipeline processing result: {:?}", result); diff --git a/crates/brightstaff/src/handlers/llm.rs b/crates/brightstaff/src/handlers/llm.rs index e1fe5a93..b9fe0ba3 100644 --- a/crates/brightstaff/src/handlers/llm.rs +++ b/crates/brightstaff/src/handlers/llm.rs @@ -4,7 +4,6 @@ use common::consts::{ ARCH_IS_STREAMING_HEADER, ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER, TRACE_PARENT_HEADER, }; use common::llm_providers::LlmProviders; -use common::traces::TraceCollector; use hermesllm::apis::openai_responses::InputParam; use hermesllm::clients::{SupportedAPIsFromClient, SupportedUpstreamAPIs}; use hermesllm::{ProviderRequest, ProviderRequestType}; @@ -12,10 +11,13 @@ use http_body_util::combinators::BoxBody; use http_body_util::{BodyExt, Full}; use hyper::header::{self}; use hyper::{Request, Response, StatusCode}; +use opentelemetry::global; +use opentelemetry::trace::get_active_span; +use opentelemetry_http::HeaderInjector; use std::collections::HashMap; use std::sync::Arc; use tokio::sync::RwLock; -use tracing::{debug, info, warn}; +use tracing::{debug, info, info_span, warn, Instrument}; use crate::handlers::router_chat::router_chat_get_upstream_model; use crate::handlers::utils::{ @@ -26,7 +28,7 @@ use crate::state::response_state_processor::ResponsesStateProcessor; use crate::state::{ extract_input_items, retrieve_and_combine_input, StateStorage, StateStorageError, }; -use crate::tracing::operation_component; +use crate::tracing::{operation_component, set_service_name}; fn full>(chunk: T) -> BoxBody { Full::new(chunk.into()) @@ -40,7 +42,6 @@ pub async fn llm_chat( full_qualified_llm_provider_url: String, model_aliases: Arc>>, llm_providers: Arc>, - trace_collector: Arc, state_storage: Option>, ) -> Result>, hyper::Error> { let request_path = request.uri().path().to_string(); @@ -51,16 +52,49 @@ pub async fn llm_chat( .map(|s| s.to_string()) { Some(id) => id, - None => { - let generated_id = uuid::Uuid::new_v4().to_string(); - warn!( - "[PLANO_REQ_ID:{}] | REQUEST_ID header missing, generated new ID", - generated_id - ); - generated_id - } + None => uuid::Uuid::new_v4().to_string(), }; + // Create a span with request_id that will be included in all log lines + let request_span = info_span!( + "llm", + component = "llm", + request_id = %request_id, + http.method = %request.method(), + http.path = %request_path, + ); + + // Execute the rest of the handler inside the span + llm_chat_inner( + request, + router_service, + full_qualified_llm_provider_url, + model_aliases, + llm_providers, + state_storage, + request_id, + request_path, + request_headers, + ) + .instrument(request_span) + .await +} + +#[allow(clippy::too_many_arguments)] +async fn llm_chat_inner( + request: Request, + router_service: Arc, + full_qualified_llm_provider_url: String, + model_aliases: Arc>>, + llm_providers: Arc>, + state_storage: Option>, + request_id: String, + request_path: String, + mut request_headers: hyper::HeaderMap, +) -> Result>, hyper::Error> { + // Set service name for LLM operations + set_service_name(operation_component::LLM); + // Extract or generate traceparent - this establishes the trace context for all spans let traceparent: String = match request_headers .get(TRACE_PARENT_HEADER) @@ -73,20 +107,18 @@ pub async fn llm_chat( let trace_id = Uuid::new_v4().to_string().replace("-", ""); let generated_tp = format!("00-{}-0000000000000000-01", trace_id); warn!( - "[PLANO_REQ_ID:{}] | TRACE_PARENT header missing, generated new traceparent: {}", - request_id, generated_tp + generated_traceparent = %generated_tp, + "TRACE_PARENT header missing, generated new traceparent" ); generated_tp } }; - let mut request_headers = request_headers; let chat_request_bytes = request.collect().await?.to_bytes(); debug!( - "[PLANO_REQ_ID:{}] | REQUEST_BODY (UTF8): {}", - request_id, - String::from_utf8_lossy(&chat_request_bytes) + body = %String::from_utf8_lossy(&chat_request_bytes), + "request body received" ); let mut client_request = match ProviderRequestType::try_from(( @@ -96,13 +128,10 @@ pub async fn llm_chat( Ok(request) => request, Err(err) => { warn!( - "[PLANO_REQ_ID:{}] | FAILURE | Failed to parse request as ProviderRequestType: {}", - request_id, err - ); - let err_msg = format!( - "[PLANO_REQ_ID:{}] | FAILURE | Failed to parse request: {}", - request_id, err + error = %err, + "failed to parse request as ProviderRequestType" ); + let err_msg = format!("Failed to parse request: {}", err); let mut bad_request = Response::new(full(err_msg)); *bad_request.status_mut() = StatusCode::BAD_REQUEST; return Ok(bad_request); @@ -120,18 +149,23 @@ pub async fn llm_chat( // Model alias resolution: update model field in client_request immediately // This ensures all downstream objects use the resolved model let model_from_request = client_request.model().to_string(); - let temperature = client_request.get_temperature(); + let _temperature = client_request.get_temperature(); let is_streaming_request = client_request.is_streaming(); - let resolved_model = resolve_model_alias(&model_from_request, &model_aliases); + let alias_resolved_model = resolve_model_alias(&model_from_request, &model_aliases); // Validate that the requested model exists in configuration // This matches the validation in llm_gateway routing.rs - if llm_providers.read().await.get(&resolved_model).is_none() { + if llm_providers + .read() + .await + .get(&alias_resolved_model) + .is_none() + { let err_msg = format!( "Model '{}' not found in configured providers", - resolved_model + alias_resolved_model ); - warn!("[PLANO_REQ_ID:{}] | FAILURE | {}", request_id, err_msg); + warn!(model = %alias_resolved_model, "model not found in configured providers"); let mut bad_request = Response::new(full(err_msg)); *bad_request.status_mut() = StatusCode::BAD_REQUEST; return Ok(bad_request); @@ -139,29 +173,26 @@ pub async fn llm_chat( // Handle provider/model slug format (e.g., "openai/gpt-4") // Extract just the model name for upstream (providers don't understand the slug) - let model_name_only = if let Some((_, model)) = resolved_model.split_once('/') { + let model_name_only = if let Some((_, model)) = alias_resolved_model.split_once('/') { model.to_string() } else { - resolved_model.clone() + alias_resolved_model.clone() }; // Extract tool names and user message preview for span attributes - let tool_names = client_request.get_tool_names(); - let user_message_preview = client_request + let _tool_names = client_request.get_tool_names(); + let _user_message_preview = client_request .get_recent_user_message() .map(|msg| truncate_message(&msg, 50)); // Extract messages for signal analysis (clone before moving client_request) - let messages_for_signals = client_request.get_messages(); + let messages_for_signals = Some(client_request.get_messages()); // Set the model to just the model name (without provider prefix) // This ensures upstream receives "gpt-4" not "openai/gpt-4" client_request.set_model(model_name_only.clone()); if client_request.remove_metadata_key("archgw_preference_config") { - debug!( - "[PLANO_REQ_ID:{}] Removed archgw_preference_config from metadata", - request_id - ); + debug!("removed archgw_preference_config from metadata"); } // === v1/responses state management: Determine upstream API and combine input if needed === @@ -180,9 +211,9 @@ pub async fn llm_chat( // Get the upstream path and check if it's ResponsesAPI let upstream_path = get_upstream_path( &llm_providers, - &resolved_model, + &alias_resolved_model, &request_path, - &resolved_model, + &alias_resolved_model, is_streaming_request, ) .await; @@ -209,14 +240,17 @@ pub async fn llm_chat( // Update both the request and original_input_items responses_req.input = InputParam::Items(combined_input.clone()); original_input_items = combined_input; - info!("[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Updated request with conversation history ({} items)", request_id, original_input_items.len()); + info!( + items = original_input_items.len(), + "updated request with conversation history" + ); } Err(StateStorageError::NotFound(_)) => { // Return 409 Conflict when previous_response_id not found - warn!("[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Previous response_id not found: {}", request_id, prev_resp_id); + warn!(previous_response_id = %prev_resp_id, "previous response_id not found"); let err_msg = format!( - "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Conversation state not found for previous_response_id: {}", - request_id, prev_resp_id + "Conversation state not found for previous_response_id: {}", + prev_resp_id ); let mut conflict_response = Response::new(full(err_msg)); *conflict_response.status_mut() = StatusCode::CONFLICT; @@ -225,8 +259,9 @@ pub async fn llm_chat( Err(e) => { // Log warning but continue on other storage errors warn!( - "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to retrieve conversation state for {}: {}", - request_id, prev_resp_id, e + previous_response_id = %prev_resp_id, + error = %e, + "failed to retrieve conversation state" ); // Restore original_input_items since we passed ownership original_input_items = extract_input_items(&responses_req.input); @@ -234,10 +269,7 @@ pub async fn llm_chat( } } } else { - debug!( - "[PLANO_REQ_ID:{}] | BRIGHT_STAFF | Upstream supports ResponsesAPI natively.", - request_id - ); + debug!("upstream supports ResponsesAPI natively"); } } } @@ -246,14 +278,29 @@ pub async fn llm_chat( let client_request_bytes_for_upstream = ProviderRequestType::to_bytes(&client_request).unwrap(); // Determine routing using the dedicated router_chat module - let routing_result = match router_chat_get_upstream_model( - router_service, - client_request, // Pass the original request - router_chat will convert it - trace_collector.clone(), - &traceparent, - &request_path, - &request_id, - ) + // This gets its own span for latency and error tracking + let routing_span = info_span!( + "routing", + component = "routing", + http.method = "POST", + http.target = %request_path, + model.requested = %model_from_request, + model.alias_resolved = %alias_resolved_model, + route.selected_model = tracing::field::Empty, + routing.determination_ms = tracing::field::Empty, + ); + let routing_result = match async { + set_service_name(operation_component::ROUTING); + router_chat_get_upstream_model( + router_service, + client_request, // Pass the original request - router_chat will convert it + &traceparent, + &request_path, + &request_id, + ) + .await + } + .instrument(routing_span) .await { Ok(result) => result, @@ -267,22 +314,36 @@ pub async fn llm_chat( // Determine final model to use // Router returns "none" as a sentinel value when it doesn't select a specific model let router_selected_model = routing_result.model_name; - let model_name = if router_selected_model != "none" { + let resolved_model = if router_selected_model != "none" { // Router selected a specific model via routing preferences router_selected_model } else { // Router returned "none" sentinel, use validated resolved_model from request - resolved_model.clone() + alias_resolved_model.clone() }; + let span_name = if model_from_request == resolved_model { + format!("POST {} {}", request_path, resolved_model) + } else { + format!( + "POST {} {} -> {}", + request_path, model_from_request, resolved_model + ) + }; + get_active_span(|span| { + span.update_name(span_name.clone()); + }); + debug!( - "[PLANO_REQ_ID:{}] | ARCH_ROUTER URL | {}, Provider Hint: {}, Model for upstream: {}", - request_id, full_qualified_llm_provider_url, model_name, model_name_only + url = %full_qualified_llm_provider_url, + provider_hint = %resolved_model, + upstream_model = %model_name_only, + "Routing to upstream" ); request_headers.insert( ARCH_PROVIDER_HINT_HEADER, - header::HeaderValue::from_str(&model_name).unwrap(), + header::HeaderValue::from_str(&resolved_model).unwrap(), ); request_headers.insert( @@ -292,12 +353,18 @@ pub async fn llm_chat( // remove content-length header if it exists request_headers.remove(header::CONTENT_LENGTH); + // Inject current LLM span's trace context so upstream spans are children of plano(llm) + global::get_text_map_propagator(|propagator| { + let cx = tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current()); + propagator.inject_context(&cx, &mut HeaderInjector(&mut request_headers)); + }); + // Capture start time right before sending request to upstream let request_start_time = std::time::Instant::now(); - let request_start_system_time = std::time::SystemTime::now(); + let _request_start_system_time = std::time::SystemTime::now(); let llm_response = match reqwest::Client::new() - .post(full_qualified_llm_provider_url) + .post(&full_qualified_llm_provider_url) .headers(request_headers) .body(client_request_bytes_for_upstream) .send() @@ -324,29 +391,12 @@ pub async fn llm_chat( // Build LLM span with actual status code using constants let byte_stream = llm_response.bytes_stream(); - // Build the LLM span (will be finalized after streaming completes) - let llm_span = build_llm_span( - &traceparent, - &request_path, - &resolved_model, - &model_name, - upstream_status.as_u16(), - is_streaming_request, - request_start_system_time, - tool_names, - user_message_preview, - temperature, - &llm_providers, - ) - .await; - // Create base processor for metrics and tracing let base_processor = ObservableStreamProcessor::new( - trace_collector, operation_component::LLM, - llm_span, + span_name, request_start_time, - Some(messages_for_signals), + messages_for_signals, ); // === v1/responses state management: Wrap with ResponsesStateProcessor === @@ -367,8 +417,8 @@ pub async fn llm_chat( base_processor, state_store, original_input_items, + alias_resolved_model.clone(), resolved_model.clone(), - model_name.clone(), is_streaming_request, false, // Not OpenAI upstream since should_manage_state is true content_encoding, @@ -409,88 +459,6 @@ fn resolve_model_alias( model_from_request.to_string() } -/// Builds the LLM span with all required and optional attributes. -#[allow(clippy::too_many_arguments)] -async fn build_llm_span( - traceparent: &str, - request_path: &str, - resolved_model: &str, - model_name: &str, - status_code: u16, - is_streaming: bool, - start_time: std::time::SystemTime, - tool_names: Option>, - user_message_preview: Option, - temperature: Option, - llm_providers: &Arc>, -) -> common::traces::Span { - use crate::tracing::{http, llm, OperationNameBuilder}; - use common::traces::{parse_traceparent, SpanBuilder, SpanKind}; - - // Calculate the upstream path based on provider configuration - let upstream_path = get_upstream_path( - llm_providers, - model_name, - request_path, - resolved_model, - is_streaming, - ) - .await; - - // Build operation name showing path transformation if different - let operation_name = if request_path != upstream_path { - OperationNameBuilder::new() - .with_method("POST") - .with_path(format!("{} >> {}", request_path, upstream_path)) - .with_target(resolved_model) - .build() - } else { - OperationNameBuilder::new() - .with_method("POST") - .with_path(request_path) - .with_target(resolved_model) - .build() - }; - - let (trace_id, parent_span_id) = parse_traceparent(traceparent); - - let mut span_builder = SpanBuilder::new(&operation_name) - .with_trace_id(&trace_id) - .with_kind(SpanKind::Client) - .with_start_time(start_time) - .with_attribute(http::METHOD, "POST") - .with_attribute(http::STATUS_CODE, status_code.to_string()) - .with_attribute(http::TARGET, request_path.to_string()) - .with_attribute(http::UPSTREAM_TARGET, upstream_path) - .with_attribute(llm::MODEL_NAME, resolved_model.to_string()) - .with_attribute(llm::IS_STREAMING, is_streaming.to_string()); - - // Only set parent span ID if it exists (not a root span) - if let Some(parent) = parent_span_id { - span_builder = span_builder.with_parent_span_id(&parent); - } - - // Add optional attributes - if let Some(temp) = temperature { - span_builder = span_builder.with_attribute(llm::TEMPERATURE, temp.to_string()); - } - - if let Some(tools) = tool_names { - let formatted_tools = tools - .iter() - .map(|name| format!("{}(...)", name)) - .collect::>() - .join("\n"); - span_builder = span_builder.with_attribute(llm::TOOLS, formatted_tools); - } - - if let Some(preview) = user_message_preview { - span_builder = span_builder.with_attribute(llm::USER_MESSAGE_PREVIEW, preview); - } - - span_builder.build() -} - /// Calculates the upstream path for the provider based on the model name. /// Looks up provider configuration, gets the ProviderId and base_url_path_prefix, /// then uses target_endpoint_for_provider to calculate the correct upstream path. diff --git a/crates/brightstaff/src/handlers/pipeline_processor.rs b/crates/brightstaff/src/handlers/pipeline_processor.rs index bc36de01..d9e87095 100644 --- a/crates/brightstaff/src/handlers/pipeline_processor.rs +++ b/crates/brightstaff/src/handlers/pipeline_processor.rs @@ -4,20 +4,18 @@ use common::configuration::{Agent, AgentFilterChain}; use common::consts::{ ARCH_UPSTREAM_HOST_HEADER, BRIGHT_STAFF_SERVICE_NAME, ENVOY_RETRY_HEADER, TRACE_PARENT_HEADER, }; -use common::traces::{generate_random_span_id, SpanBuilder, SpanKind}; use hermesllm::apis::openai::Message; use hermesllm::{ProviderRequest, ProviderRequestType}; use hyper::header::HeaderMap; -use std::time::{Instant, SystemTime}; -use tracing::{debug, info, warn}; - -use crate::tracing::operation_component::{self}; -use crate::tracing::{http, OperationNameBuilder}; +use opentelemetry::global; +use opentelemetry_http::HeaderInjector; +use tracing::{debug, info, instrument, warn}; use crate::handlers::jsonrpc::{ JsonRpcId, JsonRpcNotification, JsonRpcRequest, JsonRpcResponse, JSON_RPC_VERSION, MCP_INITIALIZE, MCP_INITIALIZE_NOTIFICATION, TOOL_CALL_METHOD, }; +use crate::tracing::{operation_component, set_service_name}; use uuid::Uuid; /// Errors that can occur during pipeline processing @@ -81,115 +79,14 @@ impl PipelineProcessor { } } - /// Record a span for filter execution - #[allow(clippy::too_many_arguments)] - fn record_filter_span( - &self, - collector: &std::sync::Arc, - agent_name: &str, - tool_name: &str, - start_time: SystemTime, - end_time: SystemTime, - elapsed: std::time::Duration, - trace_id: String, - parent_span_id: String, - span_id: String, - ) -> String { - // let (trace_id, parent_span_id) = self.extract_trace_context(); - - // Build operation name: POST /agents/* {filter_name} - // Using generic path since we don't have access to specific endpoint here - let operation_name = OperationNameBuilder::new() - .with_method("POST") - .with_path("/agents/*") - .with_target(agent_name) - .build(); - - let mut span_builder = SpanBuilder::new(&operation_name) - .with_span_id(span_id.clone()) - .with_kind(SpanKind::Client) - .with_start_time(start_time) - .with_end_time(end_time) - .with_attribute(http::METHOD, "POST") - .with_attribute(http::TARGET, "/agents/*") - .with_attribute("filter.name", agent_name.to_string()) - .with_attribute("filter.tool_name", tool_name.to_string()) - .with_attribute( - "duration_ms", - format!("{:.2}", elapsed.as_secs_f64() * 1000.0), - ); - - if !trace_id.is_empty() { - span_builder = span_builder.with_trace_id(trace_id); - } - if !parent_span_id.is_empty() { - span_builder = span_builder.with_parent_span_id(parent_span_id); - } - - let span = span_builder.build(); - // Use plano(filter) as service name for filter execution spans - collector.record_span(operation_component::AGENT_FILTER, span); - span_id.clone() - } - - /// Record a span for MCP protocol interactions - #[allow(clippy::too_many_arguments)] - fn record_agent_filter_span( - &self, - collector: &std::sync::Arc, - operation: &str, - agent_id: &str, - start_time: SystemTime, - end_time: SystemTime, - elapsed: std::time::Duration, - additional_attrs: Option>, - trace_id: String, - parent_span_id: String, - span_id: Option, - ) { - // let (trace_id, parent_span_id) = self.extract_trace_context(); - - // Build operation name: POST /mcp {agent_id} - let operation_name = OperationNameBuilder::new() - .with_method("POST") - .with_path("/mcp") - .with_operation(operation) - .with_target(agent_id) - .build(); - - let mut span_builder = SpanBuilder::new(&operation_name) - .with_span_id(span_id.unwrap_or_else(generate_random_span_id)) - .with_kind(SpanKind::Client) - .with_start_time(start_time) - .with_end_time(end_time) - .with_attribute(http::METHOD, "POST") - .with_attribute(http::TARGET, format!("/mcp ({})", operation)) - .with_attribute("mcp.operation", operation.to_string()) - .with_attribute("mcp.agent_id", agent_id.to_string()) - .with_attribute( - "duration_ms", - format!("{:.2}", elapsed.as_secs_f64() * 1000.0), - ); - - if let Some(attrs) = additional_attrs { - for (key, value) in attrs { - span_builder = span_builder.with_attribute(key, value); - } - } - - if !trace_id.is_empty() { - span_builder = span_builder.with_trace_id(trace_id); - } - if !parent_span_id.is_empty() { - span_builder = span_builder.with_parent_span_id(parent_span_id); - } - - let span = span_builder.build(); - // MCP spans also use plano(filter) service name as they are part of filter operations - collector.record_span(operation_component::AGENT_FILTER, span); - } - - /// Process the filter chain of agents (all except the terminal agent) + // /// Process the filter chain of agents (all except the terminal agent) + // #[instrument( + // skip(self, chat_history, agent_filter_chain, agent_map, request_headers), + // fields( + // filter_count = agent_filter_chain.filter_chain.as_ref().map(|fc| fc.len()).unwrap_or(0), + // message_count = chat_history.len() + // ) + // )] #[allow(clippy::too_many_arguments)] pub async fn process_filter_chain( &mut self, @@ -197,9 +94,6 @@ impl PipelineProcessor { agent_filter_chain: &AgentFilterChain, agent_map: &HashMap, request_headers: &HeaderMap, - trace_collector: Option<&std::sync::Arc>, - trace_id: String, - parent_span_id: String, ) -> Result, PipelineError> { let mut chat_history_updated = chat_history.to_vec(); @@ -210,7 +104,7 @@ impl PipelineProcessor { }; for agent_name in filter_chain { - debug!("Processing filter agent: {}", agent_name); + debug!(agent = %agent_name, "processing filter agent"); let agent = agent_map .get(agent_name) @@ -219,68 +113,29 @@ impl PipelineProcessor { let tool_name = agent.tool.as_deref().unwrap_or(&agent.id); info!( - "executing filter: {}/{}, url: {}, type: {}, conversation length: {}", - agent_name, - tool_name, - agent.url, - agent.agent_type.as_deref().unwrap_or("mcp"), - chat_history.len() + agent = %agent_name, + tool = %tool_name, + url = %agent.url, + agent_type = %agent.agent_type.as_deref().unwrap_or("mcp"), + conversation_len = chat_history.len(), + "executing filter" ); - let start_time = SystemTime::now(); - let start_instant = Instant::now(); - - // Generate filter span ID before execution so MCP spans can use it as parent - let filter_span_id = generate_random_span_id(); - if agent.agent_type.as_deref().unwrap_or("mcp") == "mcp" { chat_history_updated = self - .execute_mcp_filter( - &chat_history_updated, - agent, - request_headers, - trace_collector, - trace_id.clone(), - filter_span_id.clone(), - ) + .execute_mcp_filter(&chat_history_updated, agent, request_headers) .await?; } else { chat_history_updated = self - .execute_http_filter( - &chat_history_updated, - agent, - request_headers, - trace_collector, - trace_id.clone(), - filter_span_id.clone(), - ) + .execute_http_filter(&chat_history_updated, agent, request_headers) .await?; } - let end_time = SystemTime::now(); - let elapsed = start_instant.elapsed(); - info!( - "Filter '{}' completed in {:.2}ms, updated conversation length: {}", - agent_name, - elapsed.as_secs_f64() * 1000.0, - chat_history_updated.len() + agent = %agent_name, + updated_len = chat_history_updated.len(), + "filter completed" ); - - // Record span for this filter execution - if let Some(collector) = trace_collector { - self.record_filter_span( - collector, - agent_name, - tool_name, - start_time, - end_time, - elapsed, - trace_id.clone(), - parent_span_id.clone(), - filter_span_id, - ); - } } Ok(chat_history_updated) @@ -292,18 +147,17 @@ impl PipelineProcessor { request_headers: &HeaderMap, agent_id: &str, session_id: Option<&str>, - trace_id: String, - parent_span_id: String, ) -> Result { - let trace_parent = format!("00-{}-{}-01", trace_id, parent_span_id); let mut headers = request_headers.clone(); headers.remove(hyper::header::CONTENT_LENGTH); + // Inject OpenTelemetry trace context automatically headers.remove(TRACE_PARENT_HEADER); - headers.insert( - TRACE_PARENT_HEADER, - hyper::header::HeaderValue::from_str(&trace_parent).unwrap(), - ); + global::get_text_map_propagator(|propagator| { + let cx = + tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current()); + propagator.inject_context(&cx, &mut HeaderInjector(&mut headers)); + }); headers.insert( ARCH_UPSTREAM_HOST_HEADER, @@ -348,9 +202,9 @@ impl PipelineProcessor { // Validate SSE format: first line should be "event: message" if lines.is_empty() || lines[0] != "event: message" { warn!( - "Invalid SSE response format from agent {}: expected 'event: message' as first line, got: {:?}", - agent_id, - lines.first() + agent = %agent_id, + first_line = ?lines.first(), + "invalid SSE response format" ); return Err(PipelineError::NoContentInResponse(format!( "Invalid SSE response format from agent {}: expected 'event: message' as first line", @@ -367,9 +221,9 @@ impl PipelineProcessor { if data_lines.len() != 1 { warn!( - "Expected exactly one 'data:' line from agent {}, found {}", - agent_id, - data_lines.len() + agent = %agent_id, + found = data_lines.len(), + "expected exactly one 'data:' line" ); return Err(PipelineError::NoContentInResponse(format!( "Expected exactly one 'data:' line from agent {}, found {}", @@ -429,27 +283,34 @@ impl PipelineProcessor { } /// Send request to a specific agent and return the response content + #[instrument( + skip(self, messages, agent, request_headers), + fields( + agent_id = %agent.id, + filter_name = %agent.id, + message_count = messages.len() + ) + )] async fn execute_mcp_filter( &mut self, messages: &[Message], agent: &Agent, request_headers: &HeaderMap, - trace_collector: Option<&std::sync::Arc>, - trace_id: String, - filter_span_id: String, ) -> Result, PipelineError> { + // Set service name for this filter span + set_service_name(operation_component::AGENT_FILTER); + + // Update current span name to include filter name + use opentelemetry::trace::get_active_span; + get_active_span(|span| { + span.update_name(format!("execute_mcp_filter ({})", agent.id)); + }); + // Get or create MCP session let mcp_session_id = if let Some(session_id) = self.agent_id_session_map.get(&agent.id) { session_id.clone() } else { - let session_id = self - .get_new_session_id( - &agent.id, - trace_id.clone(), - filter_span_id.clone(), - request_headers, - ) - .await; + let session_id = self.get_new_session_id(&agent.id, request_headers).await; self.agent_id_session_map .insert(agent.id.clone(), session_id.clone()); session_id @@ -464,21 +325,9 @@ impl PipelineProcessor { let tool_name = agent.tool.as_deref().unwrap_or(&agent.id); let json_rpc_request = self.build_tool_call_request(tool_name, messages)?; - // Generate span ID for this MCP tool call (child of filter span) - let mcp_span_id = generate_random_span_id(); - // Build headers - let agent_headers = self.build_mcp_headers( - request_headers, - &agent.id, - Some(&mcp_session_id), - trace_id.clone(), - mcp_span_id.clone(), - )?; - - // Send request with tracing - let start_time = SystemTime::now(); - let start_instant = Instant::now(); + let agent_headers = + self.build_mcp_headers(request_headers, &agent.id, Some(&mcp_session_id))?; let response = self .send_mcp_request(&json_rpc_request, &agent_headers, &agent.id) @@ -486,31 +335,6 @@ impl PipelineProcessor { let http_status = response.status(); let response_bytes = response.bytes().await?; - let end_time = SystemTime::now(); - let elapsed = start_instant.elapsed(); - - // Record MCP tool call span - if let Some(collector) = trace_collector { - let mut attrs = HashMap::new(); - attrs.insert("mcp.method", "tools/call".to_string()); - attrs.insert("mcp.tool_name", tool_name.to_string()); - attrs.insert("mcp.session_id", mcp_session_id.clone()); - attrs.insert("http.status_code", http_status.as_u16().to_string()); - - self.record_agent_filter_span( - collector, - "tool_call", - &agent.id, - start_time, - end_time, - elapsed, - Some(attrs), - trace_id.clone(), - filter_span_id.clone(), - Some(mcp_span_id), - ); - } - // Handle HTTP errors if !http_status.is_success() { let error_body = String::from_utf8_lossy(&response_bytes).to_string(); @@ -611,8 +435,6 @@ impl PipelineProcessor { &self, agent_id: &str, session_id: &str, - trace_id: String, - parent_span_id: String, request_headers: &HeaderMap, ) -> Result<(), PipelineError> { let initialized_notification = JsonRpcNotification { @@ -622,15 +444,9 @@ impl PipelineProcessor { }; let notification_body = serde_json::to_string(&initialized_notification)?; - debug!("Sending initialized notification for agent {}", agent_id); + debug!("sending initialized notification for agent {}", agent_id); - let headers = self.build_mcp_headers( - request_headers, - agent_id, - Some(session_id), - trace_id.clone(), - parent_span_id.clone(), - )?; + let headers = self.build_mcp_headers(request_headers, agent_id, Some(session_id))?; let response = self .client @@ -641,31 +457,19 @@ impl PipelineProcessor { .await?; info!( - "Initialized notification response status: {}", + "initialized notification response status: {}", response.status() ); Ok(()) } - async fn get_new_session_id( - &self, - agent_id: &str, - trace_id: String, - parent_span_id: String, - request_headers: &HeaderMap, - ) -> String { - info!("Initializing MCP session for agent {}", agent_id); + async fn get_new_session_id(&self, agent_id: &str, request_headers: &HeaderMap) -> String { + info!("initializing MCP session for agent {}", agent_id); let initialize_request = self.build_initialize_request(); let headers = self - .build_mcp_headers( - request_headers, - agent_id, - None, - trace_id.clone(), - parent_span_id.clone(), - ) + .build_mcp_headers(request_headers, agent_id, None) .expect("Failed to build headers for initialization"); let response = self @@ -673,7 +477,7 @@ impl PipelineProcessor { .await .expect("Failed to initialize MCP session"); - info!("Initialize response status: {}", response.status()); + info!("initialize response status: {}", response.status()); let session_id = response .headers() @@ -683,49 +487,54 @@ impl PipelineProcessor { .to_string(); info!( - "Created new MCP session for agent {}: {}", + "created new MCP session for agent {}: {}", agent_id, session_id ); // Send initialized notification - self.send_initialized_notification( - agent_id, - &session_id, - trace_id.clone(), - parent_span_id.clone(), - &headers, - ) - .await - .expect("Failed to send initialized notification"); + self.send_initialized_notification(agent_id, &session_id, &headers) + .await + .expect("Failed to send initialized notification"); session_id } /// Execute a HTTP-based filter agent + #[instrument( + skip(self, messages, agent, request_headers), + fields( + agent_id = %agent.id, + agent_url = %agent.url, + filter_name = %agent.id, + message_count = messages.len() + ) + )] async fn execute_http_filter( &mut self, messages: &[Message], agent: &Agent, request_headers: &HeaderMap, - trace_collector: Option<&std::sync::Arc>, - trace_id: String, - filter_span_id: String, ) -> Result, PipelineError> { - let tool_name = agent.tool.as_deref().unwrap_or(&agent.id); + // Set service name for this filter span + set_service_name(operation_component::AGENT_FILTER); - // Generate span ID for this HTTP call (child of filter span) - let http_span_id = generate_random_span_id(); + // Update current span name to include filter name + use opentelemetry::trace::get_active_span; + get_active_span(|span| { + span.update_name(format!("execute_http_filter ({})", agent.id)); + }); // Build headers - let trace_parent = format!("00-{}-{}-01", trace_id, http_span_id); let mut agent_headers = request_headers.clone(); agent_headers.remove(hyper::header::CONTENT_LENGTH); + // Inject OpenTelemetry trace context automatically agent_headers.remove(TRACE_PARENT_HEADER); - agent_headers.insert( - TRACE_PARENT_HEADER, - hyper::header::HeaderValue::from_str(&trace_parent).unwrap(), - ); + global::get_text_map_propagator(|propagator| { + let cx = + tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current()); + propagator.inject_context(&cx, &mut HeaderInjector(&mut agent_headers)); + }); agent_headers.insert( ARCH_UPSTREAM_HOST_HEADER, @@ -748,10 +557,6 @@ impl PipelineProcessor { hyper::header::HeaderValue::from_static("application/json"), ); - // Send request with tracing - let start_time = SystemTime::now(); - let start_instant = Instant::now(); - debug!( "Sending HTTP request to agent {} at URL: {}", agent.id, agent.url @@ -769,30 +574,6 @@ impl PipelineProcessor { let http_status = response.status(); let response_bytes = response.bytes().await?; - let end_time = SystemTime::now(); - let elapsed = start_instant.elapsed(); - - // Record HTTP call span - if let Some(collector) = trace_collector { - let mut attrs = HashMap::new(); - attrs.insert("http.tool_name", tool_name.to_string()); - attrs.insert("http.url", agent.url.clone()); - attrs.insert("http.status_code", http_status.as_u16().to_string()); - - self.record_agent_filter_span( - collector, - "http_call", - &agent.id, - start_time, - end_time, - elapsed, - Some(attrs), - trace_id.clone(), - filter_span_id.clone(), - Some(http_span_id), - ); - } - // Handle HTTP errors if !http_status.is_success() { let error_body = String::from_utf8_lossy(&response_bytes).to_string(); @@ -825,34 +606,34 @@ impl PipelineProcessor { } /// Send request to terminal agent and return the raw response for streaming + /// Note: The caller is responsible for creating the plano(agent) span that wraps + /// both this call and the subsequent response consumption. pub async fn invoke_agent( &self, messages: &[Message], mut original_request: ProviderRequestType, terminal_agent: &Agent, request_headers: &HeaderMap, - trace_id: String, - agent_span_id: String, ) -> Result { // let mut request = original_request.clone(); original_request.set_messages(messages); + let request_url = "/v1/chat/completions"; + let request_body = ProviderRequestType::to_bytes(&original_request).unwrap(); // let request_body = serde_json::to_string(&request)?; - debug!("Sending request to terminal agent {}", terminal_agent.id); + debug!("sending request to terminal agent {}", terminal_agent.id); let mut agent_headers = request_headers.clone(); agent_headers.remove(hyper::header::CONTENT_LENGTH); - // Set traceparent header to make the egress span a child of the agent span - if !trace_id.is_empty() && !agent_span_id.is_empty() { - let trace_parent = format!("00-{}-{}-01", trace_id, agent_span_id); - agent_headers.remove(TRACE_PARENT_HEADER); - agent_headers.insert( - TRACE_PARENT_HEADER, - hyper::header::HeaderValue::from_str(&trace_parent).unwrap(), - ); - } + // Inject OpenTelemetry trace context automatically + agent_headers.remove(TRACE_PARENT_HEADER); + global::get_text_map_propagator(|propagator| { + let cx = + tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current()); + propagator.inject_context(&cx, &mut HeaderInjector(&mut agent_headers)); + }); agent_headers.insert( ARCH_UPSTREAM_HOST_HEADER, @@ -867,7 +648,7 @@ impl PipelineProcessor { let response = self .client - .post(format!("{}/v1/chat/completions", self.url)) + .post(format!("{}{}", self.url, request_url)) .headers(agent_headers) .body(request_body) .send() @@ -914,15 +695,7 @@ mod tests { let pipeline = create_test_pipeline(vec!["nonexistent-agent", "terminal-agent"]); let result = processor - .process_filter_chain( - &messages, - &pipeline, - &agent_map, - &request_headers, - None, - String::new(), - String::new(), - ) + .process_filter_chain(&messages, &pipeline, &agent_map, &request_headers) .await; assert!(result.is_err()); @@ -956,14 +729,7 @@ mod tests { let request_headers = HeaderMap::new(); let result = processor - .execute_mcp_filter( - &messages, - &agent, - &request_headers, - None, - "trace-123".to_string(), - "span-123".to_string(), - ) + .execute_mcp_filter(&messages, &agent, &request_headers) .await; match result { @@ -1002,14 +768,7 @@ mod tests { let request_headers = HeaderMap::new(); let result = processor - .execute_mcp_filter( - &messages, - &agent, - &request_headers, - None, - "trace-456".to_string(), - "span-456".to_string(), - ) + .execute_mcp_filter(&messages, &agent, &request_headers) .await; match result { @@ -1061,14 +820,7 @@ mod tests { let request_headers = HeaderMap::new(); let result = processor - .execute_mcp_filter( - &messages, - &agent, - &request_headers, - None, - "trace-789".to_string(), - "span-789".to_string(), - ) + .execute_mcp_filter(&messages, &agent, &request_headers) .await; match result { diff --git a/crates/brightstaff/src/handlers/response_handler.rs b/crates/brightstaff/src/handlers/response_handler.rs index d386df1e..e2561a8f 100644 --- a/crates/brightstaff/src/handlers/response_handler.rs +++ b/crates/brightstaff/src/handlers/response_handler.rs @@ -9,7 +9,7 @@ use hyper::{Response, StatusCode}; use tokio::sync::mpsc; use tokio_stream::wrappers::ReceiverStream; use tokio_stream::StreamExt; -use tracing::{info, warn}; +use tracing::{info, warn, Instrument}; /// Errors that can occur during response handling #[derive(Debug, thiserror::Error)] @@ -69,10 +69,14 @@ impl ResponseHandler { response } - /// Create a streaming response from a reqwest response + /// Create a streaming response from a reqwest response. + /// The spawned streaming task is instrumented with both `agent_span` and `orchestrator_span` + /// so their durations reflect the actual time spent streaming to the client. pub async fn create_streaming_response( &self, llm_response: reqwest::Response, + agent_span: tracing::Span, + orchestrator_span: tracing::Span, ) -> Result>, ResponseError> { // Copy headers from the original response let response_headers = llm_response.headers(); @@ -89,25 +93,30 @@ impl ResponseHandler { // Create channel for async streaming let (tx, rx) = mpsc::channel::(16); - // Spawn task to stream data - tokio::spawn(async move { - let mut byte_stream = llm_response.bytes_stream(); + // Spawn streaming task instrumented with both spans (nested) so both + // remain entered for the full streaming duration. + tokio::spawn( + async move { + let mut byte_stream = llm_response.bytes_stream(); - while let Some(item) = byte_stream.next().await { - let chunk = match item { - Ok(chunk) => chunk, - Err(err) => { - warn!("Error receiving chunk: {:?}", err); + while let Some(item) = byte_stream.next().await { + let chunk = match item { + Ok(chunk) => chunk, + Err(err) => { + warn!(error = ?err, "error receiving chunk"); + break; + } + }; + + if tx.send(chunk).await.is_err() { + warn!("receiver dropped"); break; } - }; - - if tx.send(chunk).await.is_err() { - warn!("Receiver dropped"); - break; } } - }); + .instrument(agent_span) + .instrument(orchestrator_span), + ); let stream = ReceiverStream::new(rx).map(|chunk| Ok::<_, hyper::Error>(Frame::data(chunk))); let stream_body = BoxBody::new(StreamBody::new(stream)); @@ -164,11 +173,11 @@ impl ResponseHandler { if let Some(content) = provider_response.content_delta() { accumulated_text.push_str(content); } else { - info!("No content delta in provider response"); + info!("no content delta in provider response"); } } Err(e) => { - warn!("Failed to parse provider response: {:?}", e); + warn!(error = ?e, "failed to parse provider response"); } } } @@ -248,7 +257,13 @@ mod tests { let llm_response = client.get(&(server.url() + "/test")).send().await.unwrap(); let handler = ResponseHandler::new(); - let result = handler.create_streaming_response(llm_response).await; + let result = handler + .create_streaming_response( + llm_response, + tracing::Span::current(), + tracing::Span::current(), + ) + .await; mock.assert_async().await; assert!(result.is_ok()); diff --git a/crates/brightstaff/src/handlers/router_chat.rs b/crates/brightstaff/src/handlers/router_chat.rs index c3a517e0..01c67a94 100644 --- a/crates/brightstaff/src/handlers/router_chat.rs +++ b/crates/brightstaff/src/handlers/router_chat.rs @@ -1,14 +1,12 @@ use common::configuration::ModelUsagePreference; -use common::traces::{parse_traceparent, SpanBuilder, SpanKind, TraceCollector}; use hermesllm::clients::endpoints::SupportedUpstreamAPIs; use hermesllm::{ProviderRequest, ProviderRequestType}; use hyper::StatusCode; -use std::collections::HashMap; use std::sync::Arc; use tracing::{debug, info, warn}; use crate::router::llm_router::RouterService; -use crate::tracing::{http, operation_component, routing, OperationNameBuilder}; +use crate::tracing::routing; pub struct RoutingResult { pub model_name: String, @@ -36,7 +34,6 @@ impl RoutingError { pub async fn router_chat_get_upstream_model( router_service: Arc, client_request: ProviderRequestType, - trace_collector: Arc, traceparent: &str, request_path: &str, request_id: &str, @@ -56,14 +53,14 @@ pub async fn router_chat_get_upstream_model( | ProviderRequestType::BedrockConverseStream(_) | ProviderRequestType::ResponsesAPIRequest(_), ) => { - warn!("Unexpected: got non-ChatCompletions request after converting to OpenAI format"); + warn!("unexpected: got non-ChatCompletions request after converting to OpenAI format"); return Err(RoutingError::internal_error( "Request conversion failed".to_string(), )); } Err(err) => { warn!( - "Failed to convert request to ChatCompletionsRequest: {}", + "failed to convert request to ChatCompletionsRequest: {}", err ); return Err(RoutingError::internal_error(format!( @@ -74,9 +71,8 @@ pub async fn router_chat_get_upstream_model( }; debug!( - "[PLANO_REQ_ID: {:?}]: ROUTER_REQ: {}", - request_id, - &serde_json::to_string(&chat_request).unwrap() + request = %serde_json::to_string(&chat_request).unwrap(), + "router request" ); // Extract usage preferences from metadata @@ -112,16 +108,14 @@ pub async fn router_chat_get_upstream_model( }; info!( - "[PLANO_REQ_ID: {:?}] | ROUTER_REQ | Usage preferences from request: {}, request_path: {}, latest message: {}", - request_id, - usage_preferences.is_some(), - request_path, - latest_message_for_log + has_usage_preferences = usage_preferences.is_some(), + path = %request_path, + latest_message = %latest_message_for_log, + "processing router request" ); // Capture start time for routing span let routing_start_time = std::time::Instant::now(); - let routing_start_system_time = std::time::SystemTime::now(); // Attempt to determine route using the router service let routing_result = router_service @@ -133,41 +127,21 @@ pub async fn router_chat_get_upstream_model( ) .await; + let determination_ms = routing_start_time.elapsed().as_millis() as i64; + let current_span = tracing::Span::current(); + current_span.record(routing::ROUTE_DETERMINATION_MS, determination_ms); + match routing_result { Ok(route) => match route { Some((_, model_name)) => { - // Record successful routing span - let mut attrs: HashMap = HashMap::new(); - attrs.insert("route.selected_model".to_string(), model_name.clone()); - record_routing_span( - trace_collector, - traceparent, - routing_start_time, - routing_start_system_time, - attrs, - ) - .await; - + current_span.record("route.selected_model", model_name.as_str()); Ok(RoutingResult { model_name }) } None => { // No route determined, return sentinel value "none" // This signals to llm.rs to use the original validated request model - info!( - "[PLANO_REQ_ID: {}] | ROUTER_REQ | No route determined, returning sentinel 'none'", - request_id - ); - - let mut attrs = HashMap::new(); - attrs.insert("route.selected_model".to_string(), "none".to_string()); - record_routing_span( - trace_collector, - traceparent, - routing_start_time, - routing_start_system_time, - attrs, - ) - .await; + current_span.record("route.selected_model", "none"); + info!("no route determined, using default model"); Ok(RoutingResult { model_name: "none".to_string(), @@ -175,19 +149,7 @@ pub async fn router_chat_get_upstream_model( } }, Err(err) => { - // Record failed routing span - let mut attrs = HashMap::new(); - attrs.insert("route.selected_model".to_string(), "unknown".to_string()); - attrs.insert("error.message".to_string(), err.to_string()); - record_routing_span( - trace_collector, - traceparent, - routing_start_time, - routing_start_system_time, - attrs, - ) - .await; - + current_span.record("route.selected_model", "unknown"); Err(RoutingError::internal_error(format!( "Failed to determine route: {}", err @@ -195,53 +157,3 @@ pub async fn router_chat_get_upstream_model( } } } - -/// Helper function to record a routing span with the given attributes. -/// Reduces code duplication across different routing outcomes. -async fn record_routing_span( - trace_collector: Arc, - traceparent: &str, - start_time: std::time::Instant, - start_system_time: std::time::SystemTime, - attrs: HashMap, -) { - // The routing always uses OpenAI Chat Completions format internally, - // so we log that as the actual API being used for routing - let routing_api_path = "/v1/chat/completions"; - - let routing_operation_name = OperationNameBuilder::new() - .with_method("POST") - .with_path(routing_api_path) - .with_target("Arch-Router-1.5B") - .build(); - - let (trace_id, parent_span_id) = parse_traceparent(traceparent); - - // Build the routing span directly using constants - let mut span_builder = SpanBuilder::new(&routing_operation_name) - .with_trace_id(&trace_id) - .with_kind(SpanKind::Client) - .with_start_time(start_system_time) - .with_end_time(std::time::SystemTime::now()) - .with_attribute(http::METHOD, "POST") - .with_attribute(http::TARGET, routing_api_path.to_string()) - .with_attribute( - routing::ROUTE_DETERMINATION_MS, - start_time.elapsed().as_millis().to_string(), - ); - - // Only set parent span ID if it exists (not a root span) - if let Some(parent) = parent_span_id { - span_builder = span_builder.with_parent_span_id(&parent); - } - - // Add all custom attributes - for (key, value) in attrs { - span_builder = span_builder.with_attribute(key, value); - } - - let span = span_builder.build(); - - // Record the span directly to the collector - trace_collector.record_span(operation_component::ROUTING, span); -} diff --git a/crates/brightstaff/src/handlers/utils.rs b/crates/brightstaff/src/handlers/utils.rs index f2a318a4..bff87ad4 100644 --- a/crates/brightstaff/src/handlers/utils.rs +++ b/crates/brightstaff/src/handlers/utils.rs @@ -1,18 +1,18 @@ use bytes::Bytes; -use common::traces::{Attribute, AttributeValue, Event, Span, TraceCollector}; use http_body_util::combinators::BoxBody; use http_body_util::StreamBody; use hyper::body::Frame; -use std::sync::Arc; -use std::time::{Instant, SystemTime}; +use opentelemetry::trace::TraceContextExt; +use opentelemetry::KeyValue; +use std::time::Instant; use tokio::sync::mpsc; use tokio_stream::wrappers::ReceiverStream; use tokio_stream::StreamExt; -use tracing::warn; +use tracing::{info, warn, Instrument}; +use tracing_opentelemetry::OpenTelemetrySpanExt; -// Import tracing constants and signals use crate::signals::{InteractionQuality, SignalAnalyzer, TextBasedSignalAnalyzer, FLAG_MARKER}; -use crate::tracing::{error, llm, signals as signal_constants}; +use crate::tracing::{llm, set_service_name, signals as signal_constants}; use hermesllm::apis::openai::Message; /// Trait for processing streaming chunks @@ -31,11 +31,10 @@ pub trait StreamProcessor: Send + 'static { fn on_error(&mut self, _error: &str) {} } -/// A processor that tracks streaming metrics and finalizes the span +/// A processor that tracks streaming metrics pub struct ObservableStreamProcessor { - collector: Arc, service_name: String, - span: Span, + operation_name: String, total_bytes: usize, chunk_count: usize, start_time: Instant, @@ -47,22 +46,28 @@ impl ObservableStreamProcessor { /// Create a new passthrough processor /// /// # Arguments - /// * `collector` - The trace collector to record the span to - /// * `service_name` - The service name for this span (e.g., "archgw(llm)") - /// * `span` - The span to finalize after streaming completes + /// * `service_name` - The service name for this span (e.g., "plano(llm)") + /// This will be set as the `service.name.override` attribute on the current span, + /// allowing the ServiceNameOverrideExporter to route spans to different services. + /// * `operation_name` - The current span operation name (e.g., "POST /v1/chat/completions gpt-4") + /// Used to append the flag marker when concerning signals are detected. /// * `start_time` - When the request started (for duration calculation) /// * `messages` - Optional conversation messages for signal analysis pub fn new( - collector: Arc, service_name: impl Into, - span: Span, + operation_name: impl Into, start_time: Instant, messages: Option>, ) -> Self { + let service_name = service_name.into(); + + // Set the service name override on the current span for OpenTelemetry export + // This allows the ServiceNameOverrideExporter to route this span to the correct service + set_service_name(&service_name); + Self { - collector, - service_name: service_name.into(), - span, + service_name, + operation_name: operation_name.into(), total_bytes: 0, chunk_count: 0, start_time, @@ -87,89 +92,81 @@ impl StreamProcessor for ObservableStreamProcessor { } fn on_complete(&mut self) { - // Update span with streaming metrics and end time - let end_time_nanos = SystemTime::now() - .duration_since(SystemTime::UNIX_EPOCH) - .unwrap_or_default() - .as_nanos(); - - self.span.end_time_unix_nano = format!("{}", end_time_nanos); - - // Add streaming metrics as attributes using constants - self.span.attributes.push(Attribute { - key: llm::RESPONSE_BYTES.to_string(), - value: AttributeValue { - string_value: Some(self.total_bytes.to_string()), - }, - }); - - self.span.attributes.push(Attribute { - key: llm::DURATION_MS.to_string(), - value: AttributeValue { - string_value: Some(self.start_time.elapsed().as_millis().to_string()), - }, - }); - - // Add time to first token if available (streaming only) + // Record time-to-first-token as an OTel span attribute + event (streaming only) if let Some(ttft) = self.time_to_first_token { - self.span.attributes.push(Attribute { - key: llm::TIME_TO_FIRST_TOKEN_MS.to_string(), - value: AttributeValue { - string_value: Some(ttft.to_string()), - }, - }); - - // Add time to first token as a span event - // Calculate the timestamp by adding ttft duration to span start time - if let Ok(start_time_nanos) = self.span.start_time_unix_nano.parse::() { - // Convert ttft from milliseconds to nanoseconds and add to start time - let event_timestamp = start_time_nanos + (ttft * 1_000_000); - let mut event = - Event::new(llm::TIME_TO_FIRST_TOKEN_MS.to_string(), event_timestamp); - event.add_attribute(llm::TIME_TO_FIRST_TOKEN_MS.to_string(), ttft.to_string()); - - // Initialize events vector if needed - if self.span.events.is_none() { - self.span.events = Some(Vec::new()); - } - - if let Some(ref mut events) = self.span.events { - events.push(event); - } - } + let span = tracing::Span::current(); + let otel_context = span.context(); + let otel_span = otel_context.span(); + otel_span.set_attribute(KeyValue::new(llm::TIME_TO_FIRST_TOKEN_MS, ttft as i64)); + otel_span.add_event( + llm::TIME_TO_FIRST_TOKEN_MS, + vec![KeyValue::new(llm::TIME_TO_FIRST_TOKEN_MS, ttft as i64)], + ); } - // Analyze signals if messages are available and add to span attributes + // Analyze signals if messages are available and record as span attributes if let Some(ref messages) = self.messages { let analyzer: Box = Box::new(TextBasedSignalAnalyzer::new()); let report = analyzer.analyze(messages); + // Get the current OTel span to set signal attributes + let span = tracing::Span::current(); + let otel_context = span.context(); + let otel_span = otel_context.span(); + // Add overall quality - self.span.attributes.push(Attribute { - key: signal_constants::QUALITY.to_string(), - value: AttributeValue { - string_value: Some(format!("{:?}", report.overall_quality)), - }, - }); + otel_span.set_attribute(KeyValue::new( + signal_constants::QUALITY, + format!("{:?}", report.overall_quality), + )); // Add repair/follow-up metrics if concerning if report.follow_up.is_concerning || report.follow_up.repair_count > 0 { - self.span.attributes.push(Attribute { - key: signal_constants::REPAIR_COUNT.to_string(), - value: AttributeValue { - string_value: Some(report.follow_up.repair_count.to_string()), - }, - }); - - self.span.attributes.push(Attribute { - key: signal_constants::REPAIR_RATIO.to_string(), - value: AttributeValue { - string_value: Some(format!("{:.3}", report.follow_up.repair_ratio)), - }, - }); + otel_span.set_attribute(KeyValue::new( + signal_constants::REPAIR_COUNT, + report.follow_up.repair_count as i64, + )); + otel_span.set_attribute(KeyValue::new( + signal_constants::REPAIR_RATIO, + format!("{:.3}", report.follow_up.repair_ratio), + )); } - // Add flag marker to operation name if any concerning signal is detected + // Add frustration metrics + if report.frustration.has_frustration { + otel_span.set_attribute(KeyValue::new( + signal_constants::FRUSTRATION_COUNT, + report.frustration.frustration_count as i64, + )); + otel_span.set_attribute(KeyValue::new( + signal_constants::FRUSTRATION_SEVERITY, + report.frustration.severity as i64, + )); + } + + // Add repetition metrics + if report.repetition.has_looping { + otel_span.set_attribute(KeyValue::new( + signal_constants::REPETITION_COUNT, + report.repetition.repetition_count as i64, + )); + } + + // Add escalation metrics + if report.escalation.escalation_requested { + otel_span + .set_attribute(KeyValue::new(signal_constants::ESCALATION_REQUESTED, true)); + } + + // Add positive feedback metrics + if report.positive_feedback.has_positive_feedback { + otel_span.set_attribute(KeyValue::new( + signal_constants::POSITIVE_FEEDBACK_COUNT, + report.positive_feedback.positive_count as i64, + )); + } + + // Flag the span name if any concerning signal is detected let should_flag = report.frustration.has_frustration || report.repetition.has_looping || report.escalation.escalation_requested @@ -179,94 +176,27 @@ impl StreamProcessor for ObservableStreamProcessor { ); if should_flag { - // Prepend flag marker to the operation name - self.span.name = format!("{} {}", self.span.name, FLAG_MARKER); - } - - // Add key signal metrics - if report.frustration.has_frustration { - self.span.attributes.push(Attribute { - key: signal_constants::FRUSTRATION_COUNT.to_string(), - value: AttributeValue { - string_value: Some(report.frustration.frustration_count.to_string()), - }, - }); - self.span.attributes.push(Attribute { - key: signal_constants::FRUSTRATION_SEVERITY.to_string(), - value: AttributeValue { - string_value: Some(report.frustration.severity.to_string()), - }, - }); - } - - if report.repetition.has_looping { - self.span.attributes.push(Attribute { - key: signal_constants::REPETITION_COUNT.to_string(), - value: AttributeValue { - string_value: Some(report.repetition.repetition_count.to_string()), - }, - }); - } - - if report.escalation.escalation_requested { - self.span.attributes.push(Attribute { - key: signal_constants::ESCALATION_REQUESTED.to_string(), - value: AttributeValue { - string_value: Some("true".to_string()), - }, - }); - } - - if report.positive_feedback.has_positive_feedback { - self.span.attributes.push(Attribute { - key: signal_constants::POSITIVE_FEEDBACK_COUNT.to_string(), - value: AttributeValue { - string_value: Some(report.positive_feedback.positive_count.to_string()), - }, - }); + otel_span.update_name(format!("{} {}", self.operation_name, FLAG_MARKER)); } } - // Record the finalized span - self.collector - .record_span(&self.service_name, self.span.clone()); + info!( + service = %self.service_name, + total_bytes = self.total_bytes, + chunk_count = self.chunk_count, + duration_ms = self.start_time.elapsed().as_millis(), + time_to_first_token_ms = ?self.time_to_first_token, + "streaming completed" + ); } fn on_error(&mut self, error_msg: &str) { - warn!("Stream error in PassthroughProcessor: {}", error_msg); - - // Update span with error info and end time - let end_time_nanos = SystemTime::now() - .duration_since(SystemTime::UNIX_EPOCH) - .unwrap_or_default() - .as_nanos(); - - self.span.end_time_unix_nano = format!("{}", end_time_nanos); - - self.span.attributes.push(Attribute { - key: error::ERROR.to_string(), - value: AttributeValue { - string_value: Some("true".to_string()), - }, - }); - - self.span.attributes.push(Attribute { - key: error::MESSAGE.to_string(), - value: AttributeValue { - string_value: Some(error_msg.to_string()), - }, - }); - - self.span.attributes.push(Attribute { - key: llm::DURATION_MS.to_string(), - value: AttributeValue { - string_value: Some(self.start_time.elapsed().as_millis().to_string()), - }, - }); - - // Record the error span - self.collector - .record_span(&self.service_name, self.span.clone()); + warn!( + service = %self.service_name, + error = error_msg, + duration_ms = self.start_time.elapsed().as_millis(), + "stream error" + ); } } @@ -287,49 +217,55 @@ where { let (tx, rx) = mpsc::channel::(buffer_size); + // Capture the current span so the spawned task inherits the request context + let current_span = tracing::Span::current(); + // Spawn a task to process and forward chunks - let processor_handle = tokio::spawn(async move { - let mut is_first_chunk = true; + let processor_handle = tokio::spawn( + async move { + let mut is_first_chunk = true; - while let Some(item) = byte_stream.next().await { - let chunk = match item { - Ok(chunk) => chunk, - Err(err) => { - let err_msg = format!("Error receiving chunk: {:?}", err); - warn!("{}", err_msg); - processor.on_error(&err_msg); - break; + while let Some(item) = byte_stream.next().await { + let chunk = match item { + Ok(chunk) => chunk, + Err(err) => { + let err_msg = format!("Error receiving chunk: {:?}", err); + warn!(error = %err_msg, "stream error"); + processor.on_error(&err_msg); + break; + } + }; + + // Call on_first_bytes for the first chunk + if is_first_chunk { + processor.on_first_bytes(); + is_first_chunk = false; } - }; - // Call on_first_bytes for the first chunk - if is_first_chunk { - processor.on_first_bytes(); - is_first_chunk = false; - } - - // Process the chunk - match processor.process_chunk(chunk) { - Ok(Some(processed_chunk)) => { - if tx.send(processed_chunk).await.is_err() { - warn!("Receiver dropped"); + // Process the chunk + match processor.process_chunk(chunk) { + Ok(Some(processed_chunk)) => { + if tx.send(processed_chunk).await.is_err() { + warn!("receiver dropped"); + break; + } + } + Ok(None) => { + // Skip this chunk + continue; + } + Err(err) => { + warn!("processor error: {}", err); + processor.on_error(&err); break; } } - Ok(None) => { - // Skip this chunk - continue; - } - Err(err) => { - warn!("Processor error: {}", err); - processor.on_error(&err); - break; - } } - } - processor.on_complete(); - }); + processor.on_complete(); + } + .instrument(current_span), + ); // Convert channel receiver to HTTP stream let stream = ReceiverStream::new(rx).map(|chunk| Ok::<_, hyper::Error>(Frame::data(chunk))); diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index b8fa8832..3469d8bd 100644 --- a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -14,7 +14,6 @@ use common::consts::{ CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH, PLANO_ORCHESTRATOR_MODEL_NAME, }; use common::llm_providers::LlmProviders; -use common::traces::TraceCollector; use http_body_util::{combinators::BoxBody, BodyExt, Empty}; use hyper::body::Incoming; use hyper::server::conn::http1; @@ -51,13 +50,12 @@ fn empty() -> BoxBody { #[tokio::main] async fn main() -> Result<(), Box> { - let _tracer_provider = init_tracer(); let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| BIND_ADDRESS.to_string()); - // loading arch_config.yaml file + // loading arch_config.yaml file (before tracing init so we can read tracing config) let arch_config_path = env::var("ARCH_CONFIG_PATH_RENDERED") .unwrap_or_else(|_| "./arch_config_rendered.yaml".to_string()); - info!("Loading arch_config.yaml from {}", arch_config_path); + eprintln!("loading arch_config.yaml from {}", arch_config_path); let config_contents = fs::read_to_string(&arch_config_path).expect("Failed to read arch_config.yaml"); @@ -65,6 +63,10 @@ async fn main() -> Result<(), Box> { let config: Configuration = serde_yaml::from_str(&config_contents).expect("Failed to parse arch_config.yaml"); + // Initialize tracing using config.yaml tracing section + let _tracer_provider = init_tracer(config.tracing.as_ref()); + info!(path = %arch_config_path, "loaded arch_config.yaml"); + let arch_config = Arc::new(config); // combine agents and filters into a single list of agents @@ -116,17 +118,7 @@ async fn main() -> Result<(), Box> { // Initialize trace collector and start background flusher // Tracing is enabled if the tracing config is present in arch_config.yaml // Pass Some(true/false) to override, or None to use env var OTEL_TRACING_ENABLED - let tracing_enabled = if arch_config.tracing.is_some() { - info!("Tracing configuration found in arch_config.yaml"); - Some(true) - } else { - info!( - "No tracing configuration in arch_config.yaml, will check OTEL_TRACING_ENABLED env var" - ); - None - }; - let trace_collector = Arc::new(TraceCollector::new(tracing_enabled)); - let _flusher_handle = trace_collector.clone().start_background_flusher(); + // OpenTelemetry automatic instrumentation is configured in utils/tracing.rs // Initialize conversation state storage for v1/responses // Configurable via arch_config.yaml state_storage section @@ -136,7 +128,10 @@ async fn main() -> Result<(), Box> { if let Some(storage_config) = &arch_config.state_storage { let storage: Arc = match storage_config.storage_type { common::configuration::StateStorageType::Memory => { - info!("Initialized conversation state storage: Memory"); + info!( + storage_type = "memory", + "initialized conversation state storage" + ); Arc::new(MemoryConversationalStorage::new()) } common::configuration::StateStorageType::Postgres => { @@ -145,8 +140,11 @@ async fn main() -> Result<(), Box> { .as_ref() .expect("connection_string is required for postgres state_storage"); - debug!("Postgres connection string (full): {}", connection_string); - info!("Initializing conversation state storage: Postgres"); + debug!(connection_string = %connection_string, "postgres connection"); + info!( + storage_type = "postgres", + "initializing conversation state storage" + ); Arc::new( PostgreSQLConversationStorage::new(connection_string.clone()) .await @@ -156,7 +154,7 @@ async fn main() -> Result<(), Box> { }; Some(storage) } else { - info!("No state_storage configured - conversation state management disabled"); + info!("no state_storage configured, conversation state management disabled"); None }; @@ -175,7 +173,6 @@ async fn main() -> Result<(), Box> { let llm_providers = llm_providers.clone(); let agents_list = combined_agents_filters_list.clone(); let listeners = listeners.clone(); - let trace_collector = trace_collector.clone(); let state_storage = state_storage.clone(); let service = service_fn(move |req| { let router_service = Arc::clone(&router_service); @@ -186,7 +183,6 @@ async fn main() -> Result<(), Box> { let model_aliases = Arc::clone(&model_aliases); let agents_list = agents_list.clone(); let listeners = listeners.clone(); - let trace_collector = trace_collector.clone(); let state_storage = state_storage.clone(); async move { @@ -206,7 +202,6 @@ async fn main() -> Result<(), Box> { fully_qualified_url, agents_list, listeners, - trace_collector, ) .with_context(parent_cx) .await; @@ -224,7 +219,6 @@ async fn main() -> Result<(), Box> { fully_qualified_url, model_aliases, llm_providers, - trace_collector, state_storage, ) .with_context(parent_cx) @@ -265,7 +259,7 @@ async fn main() -> Result<(), Box> { Ok(response) } _ => { - debug!("No route for {} {}", req.method(), req.uri().path()); + debug!(method = %req.method(), path = %req.uri().path(), "no route found"); let mut not_found = Response::new(empty()); *not_found.status_mut() = StatusCode::NOT_FOUND; Ok(not_found) @@ -275,13 +269,13 @@ async fn main() -> Result<(), Box> { }); tokio::task::spawn(async move { - debug!("Accepted connection from {:?}", peer_addr); + debug!(peer = ?peer_addr, "accepted connection"); if let Err(err) = http1::Builder::new() // .serve_connection(io, service_fn(chat_completion)) .serve_connection(io, service) .await { - warn!("Error serving connection: {:?}", err); + warn!(error = ?err, "error serving connection"); } }); } diff --git a/crates/brightstaff/src/router/llm_router.rs b/crates/brightstaff/src/router/llm_router.rs index 743630e6..ec3fe3ab 100644 --- a/crates/brightstaff/src/router/llm_router.rs +++ b/crates/brightstaff/src/router/llm_router.rs @@ -96,14 +96,14 @@ impl RouterService { .generate_request(messages, &usage_preferences); debug!( - "sending request to arch-router model: {}, endpoint: {}", - self.router_model.get_model_name(), - self.router_url + model = %self.router_model.get_model_name(), + endpoint = %self.router_url, + "sending request to arch-router" ); debug!( - "arch request body: {}", - &serde_json::to_string(&router_request).unwrap(), + body = %serde_json::to_string(&router_request).unwrap(), + "arch router request" ); let mut llm_route_request_headers = header::HeaderMap::new(); @@ -148,9 +148,9 @@ impl RouterService { Ok(response) => response, Err(err) => { warn!( - "Failed to parse JSON: {}. Body: {}", - err, - &serde_json::to_string(&body).unwrap() + error = %err, + body = %serde_json::to_string(&body).unwrap(), + "failed to parse json response" ); return Err(RoutingError::JsonError( err, @@ -160,7 +160,7 @@ impl RouterService { }; if chat_completion_response.choices.is_empty() { - warn!("No choices in router response: {}", body); + warn!(body = %body, "no choices in router response"); return Ok(None); } @@ -169,10 +169,10 @@ impl RouterService { .router_model .parse_response(content, &usage_preferences)?; info!( - "arch-router determined route: {}, selected_model: {:?}, response time: {}ms", - content.replace("\n", "\\n"), - parsed_response, - router_response_time.as_millis() + content = %content.replace("\n", "\\n"), + selected_model = ?parsed_response, + response_time_ms = router_response_time.as_millis(), + "arch-router determined route" ); if let Some(ref parsed_response) = parsed_response { diff --git a/crates/brightstaff/src/router/orchestrator_model_v1.rs b/crates/brightstaff/src/router/orchestrator_model_v1.rs index 8d64f8e7..c6d3d56d 100644 --- a/crates/brightstaff/src/router/orchestrator_model_v1.rs +++ b/crates/brightstaff/src/router/orchestrator_model_v1.rs @@ -197,12 +197,12 @@ impl OrchestratorModel for OrchestratorModelV1 { token_count += message_token_count; if token_count > self.max_token_length { debug!( - "OrchestratorModelV1: token count {} exceeds max token length {}, truncating conversation, selected message count {}, total message count: {}", - token_count, - self.max_token_length - , selected_messsage_count, - messages_vec.len() - ); + token_count = token_count, + max_tokens = self.max_token_length, + selected = selected_messsage_count, + total = messages_vec.len(), + "token count exceeds max, truncating conversation" + ); if message.role == Role::User { // If message that exceeds max token length is from user, we need to keep it selected_messages_list_reversed.push(message); @@ -214,9 +214,7 @@ impl OrchestratorModel for OrchestratorModelV1 { } if selected_messages_list_reversed.is_empty() { - debug!( - "OrchestratorModelV1: no messages selected, using the last message in the conversation" - ); + debug!("no messages selected, using last message"); if let Some(last_message) = messages_vec.last() { selected_messages_list_reversed.push(last_message); } @@ -228,12 +226,12 @@ impl OrchestratorModel for OrchestratorModelV1 { // - last() is the first message in the original conversation if let Some(first_message) = selected_messages_list_reversed.first() { if first_message.role != Role::User { - warn!("OrchestratorModelV1: last message in the conversation is not from user, this may lead to incorrect orchestration"); + warn!("last message is not from user, may lead to incorrect orchestration"); } } if let Some(last_message) = selected_messages_list_reversed.last() { if last_message.role != Role::User { - warn!("OrchestratorModelV1: first message in the selected conversation is not from user, this may lead to incorrect orchestration"); + warn!("first message is not from user, may lead to incorrect orchestration"); } } @@ -323,8 +321,9 @@ impl OrchestratorModel for OrchestratorModelV1 { result.push((selected_route, model_name)); } else { warn!( - "No matching model found for route: {}, usage preferences: {:?}", - selected_route, usage_preferences + route = %selected_route, + preferences = ?usage_preferences, + "no matching model found for route" ); } } @@ -339,8 +338,9 @@ impl OrchestratorModel for OrchestratorModelV1 { result.push((selected_route, model)); } else { warn!( - "No model found for route: {}, orchestrator model preferences: {:?}", - selected_route, self.agent_orchestration_to_model_map + route = %selected_route, + preferences = ?self.agent_orchestration_to_model_map, + "no model found for route" ); } } diff --git a/crates/brightstaff/src/router/plano_orchestrator.rs b/crates/brightstaff/src/router/plano_orchestrator.rs index ee91d52e..cf2688b9 100644 --- a/crates/brightstaff/src/router/plano_orchestrator.rs +++ b/crates/brightstaff/src/router/plano_orchestrator.rs @@ -2,13 +2,12 @@ use std::{collections::HashMap, sync::Arc}; use common::{ configuration::{AgentUsagePreference, OrchestrationPreference}, - consts::{ - ARCH_PROVIDER_HINT_HEADER, PLANO_ORCHESTRATOR_MODEL_NAME, REQUEST_ID_HEADER, - TRACE_PARENT_HEADER, - }, + consts::{ARCH_PROVIDER_HINT_HEADER, PLANO_ORCHESTRATOR_MODEL_NAME, REQUEST_ID_HEADER}, }; use hermesllm::apis::openai::{ChatCompletionsResponse, Message}; use hyper::header; +use opentelemetry::global; +use opentelemetry_http::HeaderInjector; use thiserror::Error; use tracing::{debug, info, warn}; @@ -57,7 +56,6 @@ impl OrchestratorService { pub async fn determine_orchestration( &self, messages: &[Message], - trace_parent: Option, usage_preferences: Option>, request_id: Option, ) -> Result>> { @@ -75,14 +73,14 @@ impl OrchestratorService { .generate_request(messages, &usage_preferences); debug!( - "sending request to arch-orchestrator model: {}, endpoint: {}", - self.orchestrator_model.get_model_name(), - self.orchestrator_url + model = %self.orchestrator_model.get_model_name(), + endpoint = %self.orchestrator_url, + "sending request to arch-orchestrator" ); debug!( - "arch orchestrator request body: {}", - &serde_json::to_string(&orchestrator_request).unwrap(), + body = %serde_json::to_string(&orchestrator_request).unwrap(), + "arch orchestrator request" ); let mut orchestration_request_headers = header::HeaderMap::new(); @@ -96,12 +94,12 @@ impl OrchestratorService { header::HeaderValue::from_str(PLANO_ORCHESTRATOR_MODEL_NAME).unwrap(), ); - if let Some(trace_parent) = trace_parent { - orchestration_request_headers.insert( - header::HeaderName::from_static(TRACE_PARENT_HEADER), - header::HeaderValue::from_str(&trace_parent).unwrap(), - ); - } + // Inject OpenTelemetry trace context from current span + global::get_text_map_propagator(|propagator| { + let cx = + tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current()); + propagator.inject_context(&cx, &mut HeaderInjector(&mut orchestration_request_headers)); + }); if let Some(request_id) = request_id { orchestration_request_headers.insert( @@ -131,9 +129,9 @@ impl OrchestratorService { Ok(response) => response, Err(err) => { warn!( - "Failed to parse JSON: {}. Body: {}", - err, - &serde_json::to_string(&body).unwrap() + error = %err, + body = %serde_json::to_string(&body).unwrap(), + "failed to parse json response" ); return Err(OrchestrationError::JsonError( err, @@ -143,7 +141,7 @@ impl OrchestratorService { }; if chat_completion_response.choices.is_empty() { - warn!("No choices in orchestrator response: {}", body); + warn!(body = %body, "no choices in orchestrator response"); return Ok(None); } @@ -152,10 +150,10 @@ impl OrchestratorService { .orchestrator_model .parse_response(content, &usage_preferences)?; info!( - "arch-orchestrator determined routes: {}, selected_routes: {:?}, response time: {}ms", - content.replace("\n", "\\n"), - parsed_response, - orchestrator_response_time.as_millis() + content = %content.replace("\n", "\\n"), + selected_routes = ?parsed_response, + response_time_ms = orchestrator_response_time.as_millis(), + "arch-orchestrator determined routes" ); if let Some(ref parsed_response) = parsed_response { diff --git a/crates/brightstaff/src/router/router_model_v1.rs b/crates/brightstaff/src/router/router_model_v1.rs index 796dfaac..430b4f8e 100644 --- a/crates/brightstaff/src/router/router_model_v1.rs +++ b/crates/brightstaff/src/router/router_model_v1.rs @@ -94,12 +94,12 @@ impl RouterModel for RouterModelV1 { token_count += message_token_count; if token_count > self.max_token_length { debug!( - "RouterModelV1: token count {} exceeds max token length {}, truncating conversation, selected message count {}, total message count: {}", - token_count, - self.max_token_length - , selected_messsage_count, - messages_vec.len() - ); + token_count = token_count, + max_tokens = self.max_token_length, + selected = selected_messsage_count, + total = messages_vec.len(), + "token count exceeds max, truncating conversation" + ); if message.role == Role::User { // If message that exceeds max token length is from user, we need to keep it selected_messages_list_reversed.push(message); @@ -111,9 +111,7 @@ impl RouterModel for RouterModelV1 { } if selected_messages_list_reversed.is_empty() { - debug!( - "RouterModelV1: no messages selected, using the last message in the conversation" - ); + debug!("no messages selected, using last message"); if let Some(last_message) = messages_vec.last() { selected_messages_list_reversed.push(last_message); } @@ -122,12 +120,12 @@ impl RouterModel for RouterModelV1 { // ensure that first and last selected message is from user if let Some(first_message) = selected_messages_list_reversed.first() { if first_message.role != Role::User { - warn!("RouterModelV1: last message in the conversation is not from user, this may lead to incorrect routing"); + warn!("last message is not from user, may lead to incorrect routing"); } } if let Some(last_message) = selected_messages_list_reversed.last() { if last_message.role != Role::User { - warn!("RouterModelV1: first message in the conversation is not from user, this may lead to incorrect routing"); + warn!("first message is not from user, may lead to incorrect routing"); } } @@ -206,8 +204,9 @@ impl RouterModel for RouterModelV1 { return Ok(Some((selected_route, model_name))); } else { warn!( - "No matching model found for route: {}, usage preferences: {:?}", - selected_route, usage_preferences + route = %selected_route, + preferences = ?usage_preferences, + "no matching model found for route" ); return Ok(None); } @@ -219,8 +218,9 @@ impl RouterModel for RouterModelV1 { } warn!( - "No model found for route: {}, router model preferences: {:?}", - selected_route, self.llm_route_to_model_map + route = %selected_route, + preferences = ?self.llm_route_to_model_map, + "no model found for route" ); Ok(None) diff --git a/crates/brightstaff/src/state/response_state_processor.rs b/crates/brightstaff/src/state/response_state_processor.rs index 3d1e8673..0920324c 100644 --- a/crates/brightstaff/src/state/response_state_processor.rs +++ b/crates/brightstaff/src/state/response_state_processor.rs @@ -92,18 +92,16 @@ impl ResponsesStateProcessor

{ match decoder.read_to_end(&mut decompressed) { Ok(_) => { debug!( - "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Successfully decompressed {} bytes to {} bytes", - self.request_id, - self.chunk_buffer.len(), - decompressed.len() + original_bytes = self.chunk_buffer.len(), + decompressed_bytes = decompressed.len(), + "Successfully decompressed response" ); decompressed } Err(e) => { warn!( - "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to decompress gzip buffer: {}", - self.request_id, - e + error = %e, + "Failed to decompress gzip buffer" ); self.chunk_buffer.clone() } @@ -111,9 +109,8 @@ impl ResponsesStateProcessor

{ } Some(encoding) => { warn!( - "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Unsupported Content-Encoding: {}. Only gzip is currently supported.", - self.request_id, - encoding + encoding = %encoding, + "Unsupported Content-Encoding, only gzip is supported" ); self.chunk_buffer.clone() } @@ -143,10 +140,9 @@ impl ResponsesStateProcessor

{ serde_json::from_str::(data_str) { info!( - "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Captured streaming response.completed: response_id={}, output_items={}", - self.request_id, - response.id, - response.output.len() + response_id = %response.id, + output_items = response.output.len(), + "Captured streaming response" ); self.response_id = Some(response.id.clone()); self.output_items = Some(response.output.clone()); @@ -175,24 +171,20 @@ impl ResponsesStateProcessor

{ ) { Ok(response) => { info!( - "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Captured non-streaming response: response_id={}, output_items={}", - self.request_id, - response.id, - response.output.len() + response_id = %response.id, + output_items = response.output.len(), + "Captured non-streaming response" ); self.response_id = Some(response.id.clone()); self.output_items = Some(response.output.clone()); } Err(e) => { - // Log parse error with chunk preview for debugging let chunk_preview = String::from_utf8_lossy(&decompressed); let preview_len = chunk_preview.len().min(200); warn!( - "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to parse non-streaming ResponsesAPIResponse: {}. Decompressed preview (first {} bytes): {}", - self.request_id, - e, - preview_len, - &chunk_preview[..preview_len] + error = %e, + preview = %&chunk_preview[..preview_len], + "Failed to parse non-streaming ResponsesAPIResponse" ); } } @@ -221,10 +213,7 @@ impl StreamProcessor for ResponsesStateProcessor

{ // Skip storage for OpenAI upstream if self.is_openai_upstream { - debug!( - "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Skipping state storage for OpenAI upstream provider", - self.request_id - ); + debug!("Skipping state storage for OpenAI upstream"); return; } @@ -234,8 +223,9 @@ impl StreamProcessor for ResponsesStateProcessor

{ let output_as_inputs = outputs_to_inputs(output_items); debug!( - "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Converting outputs to inputs: output_items_count={}, converted_input_items_count={}", - self.request_id, output_items.len(), output_as_inputs.len() + output_items = output_items.len(), + converted_items = output_as_inputs.len(), + "Converting outputs to inputs" ); // Combine original input + output as new input history @@ -243,11 +233,9 @@ impl StreamProcessor for ResponsesStateProcessor

{ combined_input.extend(output_as_inputs); debug!( - "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Storing state: original_input_count={}, combined_input_count={}, combined_json={}", - self.request_id, - self.original_input.len(), - combined_input.len(), - serde_json::to_string(&combined_input).unwrap_or_else(|_| "serialization_error".to_string()) + original_input = self.original_input.len(), + combined_input = combined_input.len(), + "Storing conversation state" ); let state = OpenAIConversationState { @@ -270,28 +258,27 @@ impl StreamProcessor for ResponsesStateProcessor

{ match storage.put(state).await { Ok(()) => { info!( - "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Successfully stored conversation state for response_id: {}, items_count={}", - request_id, - response_id_clone, - items_count + request_id = %request_id, + response_id = %response_id_clone, + items = items_count, + "Stored conversation state" ); } Err(e) => { warn!( - "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to store conversation state for response_id {}: {}", - request_id, - response_id_clone, - e + request_id = %request_id, + response_id = %response_id_clone, + error = %e, + "Failed to store conversation state" ); } } }); } else { warn!( - "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | No response_id captured from upstream response - cannot store conversation state. response_id present: {}, output present: {}", - self.request_id, - self.response_id.is_some(), - self.output_items.is_some() + has_response_id = self.response_id.is_some(), + has_output = self.output_items.is_some(), + "No response_id captured, cannot store conversation state" ); } } diff --git a/crates/brightstaff/src/tracing/mod.rs b/crates/brightstaff/src/tracing/mod.rs index 09ec6f2a..7332170c 100644 --- a/crates/brightstaff/src/tracing/mod.rs +++ b/crates/brightstaff/src/tracing/mod.rs @@ -1,5 +1,35 @@ mod constants; +mod service_name_exporter; pub use constants::{ error, http, llm, operation_component, routing, signals, OperationNameBuilder, }; +pub use service_name_exporter::{ServiceNameOverrideExporter, SERVICE_NAME_OVERRIDE_KEY}; + +use opentelemetry::trace::get_active_span; +use opentelemetry::KeyValue; + +/// Sets the service name override on the current active OpenTelemetry span. +/// +/// This function adds the `service.name.override` attribute to the active +/// OpenTelemetry span, which allows observability backends to filter and group +/// spans by their logical service (e.g., `plano(llm)`, `plano(filter)`). +/// +/// # Arguments +/// * `service_name` - The service name to use (e.g., `operation_component::LLM`) +/// +/// # Example +/// ```rust,ignore +/// use brightstaff::tracing::{set_service_name, operation_component}; +/// +/// // Inside a traced function: +/// set_service_name(operation_component::LLM); +/// ``` +pub fn set_service_name(service_name: &str) { + get_active_span(|span| { + span.set_attribute(KeyValue::new( + SERVICE_NAME_OVERRIDE_KEY, + service_name.to_string(), + )); + }); +} diff --git a/crates/brightstaff/src/tracing/service_name_exporter.rs b/crates/brightstaff/src/tracing/service_name_exporter.rs new file mode 100644 index 00000000..ca0bde15 --- /dev/null +++ b/crates/brightstaff/src/tracing/service_name_exporter.rs @@ -0,0 +1,187 @@ +//! Service Name Override Exporter +//! +//! This module provides a custom SpanExporter that allows per-span service.name overrides. +//! In OpenTelemetry, `service.name` is part of the Resource, which is tied to the TracerProvider. +//! However, if you need different service names for different spans (e.g., `plano(orchestrator)`, +//! `plano(filter)`, `plano(llm)`) within the same provider, this exporter handles that by: +//! +//! 1. Looking for a special span attribute `service.name.override` +//! 2. Grouping spans by their effective service name +//! 3. Exporting each group via a dedicated OTLP exporter whose Resource has the correct +//! `service.name` +//! +//! All per-service exporters are created eagerly at construction time so that no tonic +//! channel creation happens later inside `futures_executor::block_on` (which the +//! `BatchSpanProcessor` uses and which lacks a tokio runtime). +//! +//! # Usage +//! +//! ```rust +//! use brightstaff::tracing::{set_service_name, operation_component}; +//! +//! // In your instrumented code, set the service name override: +//! set_service_name(operation_component::LLM); +//! ``` + +use opentelemetry::Key; +use opentelemetry_otlp::WithExportConfig; +use opentelemetry_sdk::error::OTelSdkResult; +use opentelemetry_sdk::trace::{SpanData, SpanExporter}; +use opentelemetry_sdk::Resource; +use std::collections::HashMap; +use std::time::Duration; +use tokio::sync::Mutex; + +use super::operation_component; + +/// The attribute key used to override the service name for a specific span. +/// Set this as a span attribute to route the span to a different service. +pub const SERVICE_NAME_OVERRIDE_KEY: &str = "service.name.override"; + +/// Default service name used when no override is set on a span. +const DEFAULT_SERVICE_NAME: &str = "plano"; + +/// All known service names that will have dedicated exporters. +const ALL_SERVICE_NAMES: &[&str] = &[ + DEFAULT_SERVICE_NAME, + operation_component::INBOUND, + operation_component::ROUTING, + operation_component::ORCHESTRATOR, + operation_component::AGENT_FILTER, + operation_component::AGENT, + operation_component::LLM, +]; + +/// Span attribute keys to remove before export. +const FILTERED_ATTR_KEYS: &[&str] = &[ + "busy_ns", + "idle_ns", + "thread.id", + "thread.name", + "code.file.path", + "code.line.number", + "code.module.name", + "target", +]; + +/// A SpanExporter that supports per-span `service.name` overrides. +/// +/// Internally it holds one OTLP exporter per known service name. Each exporter +/// has its own `Resource` with the correct `service.name`, so backends like +/// Jaeger see the spans under the right service. +pub struct ServiceNameOverrideExporter { + /// Map from service name → pre-created OTLP exporter (behind tokio Mutex + /// because `SpanExporter::export` takes `&self` and the future must be Send). + exporters: HashMap>, +} + +// Manual Debug because `opentelemetry_otlp::SpanExporter` doesn't implement Debug +impl std::fmt::Debug for ServiceNameOverrideExporter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ServiceNameOverrideExporter") + .field("services", &self.exporters.keys().collect::>()) + .finish() + } +} + +impl ServiceNameOverrideExporter { + /// Create a new `ServiceNameOverrideExporter`. + /// + /// This eagerly creates one OTLP gRPC exporter per known service name so + /// that the tonic channel is established while a tokio runtime is available. + /// + /// # Arguments + /// * `endpoint` – The OTLP collector endpoint URL (e.g. `http://localhost:4317`) + pub fn new(endpoint: &str) -> Self { + let mut exporters = HashMap::new(); + + for &service_name in ALL_SERVICE_NAMES { + let resource = Resource::builder_empty() + .with_service_name(service_name) + .build(); + + let mut exporter = opentelemetry_otlp::SpanExporter::builder() + .with_tonic() + .with_endpoint(endpoint) + .build() + .expect("Failed to create OTLP span exporter"); + + exporter.set_resource(&resource); + exporters.insert(service_name.to_string(), Mutex::new(exporter)); + } + + Self { exporters } + } +} + +impl SpanExporter for ServiceNameOverrideExporter { + fn export( + &self, + batch: Vec, + ) -> impl std::future::Future + Send { + let override_key = Key::new(SERVICE_NAME_OVERRIDE_KEY); + + // Group spans by their effective service name + let mut spans_by_service: HashMap> = HashMap::new(); + + let should_filter = !tracing::enabled!(tracing::Level::DEBUG); + + for span in batch { + let mut span = span; + + if should_filter { + span.attributes + .retain(|kv| !FILTERED_ATTR_KEYS.contains(&kv.key.as_str())); + } + + let service_name = span + .attributes + .iter() + .find(|kv| kv.key == override_key) + .map(|kv| kv.value.to_string()) + .unwrap_or_else(|| DEFAULT_SERVICE_NAME.to_string()); + + spans_by_service.entry(service_name).or_default().push(span); + } + + // Collect grouped spans into a Vec so the async block owns the data. + let results: Vec<(String, Vec)> = spans_by_service.into_iter().collect(); + async move { + for (service_name, spans) in results { + // Look up the pre-created exporter; fall back to default if + // the service name isn't one of the known ones. + let key = if self.exporters.contains_key(&service_name) { + service_name.clone() + } else { + DEFAULT_SERVICE_NAME.to_string() + }; + + if let Some(exporter_mutex) = self.exporters.get(&key) { + let exporter = exporter_mutex.lock().await; + if let Err(e) = exporter.export(spans).await { + tracing::warn!( + service = %service_name, + error = ?e, + "Failed to export spans" + ); + } + } + } + Ok(()) + } + } + + fn shutdown_with_timeout(&mut self, timeout: Duration) -> OTelSdkResult { + for (_, exporter_mutex) in self.exporters.iter() { + if let Ok(mut exporter) = exporter_mutex.try_lock() { + let _ = exporter.shutdown_with_timeout(timeout); + } + } + Ok(()) + } + + fn set_resource(&mut self, _resource: &Resource) { + // Each inner exporter already has its own resource set at creation time. + // Nothing to propagate. + } +} diff --git a/crates/brightstaff/src/utils/tracing.rs b/crates/brightstaff/src/utils/tracing.rs index 6da4b631..21882303 100644 --- a/crates/brightstaff/src/utils/tracing.rs +++ b/crates/brightstaff/src/utils/tracing.rs @@ -3,12 +3,17 @@ use std::sync::OnceLock; use opentelemetry::global; use opentelemetry_sdk::{propagation::TraceContextPropagator, trace::SdkTracerProvider}; -use opentelemetry_stdout::SpanExporter; use time::macros::format_description; use tracing::{Event, Subscriber}; use tracing_subscriber::fmt::{format, time::FormatTime, FmtContext, FormatEvent, FormatFields}; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::registry::LookupSpan; +use tracing_subscriber::util::SubscriberInitExt; use tracing_subscriber::EnvFilter; +use crate::tracing::ServiceNameOverrideExporter; +use common::configuration::Tracing; + struct BracketedTime; impl FormatTime for BracketedTime { @@ -29,7 +34,7 @@ struct BracketedFormatter; impl FormatEvent for BracketedFormatter where - S: Subscriber + for<'a> tracing_subscriber::registry::LookupSpan<'a>, + S: Subscriber + for<'a> LookupSpan<'a>, N: for<'a> FormatFields<'a> + 'static, { fn format_event( @@ -43,36 +48,116 @@ where write!( writer, - "[{}] ", + "[{}]", event.metadata().level().to_string().to_lowercase() )?; + // Extract request_id from span context if present + if let Some(scope) = ctx.event_scope() { + for span in scope.from_root() { + let extensions = span.extensions(); + if let Some(fields) = extensions.get::>() { + let fields_str = fields.fields.as_str(); + // Look for request_id in the formatted fields + if let Some(start) = fields_str.find("request_id=") { + let rest = &fields_str[start + 11..]; // Skip "request_id=" + let end = rest.find(|c: char| c.is_whitespace()).unwrap_or(rest.len()); + let rid = &rest[..end]; + write!(writer, " request_id={}", rid)?; + break; + } + } + } + } + + write!(writer, " ")?; ctx.field_format().format_fields(writer.by_ref(), event)?; writeln!(writer) } } +use tracing_subscriber::fmt::FormattedFields; + static INIT_LOGGER: OnceLock = OnceLock::new(); -pub fn init_tracer() -> &'static SdkTracerProvider { +pub fn init_tracer(tracing_config: Option<&Tracing>) -> &'static SdkTracerProvider { INIT_LOGGER.get_or_init(|| { global::set_text_map_propagator(TraceContextPropagator::new()); - // Install stdout exporter pipeline to be able to retrieve the collected spans. - // For the demonstration, use `Sampler::AlwaysOn` sampler to sample all traces. - let provider = SdkTracerProvider::builder() - .with_simple_exporter(SpanExporter::default()) - .build(); - global::set_tracer_provider(provider.clone()); + // Get OTEL endpoint and sampling from config.yaml tracing section + let otel_endpoint = tracing_config.and_then(|t| t.opentracing_grpc_endpoint.clone()); - tracing_subscriber::fmt() - .with_env_filter( - EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")), - ) - .event_format(BracketedFormatter) - .init(); + let random_sampling = tracing_config.and_then(|t| t.random_sampling).unwrap_or(0); - provider + let tracing_enabled = random_sampling > 0 && otel_endpoint.is_some(); + eprintln!( + "initializing tracing: tracing_enabled={}, otel_endpoint={:?}, random_sampling={}", + tracing_enabled, otel_endpoint, random_sampling + ); + + // Create OTLP exporter to send spans to collector + if tracing_enabled { + // Set service name via environment if not already set + if std::env::var("OTEL_SERVICE_NAME").is_err() { + std::env::set_var("OTEL_SERVICE_NAME", "plano"); + } + + // Create ServiceNameOverrideExporter to support per-span service names + // This allows spans to have different service names (e.g., plano(orchestrator), + // plano(filter), plano(llm)) by setting the "service.name.override" attribute + let exporter = ServiceNameOverrideExporter::new(otel_endpoint.as_ref().unwrap()); + + let provider = SdkTracerProvider::builder() + .with_batch_exporter(exporter) + .build(); + + global::set_tracer_provider(provider.clone()); + + // Create OpenTelemetry tracing layer using TracerProvider trait + use opentelemetry::trace::TracerProvider as _; + let telemetry_layer = + tracing_opentelemetry::layer().with_tracer(provider.tracer("brightstaff")); + + // Combine the OpenTelemetry layer with fmt layer using the registry + let env_filter = + EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")); + + // Create fmt layer with span field formatting enabled (no ANSI to keep fields parseable) + let fmt_layer = tracing_subscriber::fmt::layer() + .event_format(BracketedFormatter) + .fmt_fields(format::DefaultFields::new()) + .with_ansi(false); + + let subscriber = tracing_subscriber::registry() + .with(telemetry_layer) + .with(env_filter) + .with(fmt_layer); + + tracing::subscriber::set_global_default(subscriber) + .expect("Failed to set tracing subscriber"); + + provider + } else { + // Tracing disabled - use no-op provider + let provider = SdkTracerProvider::builder().build(); + global::set_tracer_provider(provider.clone()); + + let env_filter = + EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")); + + // Create fmt layer with span field formatting enabled (no ANSI to keep fields parseable) + let fmt_layer = tracing_subscriber::fmt::layer() + .event_format(BracketedFormatter) + .fmt_fields(format::DefaultFields::new()) + .with_ansi(false); + + tracing_subscriber::registry() + .with(env_filter) + .with(fmt_layer) + .init(); + + provider + } }) } diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml index bcff9b32..cb471bd6 100644 --- a/crates/common/Cargo.toml +++ b/crates/common/Cargo.toml @@ -21,14 +21,8 @@ url = "2.5.4" hermesllm = { version = "0.1.0", path = "../hermesllm" } serde_with = "3.13.0" -# Optional dependencies for trace collection (not available in WASM) -tokio = { version = "1.44", features = ["sync", "time"], optional = true } -reqwest = { version = "0.12", features = ["json"], optional = true } -tracing = { version = "0.1", optional = true } - [features] default = [] -trace-collection = ["tokio", "reqwest", "tracing"] [dev-dependencies] pretty_assertions = "1.4.1" diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index c600ed5d..ccca89c3 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -90,6 +90,8 @@ pub struct Overrides { pub struct Tracing { pub sampling_rate: Option, pub trace_arch_internal: Option, + pub random_sampling: Option, + pub opentracing_grpc_endpoint: Option, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Default)] diff --git a/crates/common/src/consts.rs b/crates/common/src/consts.rs index 23260747..cafc8e80 100644 --- a/crates/common/src/consts.rs +++ b/crates/common/src/consts.rs @@ -30,7 +30,6 @@ pub const ARCH_MODEL_PREFIX: &str = "Arch"; pub const HALLUCINATION_TEMPLATE: &str = "It seems I'm missing some information. Could you provide the following details "; pub const OTEL_COLLECTOR_HTTP: &str = "opentelemetry_collector_http"; -pub const OTEL_POST_PATH: &str = "/v1/traces"; pub const LLM_ROUTE_HEADER: &str = "x-arch-llm-route"; pub const ENVOY_RETRY_HEADER: &str = "x-envoy-max-retries"; pub const BRIGHT_STAFF_SERVICE_NAME: &str = "brightstaff"; diff --git a/crates/common/src/traces/collector.rs b/crates/common/src/traces/collector.rs deleted file mode 100644 index e26f544e..00000000 --- a/crates/common/src/traces/collector.rs +++ /dev/null @@ -1,292 +0,0 @@ -use super::resource_span_builder::ResourceSpanBuilder; -use super::shapes::Span; -use std::collections::{HashMap, VecDeque}; -use std::sync::Arc; -use tokio::sync::Mutex; -use tokio::time::{interval, Duration}; -use tracing::{debug, error, warn}; - -/// Parse W3C traceparent header into trace_id and parent_span_id -/// Format: "00-{trace_id}-{parent_span_id}-01" -/// -/// Returns (trace_id, Option) -/// - parent_span_id is None if it's all zeros (0000000000000000), indicating a root span -pub fn parse_traceparent(traceparent: &str) -> (String, Option) { - let parts: Vec<&str> = traceparent.split('-').collect(); - if parts.len() == 4 { - let trace_id = parts[1].to_string(); - let parent_span_id = parts[2].to_string(); - - // If parent_span_id is all zeros, this is a root span with no parent - let parent = if parent_span_id == "0000000000000000" { - None - } else { - Some(parent_span_id) - }; - - (trace_id, parent) - } else { - warn!("Invalid traceparent format: {}", traceparent); - // Return empty trace ID and None for parent if parsing fails - (String::new(), None) - } -} - -/// Collects and batches spans, flushing them to an OTEL collector -/// -/// Supports multiple services, with each service (e.g., "archgw(routing)", "archgw(llm)") -/// maintaining its own span queue. Flushes all services together periodically. -/// -/// Tracing can be enabled/disabled in two ways: -/// 1. Via arch_config.yaml: presence of `tracing` configuration section -/// 2. Via environment variable: `OTEL_TRACING_ENABLED=true/false` -/// -/// When disabled, span recording and flushing are no-ops. -pub struct TraceCollector { - /// Spans grouped by service name - /// Key: service name (e.g., "archgw(routing)", "archgw(llm)") - /// Value: queue of spans for that service - spans_by_service: Arc>>>, - flush_interval: Duration, - otel_url: String, - /// Whether tracing is enabled - enabled: bool, -} - -impl TraceCollector { - /// Create a new trace collector - /// - /// # Arguments - /// * `enabled` - Whether tracing is enabled - /// - `Some(true)` - Force enable tracing - /// - `Some(false)` - Force disable tracing - /// - `None` - Check `OTEL_TRACING_ENABLED` env var (defaults to true if not set) - /// - /// Other parameters are read from environment variables: - /// - `TRACE_FLUSH_INTERVAL_MS` - Flush interval in milliseconds (default: 1000) - /// - `OTEL_COLLECTOR_URL` - OTEL collector endpoint (default: http://localhost:9903/v1/traces) - pub fn new(enabled: Option) -> Self { - let flush_interval_ms = std::env::var("TRACE_FLUSH_INTERVAL_MS") - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or(1000); - - let otel_url = std::env::var("OTEL_COLLECTOR_URL") - .unwrap_or_else(|_| "http://localhost:9903/v1/traces".to_string()); - - // Determine if tracing is enabled: - // 1. Use explicit parameter if provided - // 2. Otherwise check OTEL_TRACING_ENABLED env var - // 3. Default to false if neither is set (tracing opt-in, not opt-out) - let enabled = enabled.unwrap_or_else(|| { - std::env::var("OTEL_TRACING_ENABLED") - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or(false) - }); - - debug!( - "TraceCollector initialized: flush_interval={}ms, url={}, enabled={}", - flush_interval_ms, otel_url, enabled - ); - - Self { - spans_by_service: Arc::new(Mutex::new(HashMap::new())), - flush_interval: Duration::from_millis(flush_interval_ms), - otel_url, - enabled, - } - } - - /// Record a span for a specific service - /// - /// # Arguments - /// * `service_name` - Name of the service (e.g., "archgw(routing)", "archgw(llm)") - /// * `span` - The span to record - pub fn record_span(&self, service_name: impl Into, span: Span) { - // Skip recording if tracing is disabled - if !self.enabled { - return; - } - - let service_name = service_name.into(); - - // Use try_lock to avoid blocking in async contexts - // If the lock is held, we skip recording (telemetry shouldn't block the app) - if let Ok(mut spans_by_service) = self.spans_by_service.try_lock() { - // Get or create the queue for this service - let spans = spans_by_service - .entry(service_name) - .or_insert_with(VecDeque::new); - - spans.push_back(span); - } else { - // Lock contention - skip recording this span - debug!("Skipped span recording due to lock contention"); - } - // Flushing is handled by the periodic background flusher (see `start_background_flusher`). - } - - /// Flush all buffered spans to the OTEL collector - /// Builds ResourceSpans for each service with spans - pub async fn flush(&self) -> Result<(), Box> { - // Skip flushing if tracing is disabled - if !self.enabled { - return Ok(()); - } - - let mut spans_by_service = self.spans_by_service.lock().await; - - if spans_by_service.is_empty() { - return Ok(()); - } - - // Snapshot and drain all services' spans - let service_batches: Vec<(String, Vec)> = spans_by_service - .iter_mut() - .filter_map(|(service_name, spans)| { - if spans.is_empty() { - None - } else { - Some((service_name.clone(), spans.drain(..).collect())) - } - }) - .collect(); - - drop(spans_by_service); // Release lock before HTTP call - - if service_batches.is_empty() { - return Ok(()); - } - - let total_spans: usize = service_batches.iter().map(|(_, spans)| spans.len()).sum(); - debug!( - "Flushing {} spans across {} services to OTEL collector", - total_spans, - service_batches.len() - ); - - // Build canonical OTEL payload structure - one ResourceSpan per service - let resource_spans = self.build_resource_spans(service_batches); - - match self.send_to_otel(resource_spans).await { - Ok(_) => { - debug!("Successfully flushed {} spans", total_spans); - Ok(()) - } - Err(e) => { - warn!("Failed to send spans to OTEL collector: {:?}", e); - Err(e) - } - } - } - - /// Build OTEL-compliant resource spans from collected spans, one ResourceSpan per service - fn build_resource_spans( - &self, - service_batches: Vec<(String, Vec)>, - ) -> Vec { - service_batches - .into_iter() - .map(|(service_name, spans)| { - ResourceSpanBuilder::new(&service_name) - .add_spans(spans) - .build() - }) - .collect() - } - - /// Send resource spans to OTEL collector - /// Serializes as {"resourceSpans": [...]} per OTEL spec - async fn send_to_otel( - &self, - resource_spans: Vec, - ) -> Result<(), Box> { - let client = reqwest::Client::new(); - - // Create OTEL payload with proper structure - let payload = serde_json::json!({ - "resourceSpans": resource_spans - }); - - let response = client - .post(&self.otel_url) - .header("Content-Type", "application/json") - .json(&payload) - .timeout(Duration::from_secs(5)) - .send() - .await?; - - if !response.status().is_success() { - warn!( - "OTEL collector returned non-success status: {}", - response.status() - ); - return Err(format!("OTEL collector error: {}", response.status()).into()); - } - - Ok(()) - } - - /// Start a background task that periodically flushes traces - /// Returns a join handle that can be used to stop the flusher - pub fn start_background_flusher(self: Arc) -> tokio::task::JoinHandle<()> { - let flush_interval = self.flush_interval; - - tokio::spawn(async move { - let mut ticker = interval(flush_interval); - - loop { - ticker.tick().await; - - if let Err(e) = self.flush().await { - error!("Background trace flush failed: {:?}", e); - } - } - }) - } - - /// Get current number of buffered spans across all services (for testing/monitoring) - pub async fn buffered_count(&self) -> usize { - self.spans_by_service - .lock() - .await - .values() - .map(|spans| spans.len()) - .sum() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::traces::SpanBuilder; - - #[tokio::test] - async fn test_collector_basic() { - let collector = TraceCollector::new(Some(true)); - - let span = SpanBuilder::new("test_operation") - .with_trace_id("abc123") - .build(); - - collector.record_span("test-service", span); - - assert_eq!(collector.buffered_count().await, 1); - } - - #[tokio::test] - async fn test_collector_auto_flush() { - // Since batch-triggered flush behavior was removed, record two spans and verify both are buffered - let collector = Arc::new(TraceCollector::new(Some(true))); - - let span1 = SpanBuilder::new("test1").build(); - let span2 = SpanBuilder::new("test2").build(); - - collector.record_span("test-service", span1); - collector.record_span("test-service", span2); - - // With no batch-triggered flush, both spans should remain buffered - assert_eq!(collector.buffered_count().await, 2); - } -} diff --git a/crates/common/src/traces/mod.rs b/crates/common/src/traces/mod.rs index 6181f194..e9177073 100644 --- a/crates/common/src/traces/mod.rs +++ b/crates/common/src/traces/mod.rs @@ -5,12 +5,6 @@ mod constants; mod resource_span_builder; mod span_builder; -#[cfg(feature = "trace-collection")] -mod collector; - -#[cfg(all(test, feature = "trace-collection"))] -mod tests; - // Re-export original types pub use shapes::{ Attribute, AttributeValue, Event, Resource, ResourceSpan, Scope, ScopeSpan, Span, Traceparent, @@ -21,6 +15,3 @@ pub use shapes::{ pub use constants::*; pub use resource_span_builder::ResourceSpanBuilder; pub use span_builder::{generate_random_span_id, SpanBuilder, SpanKind}; - -#[cfg(feature = "trace-collection")] -pub use collector::{parse_traceparent, TraceCollector}; diff --git a/crates/common/src/traces/tests/mock_otel_collector.rs b/crates/common/src/traces/tests/mock_otel_collector.rs deleted file mode 100644 index 8c8e770d..00000000 --- a/crates/common/src/traces/tests/mock_otel_collector.rs +++ /dev/null @@ -1,96 +0,0 @@ -//! Mock OTEL Collector for testing trace output -//! -//! This module provides a simple HTTP server that mimics an OTEL collector. -//! It exposes three endpoints: -//! - POST /v1/traces: Capture incoming OTLP JSON payloads -//! - GET /v1/traces: Return all captured payloads as JSON array -//! - DELETE /v1/traces: Clear all captured payloads -//! -//! Each test creates its own MockOtelCollector instance. - -use axum::{ - extract::State, - http::StatusCode, - routing::{delete, get, post}, - Json, Router, -}; -use serde_json::Value; -use std::sync::Arc; -use tokio::sync::RwLock; - -type SharedTraces = Arc>>; - -/// POST /v1/traces - capture incoming OTLP payload -async fn post_traces(State(traces): State, Json(payload): Json) -> StatusCode { - traces.write().await.push(payload); - StatusCode::OK -} - -/// GET /v1/traces - return all captured payloads -async fn get_traces(State(traces): State) -> Json> { - Json(traces.read().await.clone()) -} - -/// DELETE /v1/traces - clear all captured payloads -async fn delete_traces(State(traces): State) -> StatusCode { - traces.write().await.clear(); - StatusCode::NO_CONTENT -} - -/// Mock OTEL collector server -pub struct MockOtelCollector { - address: String, - client: reqwest::Client, - #[allow(dead_code)] - server_handle: tokio::task::JoinHandle<()>, -} - -impl MockOtelCollector { - /// Create and start a new mock collector on a random port - pub async fn start() -> Self { - let traces = Arc::new(RwLock::new(Vec::new())); - - let app = Router::new() - .route("/v1/traces", post(post_traces)) - .route("/v1/traces", get(get_traces)) - .route("/v1/traces", delete(delete_traces)) - .with_state(traces.clone()); - - let listener = tokio::net::TcpListener::bind("127.0.0.1:0") - .await - .expect("Failed to bind to random port"); - - let addr = listener.local_addr().expect("Failed to get local address"); - let address = format!("http://127.0.0.1:{}", addr.port()); - - let server_handle = tokio::spawn(async move { - axum::serve(listener, app).await.expect("Server failed"); - }); - - // Give server a moment to start - tokio::time::sleep(tokio::time::Duration::from_millis(50)).await; - - Self { - address, - client: reqwest::Client::new(), - server_handle, - } - } - - /// Get the address of the collector - pub fn address(&self) -> &str { - &self.address - } - - /// GET /v1/traces - fetch all captured payloads - pub async fn get_traces(&self) -> Vec { - self.client - .get(format!("{}/v1/traces", self.address)) - .send() - .await - .expect("Failed to GET traces") - .json() - .await - .expect("Failed to parse traces JSON") - } -} diff --git a/crates/common/src/traces/tests/mod.rs b/crates/common/src/traces/tests/mod.rs deleted file mode 100644 index 7bba42f8..00000000 --- a/crates/common/src/traces/tests/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -mod mock_otel_collector; -mod trace_integration_test; - -pub use mock_otel_collector::MockOtelCollector; diff --git a/crates/common/src/traces/tests/trace_integration_test.rs b/crates/common/src/traces/tests/trace_integration_test.rs deleted file mode 100644 index 5f41a2c3..00000000 --- a/crates/common/src/traces/tests/trace_integration_test.rs +++ /dev/null @@ -1,342 +0,0 @@ -//! Integration tests for OpenTelemetry tracing in router.rs -//! -//! These tests validate that the spans created for LLM requests contain -//! all expected attributes and events by checking the raw JSON payloads -//! sent to the mock OTEL collector. -//! -//! ## Test Design -//! Each test creates its own MockOtelCollector and TraceCollector: -//! 1. Start MockOtelCollector on random port -//! 2. Create TraceCollector with 500ms flush interval -//! 3. Record spans using TraceCollector -//! 4. Flush and wait (500ms + 200ms buffer = 700ms total) for spans to arrive -//! 5. Get raw JSON payloads (GET /v1/traces) and validate structure -//! 6. Test cleanup happens automatically when collectors are dropped -//! -//! ## Serial Execution -//! Tests use the `#[serial]` attribute to run sequentially because they -//! use global environment variables (OTEL_COLLECTOR_URL, OTEL_TRACING_ENABLED, -//! TRACE_FLUSH_INTERVAL_MS). This ensures test isolation without requiring -//! the `--test-threads=1` command line flag. - -const FLUSH_INTERVAL_MS: u64 = 50; -const FLUSH_BUFFER_MS: u64 = 50; -const TOTAL_WAIT_MS: u64 = FLUSH_INTERVAL_MS + FLUSH_BUFFER_MS; - -use crate::traces::{SpanBuilder, SpanKind, TraceCollector}; -use serde_json::Value; -use serial_test::serial; -use std::sync::Arc; - -use super::MockOtelCollector; - -/// Helper to extract all spans from OTLP JSON payloads -fn extract_spans(payloads: &[Value]) -> Vec<&Value> { - let mut spans = Vec::new(); - for payload in payloads { - if let Some(resource_spans) = payload.get("resourceSpans").and_then(|v| v.as_array()) { - for resource_span in resource_spans { - if let Some(scope_spans) = - resource_span.get("scopeSpans").and_then(|v| v.as_array()) - { - for scope_span in scope_spans { - if let Some(span_list) = scope_span.get("spans").and_then(|v| v.as_array()) - { - spans.extend(span_list.iter()); - } - } - } - } - } - } - spans -} - -/// Helper to get string attribute value from a span -fn get_string_attr<'a>(span: &'a Value, key: &str) -> Option<&'a str> { - span.get("attributes") - .and_then(|attrs| attrs.as_array()) - .and_then(|attrs| { - attrs - .iter() - .find(|attr| attr.get("key").and_then(|k| k.as_str()) == Some(key)) - }) - .and_then(|attr| attr.get("value")) - .and_then(|v| v.get("stringValue")) - .and_then(|v| v.as_str()) -} - -#[tokio::test] -#[serial] -async fn test_llm_span_contains_basic_attributes() { - // Start mock OTEL collector - let mock_collector = MockOtelCollector::start().await; - - // Create TraceCollector pointing to mock with 500ms flush intervalc - std::env::set_var( - "OTEL_COLLECTOR_URL", - format!("{}/v1/traces", mock_collector.address()), - ); - std::env::set_var("OTEL_TRACING_ENABLED", "true"); - std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500"); - let trace_collector = Arc::new(TraceCollector::new(Some(true))); - - // Create a test span simulating router.rs behavior - let span = SpanBuilder::new("POST /v1/chat/completions >> /v1/chat/completions") - .with_kind(SpanKind::Client) - .with_trace_id("test-trace-123") - .with_attribute("http.method", "POST") - .with_attribute("http.target", "/v1/chat/completions") - .with_attribute("http.upstream_target", "/v1/chat/completions") - .with_attribute("llm.model", "gpt-4o") - .with_attribute("llm.provider", "openai") - .with_attribute("llm.is_streaming", "true") - .with_attribute("llm.temperature", "0.7") - .build(); - - trace_collector.record_span("archgw(llm)", span); - - // Flush and wait for spans to arrive (500ms flush interval + 200ms buffer) - trace_collector.flush().await.expect("Failed to flush"); - tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await; - - let payloads = mock_collector.get_traces().await; - let spans = extract_spans(&payloads); - - assert_eq!(spans.len(), 1, "Expected exactly one span"); - - let span = spans[0]; - // Validate HTTP attributes - assert_eq!(get_string_attr(span, "http.method"), Some("POST")); - assert_eq!( - get_string_attr(span, "http.target"), - Some("/v1/chat/completions") - ); - - // Validate LLM attributes - assert_eq!(get_string_attr(span, "llm.model"), Some("gpt-4o")); - assert_eq!(get_string_attr(span, "llm.provider"), Some("openai")); - assert_eq!(get_string_attr(span, "llm.is_streaming"), Some("true")); - assert_eq!(get_string_attr(span, "llm.temperature"), Some("0.7")); -} - -#[tokio::test] -#[serial] -async fn test_llm_span_contains_tool_information() { - let mock_collector = MockOtelCollector::start().await; - std::env::set_var( - "OTEL_COLLECTOR_URL", - format!("{}/v1/traces", mock_collector.address()), - ); - std::env::set_var("OTEL_TRACING_ENABLED", "true"); - std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500"); - let trace_collector = Arc::new(TraceCollector::new(Some(true))); - - let tools_formatted = "get_weather(...)\nsearch_web(...)\ncalculate(...)"; - - let span = SpanBuilder::new("POST /v1/chat/completions") - .with_trace_id("test-trace-tools") - .with_attribute("llm.request.tools", tools_formatted) - .with_attribute("llm.model", "gpt-4o") - .build(); - - trace_collector.record_span("archgw(llm)", span); - trace_collector.flush().await.expect("Failed to flush"); - tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await; - - let payloads = mock_collector.get_traces().await; - let spans = extract_spans(&payloads); - - assert!(!spans.is_empty(), "No spans captured"); - - let span = spans[0]; - let tools = get_string_attr(span, "llm.request.tools"); - - assert!(tools.is_some(), "Tools attribute missing"); - assert!(tools.unwrap().contains("get_weather(...)")); - assert!(tools.unwrap().contains("search_web(...)")); - assert!(tools.unwrap().contains("calculate(...)")); - assert!( - tools.unwrap().contains('\n'), - "Tools should be newline-separated" - ); -} - -#[tokio::test] -#[serial] -async fn test_llm_span_contains_user_message_preview() { - let mock_collector = MockOtelCollector::start().await; - std::env::set_var( - "OTEL_COLLECTOR_URL", - format!("{}/v1/traces", mock_collector.address()), - ); - std::env::set_var("OTEL_TRACING_ENABLED", "true"); - std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500"); - let trace_collector = Arc::new(TraceCollector::new(Some(true))); - - let long_message = - "This is a very long user message that should be truncated to 50 characters in the span"; - let preview = if long_message.len() > 50 { - format!("{}...", &long_message[..50]) - } else { - long_message.to_string() - }; - - let span = SpanBuilder::new("POST /v1/messages") - .with_trace_id("test-trace-preview") - .with_attribute("llm.request.user_message_preview", &preview) - .build(); - - trace_collector.record_span("archgw(llm)", span); - trace_collector.flush().await.expect("Failed to flush"); - tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await; - - let payloads = mock_collector.get_traces().await; - let spans = extract_spans(&payloads); - let span = spans[0]; - - let message_preview = get_string_attr(span, "llm.request.user_message_preview"); - - assert!(message_preview.is_some()); - assert!(message_preview.unwrap().len() <= 53); // 50 chars + "..." - assert!(message_preview.unwrap().contains("...")); -} - -#[tokio::test] -#[serial] -async fn test_llm_span_contains_time_to_first_token() { - let mock_collector = MockOtelCollector::start().await; - std::env::set_var( - "OTEL_COLLECTOR_URL", - format!("{}/v1/traces", mock_collector.address()), - ); - std::env::set_var("OTEL_TRACING_ENABLED", "true"); - std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500"); - let trace_collector = Arc::new(TraceCollector::new(Some(true))); - - let ttft_ms = "245"; // milliseconds as string - - let span = SpanBuilder::new("POST /v1/chat/completions") - .with_trace_id("test-trace-ttft") - .with_attribute("llm.is_streaming", "true") - .with_attribute("llm.time_to_first_token_ms", ttft_ms) - .build(); - - trace_collector.record_span("archgw(llm)", span); - trace_collector.flush().await.expect("Failed to flush"); - tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await; - - let payloads = mock_collector.get_traces().await; - let spans = extract_spans(&payloads); - let span = spans[0]; - - // Check TTFT attribute - let ttft_attr = get_string_attr(span, "llm.time_to_first_token_ms"); - assert_eq!(ttft_attr, Some("245")); -} - -#[tokio::test] -#[serial] -async fn test_llm_span_contains_upstream_path() { - let mock_collector = MockOtelCollector::start().await; - std::env::set_var( - "OTEL_COLLECTOR_URL", - format!("{}/v1/traces", mock_collector.address()), - ); - std::env::set_var("OTEL_TRACING_ENABLED", "true"); - std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500"); - let trace_collector = Arc::new(TraceCollector::new(Some(true))); - - // Test Zhipu provider with path transformation - let span = SpanBuilder::new("POST /v1/chat/completions >> /api/paas/v4/chat/completions") - .with_trace_id("test-trace-upstream") - .with_attribute("http.upstream_target", "/api/paas/v4/chat/completions") - .with_attribute("llm.provider", "zhipu") - .with_attribute("llm.model", "glm-4") - .build(); - - trace_collector.record_span("archgw(llm)", span); - trace_collector.flush().await.expect("Failed to flush"); - tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await; - - let payloads = mock_collector.get_traces().await; - let spans = extract_spans(&payloads); - let span = spans[0]; - - // Operation name should show the transformation - let name = span.get("name").and_then(|v| v.as_str()); - assert!(name.is_some()); - assert!( - name.unwrap().contains(">>"), - "Operation name should show path transformation" - ); - - // Check upstream target attribute - let upstream = get_string_attr(span, "http.upstream_target"); - assert_eq!(upstream, Some("/api/paas/v4/chat/completions")); -} - -#[tokio::test] -#[serial] -async fn test_llm_span_multiple_services() { - let mock_collector = MockOtelCollector::start().await; - std::env::set_var( - "OTEL_COLLECTOR_URL", - format!("{}/v1/traces", mock_collector.address()), - ); - std::env::set_var("OTEL_TRACING_ENABLED", "true"); - std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500"); - let trace_collector = Arc::new(TraceCollector::new(Some(true))); - - // Create spans for different services - let llm_span = SpanBuilder::new("LLM Request") - .with_trace_id("test-multi") - .with_attribute("service", "llm") - .build(); - - let routing_span = SpanBuilder::new("Routing Decision") - .with_trace_id("test-multi") - .with_attribute("service", "routing") - .build(); - - trace_collector.record_span("archgw(llm)", llm_span); - trace_collector.record_span("archgw(routing)", routing_span); - trace_collector.flush().await.expect("Failed to flush"); - tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await; - - let payloads = mock_collector.get_traces().await; - let all_spans = extract_spans(&payloads); - - assert_eq!(all_spans.len(), 2, "Should have captured both spans"); -} - -#[tokio::test] -#[serial] -async fn test_tracing_disabled_produces_no_spans() { - let mock_collector = MockOtelCollector::start().await; - - // Create TraceCollector with tracing DISABLED - std::env::set_var( - "OTEL_COLLECTOR_URL", - format!("{}/v1/traces", mock_collector.address()), - ); - std::env::set_var("OTEL_TRACING_ENABLED", "false"); - std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500"); - let trace_collector = Arc::new(TraceCollector::new(Some(false))); - - let span = SpanBuilder::new("Test Span") - .with_trace_id("test-disabled") - .build(); - - trace_collector.record_span("archgw(llm)", span); - trace_collector.flush().await.ok(); // Should be no-op when disabled - tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await; - - let payloads = mock_collector.get_traces().await; - let all_spans = extract_spans(&payloads); - assert_eq!( - all_spans.len(), - 0, - "No spans should be captured when tracing is disabled" - ); -} diff --git a/crates/llm_gateway/src/filter_context.rs b/crates/llm_gateway/src/filter_context.rs index 3a1f7b84..4bcd9955 100644 --- a/crates/llm_gateway/src/filter_context.rs +++ b/crates/llm_gateway/src/filter_context.rs @@ -74,7 +74,7 @@ impl RootContext for FilterContext { fn create_http_context(&self, context_id: u32) -> Option> { trace!( - "||| create_http_context called with context_id: {:?} |||", + "create_http_context called with context_id: {:?}", context_id ); @@ -107,10 +107,7 @@ impl Context for FilterContext { _body_size: usize, _num_trailers: usize, ) { - trace!( - "||| on_http_call_response called with token_id: {:?} |||", - token_id - ); + trace!("on_http_call_response called with token_id: {:?}", token_id); let _callout_data = self .callouts @@ -119,7 +116,7 @@ impl Context for FilterContext { .expect("invalid token_id"); if let Some(status) = self.get_http_call_response_header(":status") { - trace!("trace response status: {:?}", status); + trace!("response status: {:?}", status); }; } } diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 0543d3d1..d3116233 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -97,7 +97,7 @@ impl StreamContext { .as_ref() .filter(|id| !id.is_empty()) .cloned() - .unwrap_or_else(|| "NO_REQUEST_ID".to_string()) + .unwrap_or_else(|| "no_request_id".to_string()) } fn llm_provider(&self) -> &LlmProvider { self.llm_provider @@ -145,14 +145,14 @@ impl StreamContext { match self.llm_providers.default() { Some(default_provider) => { info!( - "[PLANO_REQ_ID:{}] Provider selection failed, using default provider", + "request_id={}: provider selection failed, using default provider", self.request_identifier() ); default_provider } None => { error!( - "[PLANO_REQ_ID:{}] PROVIDER_SELECTION_FAILED: Error='{}' and no default provider configured", + "request_id={}: provider selection failed, error='{}' and no default provider configured", self.request_identifier(), err ); @@ -165,7 +165,7 @@ impl StreamContext { self.llm_provider = Some(provider); info!( - "[PLANO_REQ_ID:{}] PROVIDER_SELECTION: Hint='{}' -> Selected='{}'", + "request_id={}: provider selected, hint='{}' selected='{}'", self.request_identifier(), self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER) .unwrap_or("none".to_string()), @@ -180,12 +180,12 @@ impl StreamContext { // Check if client provided an Authorization header if self.get_http_request_header("Authorization").is_none() { warn!( - "[PLANO_REQ_ID:{}] AUTH_PASSTHROUGH: passthrough_auth enabled but no Authorization header present in client request", + "request_id={}: passthrough_auth enabled but no authorization header present in client request", self.request_identifier() ); } else { debug!( - "[PLANO_REQ_ID:{}] AUTH_PASSTHROUGH: preserving client Authorization header for provider '{}'", + "request_id={}: preserving client authorization header for provider '{}'", self.request_identifier(), self.llm_provider().name ); @@ -249,7 +249,11 @@ impl StreamContext { } fn send_server_error(&self, error: ServerError, override_status_code: Option) { - warn!("server error occurred: {}", error); + warn!( + "request_id={}: server error occurred: {}", + self.request_identifier(), + error + ); self.send_http_response( override_status_code .unwrap_or(StatusCode::INTERNAL_SERVER_ERROR) @@ -269,7 +273,7 @@ impl StreamContext { let token_count = tokenizer::token_count(model, json_string).unwrap_or(0); debug!( - "[PLANO_REQ_ID:{}] TOKEN_COUNT: model='{}' input_tokens={}", + "request_id={}: token count, model='{}' input_tokens={}", self.request_identifier(), model, token_count @@ -283,7 +287,7 @@ impl StreamContext { // Check if rate limiting needs to be applied. if let Some(selector) = self.ratelimit_selector.take() { info!( - "[PLANO_REQ_ID:{}] RATELIMIT_CHECK: model='{}' selector='{}:{}'", + "request_id={}: ratelimit check, model='{}' selector='{}:{}'", self.request_identifier(), model, selector.key, @@ -296,7 +300,7 @@ impl StreamContext { )?; } else { debug!( - "[PLANO_REQ_ID:{}] RATELIMIT_SKIP: model='{}' (no selector)", + "request_id={}: ratelimit skip, model='{}' (no selector)", self.request_identifier(), model ); @@ -315,7 +319,7 @@ impl StreamContext { Ok(duration) => { let duration_ms = duration.as_millis(); info!( - "[PLANO_REQ_ID:{}] TIME_TO_FIRST_TOKEN: {}ms", + "request_id={}: time to first token {}ms", self.request_identifier(), duration_ms ); @@ -324,7 +328,7 @@ impl StreamContext { } Err(e) => { warn!( - "[PLANO_REQ_ID:{}] TIME_MEASUREMENT_ERROR: {:?}", + "request_id={}: time measurement error: {:?}", self.request_identifier(), e ); @@ -340,7 +344,7 @@ impl StreamContext { // Convert the duration to milliseconds let duration_ms = duration.as_millis(); info!( - "[PLANO_REQ_ID:{}] REQUEST_COMPLETE: latency={}ms tokens={}", + "request_id={}: request complete, latency={}ms tokens={}", self.request_identifier(), duration_ms, self.response_tokens @@ -356,7 +360,7 @@ impl StreamContext { self.metrics.time_per_output_token.record(tpot); info!( - "[PLANO_REQ_ID:{}] TOKEN_THROUGHPUT: time_per_token={}ms tokens_per_second={}", + "request_id={}: token throughput, time_per_token={}ms tokens_per_second={}", self.request_identifier(), tpot, 1000 / tpot @@ -366,7 +370,11 @@ impl StreamContext { } } Err(e) => { - warn!("SystemTime error: {:?}", e); + warn!( + "request_id={}: system time error: {:?}", + self.request_identifier(), + e + ); } } // Record the output sequence length @@ -379,7 +387,7 @@ impl StreamContext { if self.streaming_response { let chunk_size = body_size; debug!( - "[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_CHUNK: streaming=true chunk_size={}", + "request_id={}: upstream response chunk, streaming=true chunk_size={}", self.request_identifier(), chunk_size ); @@ -387,7 +395,7 @@ impl StreamContext { Some(chunk) => chunk, None => { warn!( - "[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_ERROR: empty chunk, size={}", + "request_id={}: upstream response error, empty chunk size={}", self.request_identifier(), chunk_size ); @@ -397,7 +405,7 @@ impl StreamContext { if streaming_chunk.len() != chunk_size { warn!( - "[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_MISMATCH: expected={} actual={}", + "request_id={}: upstream response size mismatch, expected={} actual={}", self.request_identifier(), chunk_size, streaming_chunk.len() @@ -409,14 +417,17 @@ impl StreamContext { return Err(Action::Continue); } debug!( - "[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_COMPLETE: streaming=false body_size={}", + "request_id={}: upstream response complete, streaming=false body_size={}", self.request_identifier(), body_size ); match self.get_http_response_body(0, body_size) { Some(body) => Ok(body), None => { - warn!("non streaming response body empty"); + warn!( + "request_id={}: non streaming response body empty", + self.request_identifier() + ); Err(Action::Continue) } } @@ -429,7 +440,7 @@ impl StreamContext { provider_id: ProviderId, ) -> Result, Action> { debug!( - "[PLANO_REQ_ID:{}] STREAMING_PROCESS: client={:?} provider_id={:?} chunk_size={}", + "request_id={}: streaming process, client={:?} provider_id={:?} chunk_size={}", self.request_identifier(), self.client_api, provider_id, @@ -460,7 +471,11 @@ impl StreamContext { { Ok(buffer) => Some(buffer), Err(e) => { - warn!("Failed to create SSE buffer: {}", e); + warn!( + "request_id={}: failed to create sse buffer: {}", + self.request_identifier(), + e + ); return Err(Action::Continue); } }; @@ -477,7 +492,7 @@ impl StreamContext { Ok(events) => { if has_buffered { debug!( - "[PLANO_REQ_ID:{}] SSE_INCOMPLETE_BUFFERED: {} bytes buffered for next chunk", + "request_id={}: sse incomplete buffered, {} bytes buffered for next chunk", self.request_identifier(), buffered_size ); @@ -486,7 +501,7 @@ impl StreamContext { } Err(e) => { warn!( - "[PLANO_REQ_ID:{}] SSE_CHUNK_PROCESS_ERROR: {}", + "request_id={}: sse chunk process error: {}", self.request_identifier(), e ); @@ -495,7 +510,10 @@ impl StreamContext { } } None => { - warn!("SSE chunk processor unexpectedly missing"); + warn!( + "request_id={}: sse chunk processor unexpectedly missing", + self.request_identifier() + ); return Err(Action::Continue); } }; @@ -510,7 +528,7 @@ impl StreamContext { if provider_response.is_final() { debug!( - "[PLANO_REQ_ID:{}] STREAMING_FINAL_CHUNK: total_tokens={}", + "request_id={}: streaming final chunk, total_tokens={}", self.request_identifier(), self.response_tokens ); @@ -520,7 +538,7 @@ impl StreamContext { let estimated_tokens = content.len() / 4; self.response_tokens += estimated_tokens.max(1); debug!( - "[PLANO_REQ_ID:{}] STREAMING_TOKEN_UPDATE: delta_chars={} estimated_tokens={} total_tokens={}", + "request_id={}: streaming token update, delta_chars={} estimated_tokens={} total_tokens={}", self.request_identifier(), content.len(), estimated_tokens.max(1), @@ -530,7 +548,7 @@ impl StreamContext { } Err(e) => { warn!( - "[PLANO_REQ_ID:{}] STREAMING_CHUNK_ERROR: {}", + "request_id={}: streaming chunk error: {}", self.request_identifier(), e ); @@ -552,7 +570,7 @@ impl StreamContext { if !bytes.is_empty() { let content = String::from_utf8_lossy(&bytes); debug!( - "[PLANO_REQ_ID:{}] UPSTREAM_TRANSFORMED_CLIENT_RESPONSE: size={} content={}", + "request_id={}: upstream transformed client response, size={} content={}", self.request_identifier(), bytes.len(), content @@ -561,13 +579,19 @@ impl StreamContext { Ok(bytes) } None => { - warn!("SSE buffer unexpectedly missing after initialization"); + warn!( + "request_id={}: sse buffer unexpectedly missing after initialization", + self.request_identifier() + ); Err(Action::Continue) } } } None => { - warn!("Missing client_api for non-streaming response"); + warn!( + "request_id={}: missing client_api for non-streaming response", + self.request_identifier() + ); Err(Action::Continue) } } @@ -590,7 +614,7 @@ impl StreamContext { Ok(buffer) => Some(buffer), Err(e) => { warn!( - "[PLANO_REQ_ID:{}] BEDROCK_BUFFER_INIT_ERROR: {}", + "request_id={}: bedrock buffer init error: {}", self.request_identifier(), e ); @@ -620,7 +644,7 @@ impl StreamContext { let estimated_tokens = content.len() / 4; self.response_tokens += estimated_tokens.max(1); debug!( - "[PLANO_REQ_ID:{}] BEDROCK_TOKEN_UPDATE: delta_chars={} estimated_tokens={} total_tokens={}", + "request_id={}: bedrock token update, delta_chars={} estimated_tokens={} total_tokens={}", self.request_identifier(), content.len(), estimated_tokens.max(1), @@ -638,7 +662,7 @@ impl StreamContext { } Err(e) => { warn!( - "[PLANO_REQ_ID:{}] BEDROCK_FRAME_CONVERSION_ERROR: {}", + "request_id={}: bedrock frame conversion error: {}", self.request_identifier(), e ); @@ -648,7 +672,7 @@ impl StreamContext { Some(DecodedFrame::Incomplete) => { // Incomplete frame - buffer retains partial data, wait for more bytes debug!( - "[PLANO_REQ_ID:{}] BEDROCK_INCOMPLETE_FRAME: waiting for more data", + "request_id={}: bedrock incomplete frame, waiting for more data", self.request_identifier() ); break; @@ -656,7 +680,7 @@ impl StreamContext { None => { // Decode error warn!( - "[PLANO_REQ_ID:{}] BEDROCK_DECODE_ERROR", + "request_id={}: bedrock decode error", self.request_identifier() ); return Err(Action::Continue); @@ -671,7 +695,7 @@ impl StreamContext { if !bytes.is_empty() { let content = String::from_utf8_lossy(&bytes); debug!( - "[PLANO_REQ_ID:{}] UPSTREAM_TRANSFORMED_CLIENT_RESPONSE: size={} content={}", + "request_id={}: upstream transformed client response, size={} content={}", self.request_identifier(), bytes.len(), content @@ -681,7 +705,7 @@ impl StreamContext { } None => { warn!( - "[PLANO_REQ_ID:{}] BEDROCK_BUFFER_MISSING", + "request_id={}: bedrock buffer missing", self.request_identifier() ); Err(Action::Continue) @@ -695,7 +719,7 @@ impl StreamContext { provider_id: ProviderId, ) -> Result, Action> { debug!( - "[PLANO_REQ_ID:{}] NON_STREAMING_PROCESS: provider_id={:?} body_size={}", + "request_id={}: non-streaming process, provider_id={:?} body_size={}", self.request_identifier(), provider_id, body.len() @@ -707,7 +731,7 @@ impl StreamContext { Ok(response) => response, Err(e) => { warn!( - "[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_PARSE_ERROR: {} | body: {}", + "request_id={}: upstream response parse error: {} | body: {}", self.request_identifier(), e, String::from_utf8_lossy(body) @@ -722,7 +746,7 @@ impl StreamContext { } None => { warn!( - "[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_ERROR: missing client_api", + "request_id={}: upstream response error, missing client_api", self.request_identifier() ); return Err(Action::Continue); @@ -734,7 +758,7 @@ impl StreamContext { response.extract_usage_counts() { debug!( - "[PLANO_REQ_ID:{}] RESPONSE_USAGE: prompt_tokens={} completion_tokens={} total_tokens={}", + "request_id={}: response usage, prompt_tokens={} completion_tokens={} total_tokens={}", self.request_identifier(), prompt_tokens, completion_tokens, @@ -743,7 +767,7 @@ impl StreamContext { self.response_tokens = completion_tokens; } else { warn!( - "[PLANO_REQ_ID:{}] RESPONSE_USAGE: no usage information found", + "request_id={}: response usage, no usage information found", self.request_identifier() ); } @@ -751,14 +775,18 @@ impl StreamContext { match serde_json::to_vec(&response) { Ok(bytes) => { debug!( - "[PLANO_REQ_ID:{}] CLIENT_RESPONSE_PAYLOAD: {}", + "request_id={}: client response payload: {}", self.request_identifier(), String::from_utf8_lossy(&bytes) ); Ok(bytes) } Err(e) => { - warn!("Failed to serialize normalized response: {}", e); + warn!( + "request_id={}: failed to serialize normalized response: {}", + self.request_identifier(), + e + ); self.send_server_error( ServerError::LogicError(format!("Response serialization error: {}", e)), Some(StatusCode::INTERNAL_SERVER_ERROR), @@ -819,7 +847,7 @@ impl HttpContext for StreamContext { Some(provider_id.compatible_api_for_client(api, self.streaming_response)); debug!( - "[PLANO_REQ_ID:{}] ROUTING_INFO: provider='{}' client_api={:?} resolved_api={:?} request_path='{}'", + "request_id={}: routing info, provider='{}' client_api={:?} resolved_api={:?} request_path='{}'", self.request_identifier(), provider.to_provider_id(), api, @@ -863,7 +891,7 @@ impl HttpContext for StreamContext { fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action { debug!( - "[PLANO_REQ_ID:{}] REQUEST_BODY_CHUNK: bytes={} end_stream={}", + "request_id={}: request body chunk, bytes={} end_stream={}", self.request_identifier(), body_size, end_of_stream @@ -902,14 +930,14 @@ impl HttpContext for StreamContext { let mut deserialized_client_request: ProviderRequestType = match self.client_api.as_ref() { Some(the_client_api) => { info!( - "[PLANO_REQ_ID:{}] CLIENT_REQUEST_RECEIVED: api={:?} body_size={}", + "request_id={}: client request received, api={:?} body_size={}", self.request_identifier(), the_client_api, body_bytes.len() ); debug!( - "[PLANO_REQ_ID:{}] CLIENT_REQUEST_PAYLOAD: {}", + "request_id={}: client request payload: {}", self.request_identifier(), String::from_utf8_lossy(&body_bytes) ); @@ -918,7 +946,7 @@ impl HttpContext for StreamContext { Ok(deserialized) => deserialized, Err(e) => { warn!( - "[PLANO_REQ_ID:{}] CLIENT_REQUEST_PARSE_ERROR: {} | body: {}", + "request_id={}: client request parse error: {} | body: {}", self.request_identifier(), e, String::from_utf8_lossy(&body_bytes) @@ -953,7 +981,7 @@ impl HttpContext for StreamContext { Some(model_name) => model_name, None => { warn!( - "[PLANO_REQ_ID:{}] MODEL_RESOLUTION_ERROR: no model specified | req_model='{}' provider='{}' config_model={:?}", + "request_id={}: model resolution error, no model specified | req_model='{}' provider='{}' config_model={:?}", self.request_identifier(), model_requested, self.llm_provider().name, @@ -981,7 +1009,7 @@ impl HttpContext for StreamContext { self.user_message = deserialized_client_request.get_recent_user_message(); info!( - "[PLANO_REQ_ID:{}] MODEL_RESOLUTION: req_model='{}' -> resolved_model='{}' provider='{}' streaming={}", + "request_id={}: model resolved, req_model='{}' -> resolved_model='{}' provider='{}' streaming={}", self.request_identifier(), model_requested, resolved_model, @@ -1008,56 +1036,68 @@ impl HttpContext for StreamContext { } // Convert chat completion request to llm provider specific request using provider interface - let serialized_body_bytes_upstream = - match self.resolved_api.as_ref() { - Some(upstream) => { - info!( - "[PLANO_REQ_ID:{}] UPSTREAM_TRANSFORM: client_api={:?} -> upstream_api={:?}", - self.request_identifier(), self.client_api, upstream + let serialized_body_bytes_upstream = match self.resolved_api.as_ref() { + Some(upstream) => { + info!( + "request_id={}: upstream transform, client_api={:?} -> upstream_api={:?}", + self.request_identifier(), + self.client_api, + upstream ); - match ProviderRequestType::try_from((deserialized_client_request, upstream)) { - Ok(request) => { - debug!( - "[PLANO_REQ_ID:{}] UPSTREAM_REQUEST_PAYLOAD: {}", - self.request_identifier(), - String::from_utf8_lossy(&request.to_bytes().unwrap_or_default()) - ); + match ProviderRequestType::try_from((deserialized_client_request, upstream)) { + Ok(request) => { + debug!( + "request_id={}: upstream request payload: {}", + self.request_identifier(), + String::from_utf8_lossy(&request.to_bytes().unwrap_or_default()) + ); - match request.to_bytes() { - Ok(bytes) => bytes, - Err(e) => { - warn!("Failed to serialize request body: {}", e); - self.send_server_error( - ServerError::LogicError(format!( - "Request serialization error: {}", - e - )), - Some(StatusCode::BAD_REQUEST), - ); - return Action::Pause; - } + match request.to_bytes() { + Ok(bytes) => bytes, + Err(e) => { + warn!( + "request_id={}: failed to serialize request body: {}", + self.request_identifier(), + e + ); + self.send_server_error( + ServerError::LogicError(format!( + "Request serialization error: {}", + e + )), + Some(StatusCode::BAD_REQUEST), + ); + return Action::Pause; } } - Err(e) => { - warn!("Failed to create provider request: {}", e); - self.send_server_error( - ServerError::LogicError(format!("Provider request error: {}", e)), - Some(StatusCode::BAD_REQUEST), - ); - return Action::Pause; - } + } + Err(e) => { + warn!( + "request_id={}: failed to create provider request: {}", + self.request_identifier(), + e + ); + self.send_server_error( + ServerError::LogicError(format!("Provider request error: {}", e)), + Some(StatusCode::BAD_REQUEST), + ); + return Action::Pause; } } - None => { - warn!("No upstream API resolved"); - self.send_server_error( - ServerError::LogicError("No upstream API resolved".into()), - Some(StatusCode::BAD_REQUEST), - ); - return Action::Pause; - } - }; + } + None => { + warn!( + "request_id={}: no upstream api resolved", + self.request_identifier() + ); + self.send_server_error( + ServerError::LogicError("No upstream API resolved".into()), + Some(StatusCode::BAD_REQUEST), + ); + return Action::Pause; + } + }; self.set_http_request_body(0, body_size, &serialized_body_bytes_upstream); Action::Continue @@ -1070,7 +1110,7 @@ impl HttpContext for StreamContext { self.upstream_status_code = StatusCode::from_u16(status_code).ok(); debug!( - "[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_STATUS: {}", + "request_id={}: upstream response status: {}", self.request_identifier(), status_code ); @@ -1090,14 +1130,17 @@ impl HttpContext for StreamContext { fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> Action { if self.request_body_sent_time.is_none() { - debug!("on_http_response_body: request body not sent, not doing any processing in llm filter"); + debug!( + "request_id={}: request body not sent, skipping processing in llm filter", + self.request_identifier() + ); return Action::Continue; } let current_time = get_current_time().unwrap(); if end_of_stream && body_size == 0 { debug!( - "[PLANO_REQ_ID:{}] RESPONSE_BODY_COMPLETE: total_bytes={}", + "request_id={}: response body complete, total_bytes={}", self.request_identifier(), body_size ); @@ -1109,7 +1152,7 @@ impl HttpContext for StreamContext { if let Some(status_code) = &self.upstream_status_code { if status_code.is_client_error() || status_code.is_server_error() { info!( - "[PLANO_REQ_ID:{}] UPSTREAM_ERROR_RESPONSE: status={} body_size={}", + "request_id={}: upstream error response, status={} body_size={}", self.request_identifier(), status_code.as_u16(), body_size @@ -1119,7 +1162,7 @@ impl HttpContext for StreamContext { if body_size > 0 { if let Ok(body) = self.read_raw_response_body(body_size) { debug!( - "[PLANO_REQ_ID:{}] UPSTREAM_ERROR_BODY: {}", + "request_id={}: upstream error body: {}", self.request_identifier(), String::from_utf8_lossy(&body) ); @@ -1141,7 +1184,7 @@ impl HttpContext for StreamContext { None => "None".to_string(), }; info!( - "[PLANO_REQ_ID:{}], UNSUPPORTED API: {}", + "request_id={}: unsupported api: {}", self.request_identifier(), api_info ); @@ -1155,7 +1198,7 @@ impl HttpContext for StreamContext { }; debug!( - "[PLANO_REQ_ID:{}] UPSTREAM_RAW_RESPONSE: body_size={} content={}", + "request_id={}: upstream raw response, body_size={} content={}", self.request_identifier(), body.len(), String::from_utf8_lossy(&body) diff --git a/demos/use_cases/http_filter/README.md b/demos/use_cases/http_filter/README.md index 89a5bd1a..fa683d9f 100644 --- a/demos/use_cases/http_filter/README.md +++ b/demos/use_cases/http_filter/README.md @@ -47,15 +47,15 @@ This brings up: - RAG Agent REST server on port 10505 - Plano listener on port 8001 (and gateway on 12000) - Jaeger UI for viewing traces at http://localhost:16686 -- Open WebUI at http://localhost:8080 for interactive queries +- AnythingLLM at http://localhost:3001 for interactive queries > Set `OPENAI_API_KEY` in your environment before running; `LLM_GATEWAY_ENDPOINT` defaults to `http://host.docker.internal:12000/v1`. ### 2. Test the system -**Option A: Using Open WebUI (recommended)** +**Option A: Using AnythingLLM (recommended)** -Navigate to http://localhost:8080 and send queries through the chat interface. +Navigate to http://localhost:3001 and send queries through the chat interface. **Option B: Using curl** ```bash diff --git a/demos/use_cases/http_filter/docker-compose.yaml b/demos/use_cases/http_filter/docker-compose.yaml index 469bf0d5..a5832b22 100644 --- a/demos/use_cases/http_filter/docker-compose.yaml +++ b/demos/use_cases/http_filter/docker-compose.yaml @@ -31,12 +31,17 @@ services: - "16686:16686" - "4317:4317" - "4318:4318" - open-web-ui: - image: dyrnq/open-webui:main + anythingllm: + image: mintplexlabs/anythingllm restart: always ports: - - "8080:8080" + - "3001:3001" + cap_add: + - SYS_ADMIN environment: - - DEFAULT_MODEL=gpt-4o-mini - - ENABLE_OPENAI_API=true - - OPENAI_API_BASE_URL=http://host.docker.internal:8001/v1 + - STORAGE_DIR=/app/server/storage + - LLM_PROVIDER=generic-openai + - GENERIC_OPEN_AI_BASE_PATH=http://plano:8001/v1 + - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini + - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 + - GENERIC_OPEN_AI_API_KEY=sk-placeholder diff --git a/demos/use_cases/llm_routing/docker-compose.yaml b/demos/use_cases/llm_routing/docker-compose.yaml index 3b294076..8e4a88b3 100644 --- a/demos/use_cases/llm_routing/docker-compose.yaml +++ b/demos/use_cases/llm_routing/docker-compose.yaml @@ -1,15 +1,34 @@ services: + plano: + build: + context: ../../../ + dockerfile: Dockerfile + ports: + - "12000:12000" + - "12001:12001" + environment: + - ARCH_CONFIG_PATH=/app/arch_config.yaml + - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set} + - OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317 + volumes: + - ./config.yaml:/app/arch_config.yaml:ro + - /etc/ssl/cert.pem:/etc/ssl/cert.pem - open-web-ui: - image: dyrnq/open-webui:main + anythingllm: + image: mintplexlabs/anythingllm restart: always ports: - - "8080:8080" + - "3001:3001" + cap_add: + - SYS_ADMIN environment: - - DEFAULT_MODEL=gpt-4o-mini - - ENABLE_OPENAI_API=true - - OPENAI_API_BASE_URL=http://host.docker.internal:12000/v1 + - STORAGE_DIR=/app/server/storage + - LLM_PROVIDER=generic-openai + - GENERIC_OPEN_AI_BASE_PATH=http://plano:12000/v1 + - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini + - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 + - GENERIC_OPEN_AI_API_KEY=sk-placeholder jaeger: build: diff --git a/demos/use_cases/mcp_filter/README.md b/demos/use_cases/mcp_filter/README.md index 89a5bd1a..fa683d9f 100644 --- a/demos/use_cases/mcp_filter/README.md +++ b/demos/use_cases/mcp_filter/README.md @@ -47,15 +47,15 @@ This brings up: - RAG Agent REST server on port 10505 - Plano listener on port 8001 (and gateway on 12000) - Jaeger UI for viewing traces at http://localhost:16686 -- Open WebUI at http://localhost:8080 for interactive queries +- AnythingLLM at http://localhost:3001 for interactive queries > Set `OPENAI_API_KEY` in your environment before running; `LLM_GATEWAY_ENDPOINT` defaults to `http://host.docker.internal:12000/v1`. ### 2. Test the system -**Option A: Using Open WebUI (recommended)** +**Option A: Using AnythingLLM (recommended)** -Navigate to http://localhost:8080 and send queries through the chat interface. +Navigate to http://localhost:3001 and send queries through the chat interface. **Option B: Using curl** ```bash diff --git a/demos/use_cases/mcp_filter/docker-compose.yaml b/demos/use_cases/mcp_filter/docker-compose.yaml index 469bf0d5..5e94f277 100644 --- a/demos/use_cases/mcp_filter/docker-compose.yaml +++ b/demos/use_cases/mcp_filter/docker-compose.yaml @@ -16,6 +16,8 @@ services: context: ../../../ dockerfile: Dockerfile ports: + - "11000:11000" + - "12001:12001" - "12000:12000" - "8001:8001" environment: @@ -31,12 +33,17 @@ services: - "16686:16686" - "4317:4317" - "4318:4318" - open-web-ui: - image: dyrnq/open-webui:main + anythingllm: + image: mintplexlabs/anythingllm restart: always ports: - - "8080:8080" + - "3001:3001" + cap_add: + - SYS_ADMIN environment: - - DEFAULT_MODEL=gpt-4o-mini - - ENABLE_OPENAI_API=true - - OPENAI_API_BASE_URL=http://host.docker.internal:8001/v1 + - STORAGE_DIR=/app/server/storage + - LLM_PROVIDER=generic-openai + - GENERIC_OPEN_AI_BASE_PATH=http://plano:8001/v1 + - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini + - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 + - GENERIC_OPEN_AI_API_KEY=sk-placeholder diff --git a/demos/use_cases/multi_agent_with_crewai_langchain/README.md b/demos/use_cases/multi_agent_with_crewai_langchain/README.md index 82b5c9c5..e36af9a2 100644 --- a/demos/use_cases/multi_agent_with_crewai_langchain/README.md +++ b/demos/use_cases/multi_agent_with_crewai_langchain/README.md @@ -47,13 +47,13 @@ This starts: - **Plano** (ports 12000, 8001) - routing and orchestration - **CrewAI Flight Agent** (port 10520) - flight search - **LangChain Weather Agent** (port 10510) - weather forecasts -- **Open WebUI** (port 8080) - chat interface +- **AnythingLLM** (port 3001) - chat interface - **Jaeger** (port 16686) - distributed tracing ### Try It Out 1. **Open the Chat Interface** - - Navigate to [http://localhost:8080](http://localhost:8080) + - Navigate to [http://localhost:3001](http://localhost:3001) - Create an account (stored locally) 2. **Ask Multi-Agent Questions** @@ -75,9 +75,9 @@ This starts: ## Architecture ``` -┌─────────────┐ -│ Open WebUI │ (Chat Interface) -└──────┬──────┘ +┌──────────────┐ +│ AnythingLLM │ (Chat Interface) +└──────┬───────┘ │ v ┌─────────────┐ diff --git a/demos/use_cases/multi_agent_with_crewai_langchain/config.yaml b/demos/use_cases/multi_agent_with_crewai_langchain/config.yaml index b3a204f3..8b63ccea 100644 --- a/demos/use_cases/multi_agent_with_crewai_langchain/config.yaml +++ b/demos/use_cases/multi_agent_with_crewai_langchain/config.yaml @@ -55,3 +55,4 @@ listeners: tracing: random_sampling: 100 + opentracing_grpc_endpoint: http://jaeger:4317 diff --git a/demos/use_cases/multi_agent_with_crewai_langchain/docker-compose.yaml b/demos/use_cases/multi_agent_with_crewai_langchain/docker-compose.yaml index e114442f..c233699b 100644 --- a/demos/use_cases/multi_agent_with_crewai_langchain/docker-compose.yaml +++ b/demos/use_cases/multi_agent_with_crewai_langchain/docker-compose.yaml @@ -9,7 +9,8 @@ services: environment: - ARCH_CONFIG_PATH=/app/arch_config.yaml - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set} - - OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces + - OTEL_TRACING_GRPC_ENDPOINT=http://jaeger:4317 + - LOG_LEVEL=${LOG_LEVEL:-info} volumes: - ./config.yaml:/app/arch_config.yaml:ro - /etc/ssl/cert.pem:/etc/ssl/cert.pem @@ -36,15 +37,20 @@ services: - LLM_GATEWAY_ENDPOINT=http://plano:12000/v1 command: ["python", "-u", "langchain/weather_agent.py"] - open-web-ui: - image: dyrnq/open-webui:main + anythingllm: + image: mintplexlabs/anythingllm restart: always ports: - - "8080:8080" + - "3001:3001" + cap_add: + - SYS_ADMIN environment: - - DEFAULT_MODEL=gpt-4o-mini - - ENABLE_OPENAI_API=true - - OPENAI_API_BASE_URL=http://plano:8001/v1 + - STORAGE_DIR=/app/server/storage + - LLM_PROVIDER=generic-openai + - GENERIC_OPEN_AI_BASE_PATH=http://plano:8001/v1 + - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini + - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 + - GENERIC_OPEN_AI_API_KEY=sk-placeholder jaeger: build: @@ -53,4 +59,3 @@ services: ports: - "16686:16686" # Jaeger UI - "4317:4317" # OTLP gRPC receiver - - "4318:4318" # OTLP HTTP receiver diff --git a/demos/use_cases/preference_based_routing/README.md b/demos/use_cases/preference_based_routing/README.md index eb785cf0..1af5ec31 100644 --- a/demos/use_cases/preference_based_routing/README.md +++ b/demos/use_cases/preference_based_routing/README.md @@ -6,7 +6,7 @@ This demo shows how you can use user preferences to route user prompts to approp Make sure your machine is up to date with [latest version of plano]([url](https://github.com/katanemo/plano/tree/main?tab=readme-ov-file#prerequisites)). And you have activated the virtual environment. -1. start the openwebui +1. start anythingllm ```bash (venv) $ cd demos/use_cases/preference_based_routing (venv) $ docker compose up -d @@ -24,7 +24,7 @@ Make sure your machine is up to date with [latest version of plano]([url](https: ... ``` -3. open openwebui http://localhost:8080/ +3. open AnythingLLM http://localhost:3001/ # Testing out preference based routing diff --git a/demos/use_cases/preference_based_routing/docker-compose.yaml b/demos/use_cases/preference_based_routing/docker-compose.yaml index 8387ccac..b4bc6a6b 100644 --- a/demos/use_cases/preference_based_routing/docker-compose.yaml +++ b/demos/use_cases/preference_based_routing/docker-compose.yaml @@ -1,14 +1,37 @@ services: - open-web-ui: - image: dyrnq/open-webui:main + plano: + build: + context: ../../../ + dockerfile: Dockerfile + ports: + - "12000:12000" + - "12001:12001" + environment: + - ARCH_CONFIG_PATH=/app/arch_config.yaml + - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set} + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:?ANTHROPIC_API_KEY environment variable is required but not set} + - OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317 + - OTEL_TRACING_ENABLED=true + - RUST_LOG=debug + volumes: + - ./config.yaml:/app/arch_config.yaml:ro + - /etc/ssl/cert.pem:/etc/ssl/cert.pem + + anythingllm: + image: mintplexlabs/anythingllm restart: always ports: - - "8080:8080" + - "3001:3001" + cap_add: + - SYS_ADMIN environment: - - DEFAULT_MODELS=gpt-4o-mini - - ENABLE_OPENAI_API=true - - OPENAI_API_BASE_URL=http://host.docker.internal:12000/v1 + - STORAGE_DIR=/app/server/storage + - LLM_PROVIDER=generic-openai + - GENERIC_OPEN_AI_BASE_PATH=http://plano:12000/v1 + - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini + - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 + - GENERIC_OPEN_AI_API_KEY=sk-placeholder jaeger: build: @@ -18,12 +41,12 @@ services: - "4317:4317" - "4318:4318" - prometheus: - build: - context: ../../shared/prometheus + # prometheus: + # build: + # context: ../../shared/prometheus - grafana: - build: - context: ../../shared/grafana - ports: - - "3000:3000" + # grafana: + # build: + # context: ../../shared/grafana + # ports: + # - "3000:3000" diff --git a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml index 62e7ab96..abd909a0 100644 --- a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml +++ b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml @@ -130,5 +130,6 @@ prompt_targets: required: true type: int tracing: + opentracing_grpc_endpoint: http://host.docker.internal:4317 random_sampling: 100 version: v0.3.0 diff --git a/tests/e2e/docker-compose.yaml b/tests/e2e/docker-compose.yaml index 31dd28de..fa171051 100644 --- a/tests/e2e/docker-compose.yaml +++ b/tests/e2e/docker-compose.yaml @@ -16,4 +16,4 @@ services: - OPENAI_API_KEY=${OPENAI_API_KEY:?error} - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error} - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:?error} - - OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces + - OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317 diff --git a/tests/rest/tracing.rest b/tests/rest/tracing.rest deleted file mode 100644 index e277dedd..00000000 --- a/tests/rest/tracing.rest +++ /dev/null @@ -1,31 +0,0 @@ -POST http://localhost:4318/v1/traces -Content-Type: application/json - -{ - "resourceSpans": [ - { - "resource": { - "attributes": [ - { "key": "service.name", "value": { "stringValue": "upstream-llm" } } - ] - }, - "scopeSpans": [ - { - "scope": { "name": "default", "version": "1.0", "attributes": [] }, - "spans": [ - { - "traceId": "fa8f7c410c28092faafbd7d4a2f5e742", - "spanId": "4dc43055a07410d6", - "parentSpanId": "f0acd74216a5e179", - "name": "archgw", - "startTimeUnixNano": "1731363782228270000", - "endTimeUnixNano": "1731363787843156000", - "kind": 1, - "attributes": [] - } - ] - } - ] - } - ] -}