use standard tracing and logging in brightstaff (#721)

This commit is contained in:
Adil Hafeez 2026-02-09 13:33:27 -08:00 committed by GitHub
parent 4d9ed74b68
commit 46de89590b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
55 changed files with 1494 additions and 2432 deletions

View file

@ -29,6 +29,3 @@ jobs:
- name: Run unit tests
run: cargo test --lib
- name: Run trace integration tests
run: cargo test -p common --features trace-collection traces::tests::trace_integration_test

View file

@ -40,7 +40,7 @@ COPY crates/brightstaff/src brightstaff/src
RUN find common hermesllm brightstaff -name "*.rs" -exec touch {} +
RUN cargo build --release -p brightstaff
FROM docker.io/envoyproxy/envoy:v1.36.4 AS envoy
FROM docker.io/envoyproxy/envoy:v1.37.0 AS envoy
FROM python:3.13.6-slim AS arch

View file

@ -25,8 +25,7 @@
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true
},
"rust-analyzer.cargo.features": ["trace-collection"]
}
},
"extensions": {
"recommendations": [

View file

@ -6,6 +6,7 @@ import yaml
from jsonschema import validate
from urllib.parse import urlparse
from copy import deepcopy
from planoai.consts import DEFAULT_OTEL_TRACING_GRPC_ENDPOINT
SUPPORTED_PROVIDERS_WITH_BASE_URL = [
@ -158,6 +159,32 @@ def validate_and_render_schema():
arch_tracing = config_yaml.get("tracing", {})
# Resolution order: config yaml > OTEL_TRACING_GRPC_ENDPOINT env var > hardcoded default
opentracing_grpc_endpoint = arch_tracing.get(
"opentracing_grpc_endpoint",
os.environ.get(
"OTEL_TRACING_GRPC_ENDPOINT", DEFAULT_OTEL_TRACING_GRPC_ENDPOINT
),
)
# resolve env vars in opentracing_grpc_endpoint if present
if opentracing_grpc_endpoint and "$" in opentracing_grpc_endpoint:
opentracing_grpc_endpoint = os.path.expandvars(opentracing_grpc_endpoint)
print(
f"Resolved opentracing_grpc_endpoint to {opentracing_grpc_endpoint} after expanding environment variables"
)
arch_tracing["opentracing_grpc_endpoint"] = opentracing_grpc_endpoint
# ensure that opentracing_grpc_endpoint is a valid URL if present and start with http and must not have any path
if opentracing_grpc_endpoint:
urlparse_result = urlparse(opentracing_grpc_endpoint)
if urlparse_result.scheme != "http":
raise Exception(
f"Invalid opentracing_grpc_endpoint {opentracing_grpc_endpoint}, scheme must be http"
)
if urlparse_result.path and urlparse_result.path != "/":
raise Exception(
f"Invalid opentracing_grpc_endpoint {opentracing_grpc_endpoint}, path must be empty"
)
llms_with_endpoint = []
llms_with_endpoint_cluster_names = set()
updated_model_providers = []

View file

@ -3,3 +3,4 @@ import os
SERVICE_NAME_ARCHGW = "plano"
PLANO_DOCKER_NAME = "plano"
PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.4")
DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://host.docker.internal:4317"

View file

@ -116,11 +116,18 @@ def stream_gateway_logs(follow, service="plano"):
def docker_validate_plano_schema(arch_config_file):
import os
env = os.environ.copy()
env.pop("PATH", None)
env_args = [item for key, value in env.items() for item in ["-e", f"{key}={value}"]]
result = subprocess.run(
[
"docker",
"run",
"--rm",
*env_args,
"-v",
f"{arch_config_file}:/app/arch_config.yaml:ro",
"--entrypoint",

View file

@ -27,6 +27,7 @@ from planoai.core import (
start_cli_agent,
)
from planoai.consts import (
DEFAULT_OTEL_TRACING_GRPC_ENDPOINT,
PLANO_DOCKER_IMAGE,
PLANO_DOCKER_NAME,
SERVICE_NAME_ARCHGW,
@ -156,7 +157,7 @@ def up(file, path, foreground):
# Set the ARCH_CONFIG_FILE environment variable
env_stage = {
"OTEL_TRACING_HTTP_ENDPOINT": "http://host.docker.internal:4318/v1/traces",
"OTEL_TRACING_GRPC_ENDPOINT": DEFAULT_OTEL_TRACING_GRPC_ENDPOINT,
}
env = os.environ.copy()
# Remove PATH variable if present

View file

@ -382,6 +382,8 @@ properties:
type: integer
trace_arch_internal:
type: boolean
opentracing_grpc_endpoint:
type: string
additionalProperties: false
mode:
type: string

View file

@ -21,4 +21,4 @@ services:
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
- OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
- OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317

View file

@ -54,6 +54,7 @@ static_resources:
service_name: plano(inbound)
random_sampling:
value: {{ arch_tracing.random_sampling }}
operation: "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
{% endif %}
stat_prefix: plano(inbound)
codec_type: AUTO
@ -206,7 +207,7 @@ static_resources:
- name: outbound_api_traffic
address:
socket_address:
address: 127.0.0.1
address: 0.0.0.0
port_value: 11000
traffic_direction: OUTBOUND
filter_chains:
@ -303,6 +304,7 @@ static_resources:
service_name: plano(inbound)
random_sampling:
value: {{ arch_tracing.random_sampling }}
operation: "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
{% endif %}
stat_prefix: {{ listener.name | replace(" ", "_") }}_traffic
codec_type: AUTO
@ -388,21 +390,6 @@ static_resources:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
generate_request_id: true
tracing:
provider:
name: envoy.tracers.opentelemetry
typed_config:
"@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
grpc_service:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: plano(outbound)
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
stat_prefix: egress_traffic
codec_type: AUTO
scheme_header_transformation:
@ -470,50 +457,6 @@ static_resources:
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
- name: otel_collector_proxy
address:
socket_address:
address: 127.0.0.1
port_value: 9903
traffic_direction: OUTBOUND
filter_chains:
- filters:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
stat_prefix: otel_proxy
codec_type: AUTO
# access_log:
# - name: envoy.access_loggers.file
# typed_config:
# "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
# path: "/var/log/access_otel.log"
# format: |
# [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
route_config:
name: otel_route
virtual_hosts:
- name: otel_backend
domains: ["*"]
routes:
- match:
prefix: "/v1/traces"
route:
cluster: opentelemetry_collector_http
timeout: 5s
retry_policy:
retry_on: "5xx,connect-failure,refused-stream,reset"
num_retries: 3
per_try_timeout: 2s
host_selection_retry_max_attempts: 5
retriable_status_codes: [500, 502, 503, 504]
http_filters:
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
{% endif %}
- name: egress_traffic_llm
address:
socket_address:
@ -524,6 +467,22 @@ static_resources:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
generate_request_id: true
tracing:
provider:
name: envoy.tracers.opentelemetry
typed_config:
"@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
grpc_service:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: plano(outbound)
random_sampling:
value: {{ arch_tracing.random_sampling }}
operation: "%REQ(:METHOD)% %REQ(:AUTHORITY)%%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
{% endif %}
stat_prefix: egress_traffic
codec_type: AUTO
scheme_header_transformation:
@ -1071,35 +1030,7 @@ static_resources:
- endpoint:
address:
socket_address:
address: host.docker.internal
port_value: 4317
- name: opentelemetry_collector_http
type: STRICT_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
typed_extension_protocol_options:
load_assignment:
cluster_name: opentelemetry_collector_http
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: host.docker.internal
port_value: 4318
# Circuit breaker configuration to prevent overwhelming OTEL collector
circuit_breakers:
thresholds:
- priority: DEFAULT
max_connections: 100
max_pending_requests: 100
max_requests: 100
max_retries: 3
# Health checking and outlier detection
outlier_detection:
consecutive_5xx: 5
interval: 10s
base_ejection_time: 30s
max_ejection_percent: 50
enforcing_consecutive_5xx: 100
{% set _otel_endpoint = arch_tracing.opentracing_grpc_endpoint | default('host.docker.internal:4317') | replace("http://", "") | replace("https://", "") %}
address: {{ _otel_endpoint.split(":") | first }}
port_value: {{ _otel_endpoint.split(":") | last }}
{% endif %}

View file

@ -13,7 +13,10 @@
"env": {
"RUST_LOG": "debug",
"RUST_BACKTRACE": "1",
"ARCH_CONFIG_PATH_RENDERED": "../demos/use_cases/preference_based_routing/arch_config_rendered.yaml"
"ARCH_CONFIG_PATH_RENDERED": "../demos/use_cases/multi_agent_with_crewai_langchain/config.yaml_rendered",
// "ARCH_CONFIG_PATH_RENDERED": "../demos/use_cases/preference_based_routing/config.yaml_rendered",
"OTEL_TRACING_GRPC_ENDPOINT": "http://localhost:4317",
"OTEL_TRACING_ENABLED": "true"
},
"preLaunchTask": "rust: cargo build"
}

177
crates/Cargo.lock generated
View file

@ -1,6 +1,6 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
version = 4
[[package]]
name = "acap"
@ -195,7 +195,7 @@ dependencies = [
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tower 0.5.2",
"tower",
"tower-layer",
"tower-service",
"tracing",
@ -354,7 +354,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
dependencies = [
"memchr",
"regex-automata 0.4.9",
"regex-automata",
"serde",
]
@ -445,7 +445,6 @@ dependencies = [
"pretty_assertions",
"proxy-wasm",
"rand 0.8.5",
"reqwest",
"serde",
"serde_json",
"serde_with",
@ -454,7 +453,6 @@ dependencies = [
"thiserror 1.0.69",
"tiktoken-rs",
"tokio",
"tracing",
"url",
"urlencoding",
]
@ -946,12 +944,6 @@ version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
[[package]]
name = "glob"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
[[package]]
name = "governor"
version = "0.6.3"
@ -1573,11 +1565,11 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
[[package]]
name = "matchers"
version = "0.1.0"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9"
dependencies = [
"regex-automata 0.1.10",
"regex-automata",
]
[[package]]
@ -1719,12 +1711,11 @@ checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21"
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
version = "0.50.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
dependencies = [
"overload",
"winapi",
"windows-sys 0.59.0",
]
[[package]]
@ -1812,9 +1803,9 @@ dependencies = [
[[package]]
name = "opentelemetry"
version = "0.29.1"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e87237e2775f74896f9ad219d26a2081751187eb7c9f5c58dde20a23b95d16c"
checksum = "b84bcd6ae87133e903af7ef497404dda70c60d0ea14895fc8a5e6722754fc2a0"
dependencies = [
"futures-core",
"futures-sink",
@ -1826,25 +1817,23 @@ dependencies = [
[[package]]
name = "opentelemetry-http"
version = "0.29.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46d7ab32b827b5b495bd90fa95a6cb65ccc293555dcc3199ae2937d2d237c8ed"
checksum = "d7a6d09a73194e6b66df7c8f1b680f156d916a1a942abf2de06823dd02b7855d"
dependencies = [
"async-trait",
"bytes",
"http 1.3.1",
"opentelemetry",
"reqwest",
"tracing",
]
[[package]]
name = "opentelemetry-otlp"
version = "0.29.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d899720fe06916ccba71c01d04ecd77312734e2de3467fd30d9d580c8ce85656"
checksum = "7a2366db2dca4d2ad033cad11e6ee42844fd727007af5ad04a1730f4cb8163bf"
dependencies = [
"futures-core",
"http 1.3.1",
"opentelemetry",
"opentelemetry-http",
@ -1860,44 +1849,43 @@ dependencies = [
[[package]]
name = "opentelemetry-proto"
version = "0.29.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c40da242381435e18570d5b9d50aca2a4f4f4d8e146231adb4e7768023309b3"
checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f"
dependencies = [
"opentelemetry",
"opentelemetry_sdk",
"prost",
"tonic",
"tonic-prost",
]
[[package]]
name = "opentelemetry-stdout"
version = "0.29.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7e27d446dabd68610ef0b77d07b102ecde827a4596ea9c01a4d3811e945b286"
checksum = "bc8887887e169414f637b18751487cce4e095be787d23fad13c454e2fb1b3811"
dependencies = [
"chrono",
"futures-util",
"opentelemetry",
"opentelemetry_sdk",
]
[[package]]
name = "opentelemetry_sdk"
version = "0.29.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afdefb21d1d47394abc1ba6c57363ab141be19e27cc70d0e422b7f303e4d290b"
checksum = "e14ae4f5991976fd48df6d843de219ca6d31b01daaab2dad5af2badeded372bd"
dependencies = [
"futures-channel",
"futures-executor",
"futures-util",
"glob",
"opentelemetry",
"percent-encoding",
"rand 0.9.2",
"serde_json",
"thiserror 2.0.12",
"tracing",
"tokio",
"tokio-stream",
]
[[package]]
@ -1906,12 +1894,6 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e"
[[package]]
name = "overload"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "parking_lot"
version = "0.12.4"
@ -2101,9 +2083,9 @@ dependencies = [
[[package]]
name = "prost"
version = "0.13.5"
version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5"
checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568"
dependencies = [
"bytes",
"prost-derive",
@ -2111,9 +2093,9 @@ dependencies = [
[[package]]
name = "prost-derive"
version = "0.13.5"
version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b"
dependencies = [
"anyhow",
"itertools",
@ -2298,17 +2280,8 @@ checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata 0.4.9",
"regex-syntax 0.8.5",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"regex-syntax 0.6.29",
"regex-automata",
"regex-syntax",
]
[[package]]
@ -2319,15 +2292,9 @@ checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax 0.8.5",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "regex-syntax"
version = "0.8.5"
@ -2375,7 +2342,7 @@ dependencies = [
"tokio-native-tls",
"tokio-rustls 0.26.2",
"tokio-util",
"tower 0.5.2",
"tower",
"tower-http",
"tower-service",
"url",
@ -3205,9 +3172,9 @@ dependencies = [
[[package]]
name = "tonic"
version = "0.12.3"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
checksum = "eb7613188ce9f7df5bfe185db26c5814347d110db17920415cf2fbcad85e7203"
dependencies = [
"async-trait",
"base64 0.22.1",
@ -3220,33 +3187,24 @@ dependencies = [
"hyper-util",
"percent-encoding",
"pin-project",
"prost",
"sync_wrapper",
"tokio",
"tokio-stream",
"tower 0.4.13",
"tower",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tower"
version = "0.4.13"
name = "tonic-prost"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67"
dependencies = [
"futures-core",
"futures-util",
"indexmap 1.9.3",
"pin-project",
"pin-project-lite",
"rand 0.8.5",
"slab",
"tokio",
"tokio-util",
"tower-layer",
"tower-service",
"tracing",
"bytes",
"prost",
"tonic",
]
[[package]]
@ -3257,9 +3215,12 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
dependencies = [
"futures-core",
"futures-util",
"indexmap 2.9.0",
"pin-project-lite",
"slab",
"sync_wrapper",
"tokio",
"tokio-util",
"tower-layer",
"tower-service",
"tracing",
@ -3278,7 +3239,7 @@ dependencies = [
"http-body 1.0.1",
"iri-string",
"pin-project-lite",
"tower 0.5.2",
"tower",
"tower-layer",
"tower-service",
]
@ -3297,9 +3258,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
[[package]]
name = "tracing"
version = "0.1.41"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
dependencies = [
"log",
"pin-project-lite",
@ -3309,9 +3270,9 @@ dependencies = [
[[package]]
name = "tracing-attributes"
version = "0.1.28"
version = "0.1.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d"
checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
dependencies = [
"proc-macro2",
"quote",
@ -3320,9 +3281,9 @@ dependencies = [
[[package]]
name = "tracing-core"
version = "0.1.33"
version = "0.1.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c"
checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
dependencies = [
"once_cell",
"valuable",
@ -3341,14 +3302,12 @@ dependencies = [
[[package]]
name = "tracing-opentelemetry"
version = "0.30.0"
version = "0.32.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd8e764bd6f5813fd8bebc3117875190c5b0415be8f7f8059bffb6ecd979c444"
checksum = "1ac28f2d093c6c477eaa76b23525478f38de514fa9aeb1285738d4b97a9552fc"
dependencies = [
"js-sys",
"once_cell",
"opentelemetry",
"opentelemetry_sdk",
"smallvec",
"tracing",
"tracing-core",
@ -3359,14 +3318,14 @@ dependencies = [
[[package]]
name = "tracing-subscriber"
version = "0.3.19"
version = "0.3.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008"
checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e"
dependencies = [
"matchers",
"nu-ansi-term",
"once_cell",
"regex",
"regex-automata",
"sharded-slab",
"smallvec",
"thread_local",
@ -3684,28 +3643,6 @@ dependencies = [
"web-sys",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-core"
version = "0.61.2"

View file

@ -1,7 +1,3 @@
[workspace]
resolver = "2"
members = ["llm_gateway", "prompt_gateway", "common", "brightstaff", "hermesllm"]
[workspace.metadata.rust-analyzer]
# Enable features for better IDE support
cargo.features = ["trace-collection"]

View file

@ -8,7 +8,7 @@ async-openai = "0.30.1"
async-trait = "0.1"
bytes = "1.10.1"
chrono = "0.4"
common = { version = "0.1.0", path = "../common", features = ["trace-collection"] }
common = { version = "0.1.0", path = "../common" }
eventsource-client = "0.15.0"
eventsource-stream = "0.2.3"
flate2 = "1.0"
@ -19,11 +19,11 @@ http-body = "1.0.1"
http-body-util = "0.1.3"
hyper = { version = "1.6.0", features = ["full"] }
hyper-util = "0.1.11"
opentelemetry = "0.29.1"
opentelemetry-http = "0.29.0"
opentelemetry-otlp = {version="0.29.0", features=["trace", "tonic", "grpc-tonic"]}
opentelemetry-stdout = "0.29.0"
opentelemetry_sdk = "0.29.0"
opentelemetry = "0.31"
opentelemetry-http = "0.31"
opentelemetry-otlp = {version="0.31", features=["trace", "grpc-tonic"]}
opentelemetry-stdout = "0.31"
opentelemetry_sdk = { version = "0.31", features = ["rt-tokio"] }
pretty_assertions = "1.4.1"
rand = "0.9.2"
reqwest = { version = "0.12.15", features = ["stream"] }
@ -38,6 +38,7 @@ tokio-postgres = { version = "0.7", features = ["with-serde_json-1"] }
tokio-stream = "0.1"
time = { version = "0.3", features = ["formatting", "macros"] }
tracing = "0.1"
tracing-opentelemetry = "0.32.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
uuid = { version = "1.0", features = ["v4", "serde"] }
@ -45,5 +46,5 @@ uuid = { version = "1.0", features = ["v4", "serde"] }
mockito = "1.0"
tokio-stream = "0.1.17"
tracing = "0.1.41"
tracing-opentelemetry = "0.30.0"
tracing-opentelemetry = "0.32.1"
tracing-subscriber = { version = "0.3.19", features = ["env-filter", "fmt", "time"] }

View file

@ -1,9 +1,7 @@
use std::sync::Arc;
use std::time::{Instant, SystemTime};
use std::time::Instant;
use bytes::Bytes;
use common::consts::TRACE_PARENT_HEADER;
use common::traces::{generate_random_span_id, parse_traceparent, SpanBuilder, SpanKind};
use hermesllm::apis::OpenAIMessage;
use hermesllm::clients::SupportedAPIsFromClient;
use hermesllm::providers::request::ProviderRequest;
@ -11,14 +9,15 @@ use hermesllm::ProviderRequestType;
use http_body_util::combinators::BoxBody;
use http_body_util::BodyExt;
use hyper::{Request, Response};
use opentelemetry::trace::get_active_span;
use serde::ser::Error as SerError;
use tracing::{debug, info, warn};
use tracing::{debug, info, info_span, warn, Instrument};
use super::agent_selector::{AgentSelectionError, AgentSelector};
use super::pipeline_processor::{PipelineError, PipelineProcessor};
use super::response_handler::ResponseHandler;
use crate::router::plano_orchestrator::OrchestratorService;
use crate::tracing::{http, operation_component, OperationNameBuilder};
use crate::tracing::{operation_component, set_service_name};
/// Main errors for agent chat completions
#[derive(Debug, thiserror::Error)]
@ -41,92 +40,122 @@ pub async fn agent_chat(
_: String,
agents_list: Arc<tokio::sync::RwLock<Option<Vec<common::configuration::Agent>>>>,
listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
trace_collector: Arc<common::traces::TraceCollector>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
match handle_agent_chat(
request,
orchestrator_service,
agents_list,
listeners,
trace_collector,
)
.await
// Extract request_id from headers or generate a new one
let request_id: String = match request
.headers()
.get(common::consts::REQUEST_ID_HEADER)
.and_then(|h| h.to_str().ok())
.map(|s| s.to_string())
{
Ok(response) => Ok(response),
Err(err) => {
// Check if this is a client error from the pipeline that should be cascaded
if let AgentFilterChainError::Pipeline(PipelineError::ClientError {
agent,
status,
body,
}) = &err
{
warn!(
"Client error from agent '{}' (HTTP {}): {}",
agent, status, body
);
Some(id) => id,
None => uuid::Uuid::new_v4().to_string(),
};
// Create error response with the original status code and body
// Create a span with request_id that will be included in all log lines
let request_span = info_span!(
"(orchestrator)",
component = "orchestrator",
request_id = %request_id,
http.method = %request.method(),
http.path = %request.uri().path()
);
// Execute the handler inside the span
async {
// Set service name for orchestrator operations
set_service_name(operation_component::ORCHESTRATOR);
match handle_agent_chat_inner(
request,
orchestrator_service,
agents_list,
listeners,
request_id,
)
.await
{
Ok(response) => Ok(response),
Err(err) => {
// Check if this is a client error from the pipeline that should be cascaded
if let AgentFilterChainError::Pipeline(PipelineError::ClientError {
agent,
status,
body,
}) = &err
{
warn!(
agent = %agent,
status = %status,
body = %body,
"client error from agent"
);
// Create error response with the original status code and body
let error_json = serde_json::json!({
"error": "ClientError",
"agent": agent,
"status": status,
"agent_response": body
});
let json_string = error_json.to_string();
let mut response =
Response::new(ResponseHandler::create_full_body(json_string));
*response.status_mut() = hyper::StatusCode::from_u16(*status)
.unwrap_or(hyper::StatusCode::BAD_REQUEST);
response.headers_mut().insert(
hyper::header::CONTENT_TYPE,
"application/json".parse().unwrap(),
);
return Ok(response);
}
// Print detailed error information with full error chain for other errors
let mut error_chain = Vec::new();
let mut current_error: &dyn std::error::Error = &err;
// Collect the full error chain
loop {
error_chain.push(current_error.to_string());
match current_error.source() {
Some(source) => current_error = source,
None => break,
}
}
// Log the complete error chain
warn!(error_chain = ?error_chain, "agent chat error chain");
warn!(root_error = ?err, "root error");
// Create structured error response as JSON
let error_json = serde_json::json!({
"error": "ClientError",
"agent": agent,
"status": status,
"agent_response": body
"error": {
"type": "AgentFilterChainError",
"message": err.to_string(),
"error_chain": error_chain,
"debug_info": format!("{:?}", err)
}
});
let json_string = error_json.to_string();
let mut response = Response::new(ResponseHandler::create_full_body(json_string));
*response.status_mut() =
hyper::StatusCode::from_u16(*status).unwrap_or(hyper::StatusCode::BAD_REQUEST);
response.headers_mut().insert(
hyper::header::CONTENT_TYPE,
"application/json".parse().unwrap(),
);
return Ok(response);
// Log the error for debugging
info!(error = %error_json, "structured error info");
// Return JSON error response
Ok(ResponseHandler::create_json_error_response(&error_json))
}
// Print detailed error information with full error chain for other errors
let mut error_chain = Vec::new();
let mut current_error: &dyn std::error::Error = &err;
// Collect the full error chain
loop {
error_chain.push(current_error.to_string());
match current_error.source() {
Some(source) => current_error = source,
None => break,
}
}
// Log the complete error chain
warn!("Agent chat error chain: {:#?}", error_chain);
warn!("Root error: {:?}", err);
// Create structured error response as JSON
let error_json = serde_json::json!({
"error": {
"type": "AgentFilterChainError",
"message": err.to_string(),
"error_chain": error_chain,
"debug_info": format!("{:?}", err)
}
});
// Log the error for debugging
info!("Structured error info: {}", error_json);
// Return JSON error response
Ok(ResponseHandler::create_json_error_response(&error_json))
}
}
.instrument(request_span)
.await
}
async fn handle_agent_chat(
async fn handle_agent_chat_inner(
request: Request<hyper::body::Incoming>,
orchestrator_service: Arc<OrchestratorService>,
agents_list: Arc<tokio::sync::RwLock<Option<Vec<common::configuration::Agent>>>>,
listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
trace_collector: Arc<common::traces::TraceCollector>,
request_id: String,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, AgentFilterChainError> {
// Initialize services
let agent_selector = AgentSelector::new(orchestrator_service);
@ -140,14 +169,18 @@ async fn handle_agent_chat(
.and_then(|name| name.to_str().ok());
// Find the appropriate listener
let listener = {
let listener: common::configuration::Listener = {
let listeners = listeners.read().await;
agent_selector
.find_listener(listener_name, &listeners)
.await?
};
info!("Handling request for listener: {}", listener.name);
get_active_span(|span| {
span.update_name(listener.name.to_string());
});
info!(listener = %listener.name, "handling request");
// Parse request body
let request_path = request
@ -162,12 +195,8 @@ async fn handle_agent_chat(
let mut headers = request.headers().clone();
headers.remove(common::consts::ENVOY_ORIGINAL_PATH_HEADER);
// Set the request_id in headers if not already present
if !headers.contains_key(common::consts::REQUEST_ID_HEADER) {
let request_id = uuid::Uuid::new_v4().to_string();
info!(
"Request id not found in headers, generated new request id: {}",
request_id
);
headers.insert(
common::consts::REQUEST_ID_HEADER,
hyper::header::HeaderValue::from_str(&request_id).unwrap(),
@ -180,8 +209,8 @@ async fn handle_agent_chat(
let chat_request_bytes = request.collect().await?.to_bytes();
debug!(
"Received request body (raw utf8): {}",
String::from_utf8_lossy(&chat_request_bytes)
body = %String::from_utf8_lossy(&chat_request_bytes),
"received request body"
);
// Determine the API type from the endpoint
@ -195,7 +224,7 @@ async fn handle_agent_chat(
let client_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &api_type)) {
Ok(request) => request,
Err(err) => {
warn!("Failed to parse request as ProviderRequestType: {}", err);
warn!("failed to parse request as ProviderRequestType: {}", err);
let err_msg = format!("Failed to parse request: {}", err);
return Err(AgentFilterChainError::RequestParsing(
serde_json::Error::custom(err_msg),
@ -205,12 +234,6 @@ async fn handle_agent_chat(
let message: Vec<OpenAIMessage> = client_request.get_messages();
// Extract trace parent for routing
let traceparent = request_headers
.iter()
.find(|(key, _)| key.as_str() == TRACE_PARENT_HEADER)
.map(|(_, value)| value.to_str().unwrap_or_default().to_string());
let request_id = request_headers
.get(common::consts::REQUEST_ID_HEADER)
.and_then(|val| val.to_str().ok())
@ -223,87 +246,58 @@ async fn handle_agent_chat(
agent_selector.create_agent_map(agents)
};
// Parse trace parent to get trace_id and parent_span_id
let (trace_id, parent_span_id) = if let Some(ref tp) = traceparent {
parse_traceparent(tp)
} else {
(String::new(), None)
};
// Select appropriate agents using arch orchestrator llm model
let selection_span_id = generate_random_span_id();
let selection_start_time = SystemTime::now();
let selection_start_instant = Instant::now();
let selection_start = Instant::now();
let selected_agents = agent_selector
.select_agents(&message, &listener, traceparent.clone(), request_id.clone())
.select_agents(&message, &listener, request_id.clone())
.await?;
// Record agent selection span
let selection_end_time = SystemTime::now();
let selection_elapsed = selection_start_instant.elapsed();
let selection_operation_name = OperationNameBuilder::new()
.with_method("POST")
.with_path("/agents/select")
.with_target(&listener.name)
.build();
let mut selection_span_builder = SpanBuilder::new(&selection_operation_name)
.with_span_id(selection_span_id)
.with_kind(SpanKind::Internal)
.with_start_time(selection_start_time)
.with_end_time(selection_end_time)
.with_attribute(http::METHOD, "POST")
.with_attribute(http::TARGET, "/agents/select")
.with_attribute("selection.listener", listener.name.clone())
.with_attribute("selection.agent_count", selected_agents.len().to_string())
.with_attribute(
// Record selection attributes on the current orchestrator span
let selection_elapsed_ms = selection_start.elapsed().as_secs_f64() * 1000.0;
get_active_span(|span| {
span.set_attribute(opentelemetry::KeyValue::new(
"selection.listener",
listener.name.clone(),
));
span.set_attribute(opentelemetry::KeyValue::new(
"selection.agent_count",
selected_agents.len() as i64,
));
span.set_attribute(opentelemetry::KeyValue::new(
"selection.agents",
selected_agents
.iter()
.map(|a| a.id.as_str())
.collect::<Vec<_>>()
.join(","),
)
.with_attribute(
"duration_ms",
format!("{:.2}", selection_elapsed.as_secs_f64() * 1000.0),
);
));
span.set_attribute(opentelemetry::KeyValue::new(
"selection.determination_ms",
format!("{:.2}", selection_elapsed_ms),
));
});
if !trace_id.is_empty() {
selection_span_builder = selection_span_builder.with_trace_id(trace_id.clone());
}
if let Some(parent_id) = parent_span_id.clone() {
selection_span_builder = selection_span_builder.with_parent_span_id(parent_id);
}
let selection_span = selection_span_builder.build();
trace_collector.record_span(operation_component::ORCHESTRATOR, selection_span);
info!("Selected {} agent(s) for execution", selected_agents.len());
info!(
count = selected_agents.len(),
"selected agents for execution"
);
// Execute agents sequentially, passing output from one to the next
let mut current_messages = message.clone();
let agent_count = selected_agents.len();
for (agent_index, selected_agent) in selected_agents.iter().enumerate() {
// Get agent name
let agent_name = selected_agent.id.clone();
let is_last_agent = agent_index == agent_count - 1;
debug!(
"Processing agent {}/{}: {}",
agent_index + 1,
agent_count,
selected_agent.id
agent_index = agent_index + 1,
total = agent_count,
agent = %agent_name,
"processing agent"
);
// Record the start time for agent span
let agent_start_time = SystemTime::now();
let agent_start_instant = Instant::now();
let span_id = generate_random_span_id();
// Get agent name
let agent_name = selected_agent.id.clone();
// Process the filter chain
let chat_history = pipeline_processor
.process_filter_chain(
@ -311,88 +305,71 @@ async fn handle_agent_chat(
selected_agent,
&agent_map,
&request_headers,
Some(&trace_collector),
trace_id.clone(),
span_id.clone(),
)
.await?;
// Get agent details and invoke
let agent = agent_map.get(&agent_name).unwrap();
debug!("Invoking agent: {}", agent_name);
debug!(agent = %agent_name, "invoking agent");
let llm_response = pipeline_processor
.invoke_agent(
&chat_history,
client_request.clone(),
agent,
&request_headers,
trace_id.clone(),
span_id.clone(),
)
.await?;
let agent_span = info_span!(
"agent",
agent_id = %agent_name,
message_count = chat_history.len(),
);
// Record agent span
let agent_end_time = SystemTime::now();
let agent_elapsed = agent_start_instant.elapsed();
let full_path = format!("/agents{}", request_path);
let operation_name = OperationNameBuilder::new()
.with_method("POST")
.with_path(&full_path)
.with_target(&agent_name)
.build();
let llm_response = async {
set_service_name(operation_component::AGENT);
get_active_span(|span| {
span.update_name(format!("{} /v1/chat/completions", agent_name));
});
let mut span_builder = SpanBuilder::new(&operation_name)
.with_span_id(span_id)
.with_kind(SpanKind::Internal)
.with_start_time(agent_start_time)
.with_end_time(agent_end_time)
.with_attribute(http::METHOD, "POST")
.with_attribute(http::TARGET, full_path)
.with_attribute("agent.name", agent_name.clone())
.with_attribute(
"agent.sequence",
format!("{}/{}", agent_index + 1, agent_count),
)
.with_attribute(
"duration_ms",
format!("{:.2}", agent_elapsed.as_secs_f64() * 1000.0),
);
if !trace_id.is_empty() {
span_builder = span_builder.with_trace_id(trace_id.clone());
pipeline_processor
.invoke_agent(
&chat_history,
client_request.clone(),
agent,
&request_headers,
)
.await
}
if let Some(parent_id) = parent_span_id.clone() {
span_builder = span_builder.with_parent_span_id(parent_id);
}
let span = span_builder.build();
trace_collector.record_span(operation_component::AGENT, span);
.instrument(agent_span.clone())
.await?;
// If this is the last agent, return the streaming response
if is_last_agent {
info!(
"Completed agent chain, returning response from last agent: {}",
agent_name
agent = %agent_name,
"completed agent chain, returning response"
);
return response_handler
.create_streaming_response(llm_response)
.await
.map_err(AgentFilterChainError::from);
// Capture the orchestrator span (parent of the agent span) so it
// stays open for the full streaming duration alongside the agent span.
let orchestrator_span = tracing::Span::current();
return async {
response_handler
.create_streaming_response(
llm_response,
tracing::Span::current(), // agent span (inner)
orchestrator_span, // orchestrator span (outer)
)
.await
.map_err(AgentFilterChainError::from)
}
.instrument(agent_span)
.await;
}
// For intermediate agents, collect the full response and pass to next agent
debug!(
"Collecting response from intermediate agent: {}",
agent_name
);
let response_text = response_handler.collect_full_response(llm_response).await?;
debug!(agent = %agent_name, "collecting response from intermediate agent");
let response_text = async { response_handler.collect_full_response(llm_response).await }
.instrument(agent_span)
.await?;
info!(
"Agent {} completed, passing {} character response to next agent",
agent_name,
response_text.len()
agent = %agent_name,
response_len = response_text.len(),
"agent completed, passing response to next agent"
);
// remove last message and add new one at the end

View file

@ -75,7 +75,7 @@ impl AgentSelector {
.cloned()
.or_else(|| {
warn!(
"No default agent found, routing request to first agent: {}",
"no default agent found, routing request to first agent: {}",
agents[0].id
);
Some(agents[0].clone())
@ -108,7 +108,6 @@ impl AgentSelector {
&self,
messages: &[Message],
listener: &Listener,
trace_parent: Option<String>,
request_id: Option<String>,
) -> Result<Vec<AgentFilterChain>, AgentSelectionError> {
let agents = listener
@ -118,7 +117,7 @@ impl AgentSelector {
// If only one agent, skip orchestration
if agents.len() == 1 {
debug!("Only one agent available, skipping orchestration");
debug!("only one agent available, skipping orchestration");
return Ok(vec![agents[0].clone()]);
}
@ -132,15 +131,15 @@ impl AgentSelector {
match self
.orchestrator_service
.determine_orchestration(messages, trace_parent, Some(usage_preferences), request_id)
.determine_orchestration(messages, Some(usage_preferences), request_id)
.await
{
Ok(Some(routes)) => {
debug!("Determined {} agent(s) via orchestration", routes.len());
debug!(count = routes.len(), "determined agents via orchestration");
let mut selected_agents = Vec::new();
for (route_name, agent_name) in routes {
debug!("Processing route: {}, agent: {}", route_name, agent_name);
debug!(route = %route_name, agent = %agent_name, "processing route");
let selected_agent = agents
.iter()
.find(|a| a.id == agent_name)
@ -155,14 +154,14 @@ impl AgentSelector {
}
if selected_agents.is_empty() {
debug!("No agents determined using orchestration, using default agent");
debug!("no agents determined via orchestration, using default");
Ok(vec![self.get_default_agent(agents, &listener.name)?])
} else {
Ok(selected_agents)
}
}
Ok(None) => {
debug!("No agents determined using orchestration, using default agent");
debug!("no agents determined using orchestration, using default agent");
Ok(vec![self.get_default_agent(agents, &listener.name)?])
}
Err(err) => Err(AgentSelectionError::OrchestrationError(err.to_string())),

View file

@ -944,7 +944,7 @@ impl ArchFunctionHandler {
) -> Result<ChatCompletionsResponse> {
use tracing::{error, info};
info!("[Arch-Function] - ChatCompletion");
info!("processing chat completion request");
let messages = self.process_messages(
&request.messages,
@ -955,9 +955,9 @@ impl ArchFunctionHandler {
)?;
info!(
"[request to arch-fc]: model: {}, messages count: {}",
self.model_name,
messages.len()
model = %self.model_name,
message_count = messages.len(),
"sending request to arch-fc"
);
let use_agent_orchestrator = request
@ -991,7 +991,7 @@ impl ArchFunctionHandler {
}
}
}
info!("[Agent Orchestrator]: response received");
info!("agent orchestrator response received");
} else if let Some(tools) = request.tools.as_ref() {
let mut hallucination_state = HallucinationState::new(tools);
let mut has_tool_calls = None;
@ -1040,7 +1040,10 @@ impl ArchFunctionHandler {
}
if has_tool_calls == Some(true) && has_hallucination {
info!("[Hallucination]: {}", hallucination_state.error_message);
info!(
"detected hallucination: {}",
hallucination_state.error_message
);
let clarify_messages = self.prefill_message(messages.clone(), &self.clarify_prefix);
let clarify_request = self.create_request_with_extra_body(clarify_messages, false);
@ -1075,8 +1078,8 @@ impl ArchFunctionHandler {
let response_dict = self.parse_model_response(&model_response);
info!(
"[arch-fc]: raw model response: {}",
response_dict.raw_response
raw_response = %response_dict.raw_response,
"arch-fc model response"
);
// General model response (no intent matched - should route to default target)
@ -1126,7 +1129,7 @@ impl ArchFunctionHandler {
if verification.is_valid {
info!(
"[Tool calls]: {:?}",
"tool calls extracted: {:?}",
response_dict
.tool_calls
.iter()
@ -1143,7 +1146,7 @@ impl ArchFunctionHandler {
tool_calls: Some(response_dict.tool_calls.clone()),
}
} else {
error!("Invalid tool call - {}", verification.error_message);
error!(error = %verification.error_message, "invalid tool call");
ResponseMessage {
role: Role::Assistant,
content: Some(String::new()),
@ -1155,7 +1158,7 @@ impl ArchFunctionHandler {
}
}
} else {
error!("Tool calls present but no tools provided in request");
error!("tool calls present but no tools provided in request");
ResponseMessage {
role: Role::Assistant,
content: Some(String::new()),
@ -1168,7 +1171,7 @@ impl ArchFunctionHandler {
}
} else {
info!(
"[Tool calls]: {:?}",
"tool calls extracted: {:?}",
response_dict
.tool_calls
.iter()
@ -1187,8 +1190,8 @@ impl ArchFunctionHandler {
}
} else {
error!(
"Invalid tool calls in response: {}",
response_dict.error_message
error = %response_dict.error_message,
"invalid tool calls in response"
);
ResponseMessage {
role: Role::Assistant,
@ -1201,7 +1204,7 @@ impl ArchFunctionHandler {
}
}
} else {
error!("Invalid model response - {}", model_response);
error!(response = %model_response, "invalid model response");
ResponseMessage {
role: Role::Assistant,
content: Some(String::new()),
@ -1244,7 +1247,7 @@ impl ArchFunctionHandler {
metadata: Some(metadata),
};
info!("[response arch-fc]: {:?}", chat_completion_response);
info!(response = ?chat_completion_response, "arch-fc response");
Ok(chat_completion_response)
}
@ -1331,7 +1334,7 @@ pub async fn function_calling_chat_handler(
let mut body_json: Value = match serde_json::from_slice(&whole_body) {
Ok(json) => json,
Err(e) => {
error!("Failed to parse request body as JSON: {}", e);
error!(error = %e, "failed to parse request body as json");
let mut response = Response::new(full(
serde_json::json!({
"error": format!("Invalid request body: {}", e)
@ -1355,13 +1358,13 @@ pub async fn function_calling_chat_handler(
let chat_request: ChatCompletionsRequest = match serde_json::from_value(body_json) {
Ok(req) => {
info!(
"[request body]: {}",
serde_json::to_string(&req).unwrap_or_default()
request_body = %serde_json::to_string(&req).unwrap_or_default(),
"received request"
);
req
}
Err(e) => {
error!("Failed to parse request body: {}", e);
error!(error = %e, "failed to parse request body");
let mut response = Response::new(full(
serde_json::json!({
"error": format!("Invalid request body: {}", e)
@ -1384,7 +1387,10 @@ pub async fn function_calling_chat_handler(
.and_then(|v| v.as_bool())
.unwrap_or(false);
info!("Use agent orchestrator: {}", use_agent_orchestrator);
info!(
use_agent_orchestrator = use_agent_orchestrator,
"handler mode"
);
// Create the appropriate handler
let handler_name = if use_agent_orchestrator {
@ -1415,7 +1421,7 @@ pub async fn function_calling_chat_handler(
match final_response {
Ok(response_data) => {
let response_json = serde_json::to_string(&response_data).unwrap_or_else(|e| {
error!("Failed to serialize response: {}", e);
error!(error = %e, "failed to serialize response");
serde_json::json!({"error": "Failed to serialize response"}).to_string()
});
@ -1428,7 +1434,7 @@ pub async fn function_calling_chat_handler(
Ok(response)
}
Err(e) => {
error!("[{}] - Error in function calling: {}", handler_name, e);
error!(handler = handler_name, error = %e, "error in function calling");
let error_response = serde_json::json!({
"error": format!("[{}] - Error in function calling: {}", handler_name, e)

View file

@ -112,15 +112,7 @@ mod tests {
let headers = HeaderMap::new();
let result = pipeline_processor
.process_filter_chain(
&request.messages,
&test_pipeline,
&agent_map,
&headers,
None,
String::new(),
String::new(),
)
.process_filter_chain(&request.messages, &test_pipeline, &agent_map, &headers)
.await;
println!("Pipeline processing result: {:?}", result);

View file

@ -4,7 +4,6 @@ use common::consts::{
ARCH_IS_STREAMING_HEADER, ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER, TRACE_PARENT_HEADER,
};
use common::llm_providers::LlmProviders;
use common::traces::TraceCollector;
use hermesllm::apis::openai_responses::InputParam;
use hermesllm::clients::{SupportedAPIsFromClient, SupportedUpstreamAPIs};
use hermesllm::{ProviderRequest, ProviderRequestType};
@ -12,10 +11,13 @@ use http_body_util::combinators::BoxBody;
use http_body_util::{BodyExt, Full};
use hyper::header::{self};
use hyper::{Request, Response, StatusCode};
use opentelemetry::global;
use opentelemetry::trace::get_active_span;
use opentelemetry_http::HeaderInjector;
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
use tracing::{debug, info, warn};
use tracing::{debug, info, info_span, warn, Instrument};
use crate::handlers::router_chat::router_chat_get_upstream_model;
use crate::handlers::utils::{
@ -26,7 +28,7 @@ use crate::state::response_state_processor::ResponsesStateProcessor;
use crate::state::{
extract_input_items, retrieve_and_combine_input, StateStorage, StateStorageError,
};
use crate::tracing::operation_component;
use crate::tracing::{operation_component, set_service_name};
fn full<T: Into<Bytes>>(chunk: T) -> BoxBody<Bytes, hyper::Error> {
Full::new(chunk.into())
@ -40,7 +42,6 @@ pub async fn llm_chat(
full_qualified_llm_provider_url: String,
model_aliases: Arc<Option<HashMap<String, ModelAlias>>>,
llm_providers: Arc<RwLock<LlmProviders>>,
trace_collector: Arc<TraceCollector>,
state_storage: Option<Arc<dyn StateStorage>>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
let request_path = request.uri().path().to_string();
@ -51,16 +52,49 @@ pub async fn llm_chat(
.map(|s| s.to_string())
{
Some(id) => id,
None => {
let generated_id = uuid::Uuid::new_v4().to_string();
warn!(
"[PLANO_REQ_ID:{}] | REQUEST_ID header missing, generated new ID",
generated_id
);
generated_id
}
None => uuid::Uuid::new_v4().to_string(),
};
// Create a span with request_id that will be included in all log lines
let request_span = info_span!(
"llm",
component = "llm",
request_id = %request_id,
http.method = %request.method(),
http.path = %request_path,
);
// Execute the rest of the handler inside the span
llm_chat_inner(
request,
router_service,
full_qualified_llm_provider_url,
model_aliases,
llm_providers,
state_storage,
request_id,
request_path,
request_headers,
)
.instrument(request_span)
.await
}
#[allow(clippy::too_many_arguments)]
async fn llm_chat_inner(
request: Request<hyper::body::Incoming>,
router_service: Arc<RouterService>,
full_qualified_llm_provider_url: String,
model_aliases: Arc<Option<HashMap<String, ModelAlias>>>,
llm_providers: Arc<RwLock<LlmProviders>>,
state_storage: Option<Arc<dyn StateStorage>>,
request_id: String,
request_path: String,
mut request_headers: hyper::HeaderMap,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
// Set service name for LLM operations
set_service_name(operation_component::LLM);
// Extract or generate traceparent - this establishes the trace context for all spans
let traceparent: String = match request_headers
.get(TRACE_PARENT_HEADER)
@ -73,20 +107,18 @@ pub async fn llm_chat(
let trace_id = Uuid::new_v4().to_string().replace("-", "");
let generated_tp = format!("00-{}-0000000000000000-01", trace_id);
warn!(
"[PLANO_REQ_ID:{}] | TRACE_PARENT header missing, generated new traceparent: {}",
request_id, generated_tp
generated_traceparent = %generated_tp,
"TRACE_PARENT header missing, generated new traceparent"
);
generated_tp
}
};
let mut request_headers = request_headers;
let chat_request_bytes = request.collect().await?.to_bytes();
debug!(
"[PLANO_REQ_ID:{}] | REQUEST_BODY (UTF8): {}",
request_id,
String::from_utf8_lossy(&chat_request_bytes)
body = %String::from_utf8_lossy(&chat_request_bytes),
"request body received"
);
let mut client_request = match ProviderRequestType::try_from((
@ -96,13 +128,10 @@ pub async fn llm_chat(
Ok(request) => request,
Err(err) => {
warn!(
"[PLANO_REQ_ID:{}] | FAILURE | Failed to parse request as ProviderRequestType: {}",
request_id, err
);
let err_msg = format!(
"[PLANO_REQ_ID:{}] | FAILURE | Failed to parse request: {}",
request_id, err
error = %err,
"failed to parse request as ProviderRequestType"
);
let err_msg = format!("Failed to parse request: {}", err);
let mut bad_request = Response::new(full(err_msg));
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
return Ok(bad_request);
@ -120,18 +149,23 @@ pub async fn llm_chat(
// Model alias resolution: update model field in client_request immediately
// This ensures all downstream objects use the resolved model
let model_from_request = client_request.model().to_string();
let temperature = client_request.get_temperature();
let _temperature = client_request.get_temperature();
let is_streaming_request = client_request.is_streaming();
let resolved_model = resolve_model_alias(&model_from_request, &model_aliases);
let alias_resolved_model = resolve_model_alias(&model_from_request, &model_aliases);
// Validate that the requested model exists in configuration
// This matches the validation in llm_gateway routing.rs
if llm_providers.read().await.get(&resolved_model).is_none() {
if llm_providers
.read()
.await
.get(&alias_resolved_model)
.is_none()
{
let err_msg = format!(
"Model '{}' not found in configured providers",
resolved_model
alias_resolved_model
);
warn!("[PLANO_REQ_ID:{}] | FAILURE | {}", request_id, err_msg);
warn!(model = %alias_resolved_model, "model not found in configured providers");
let mut bad_request = Response::new(full(err_msg));
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
return Ok(bad_request);
@ -139,29 +173,26 @@ pub async fn llm_chat(
// Handle provider/model slug format (e.g., "openai/gpt-4")
// Extract just the model name for upstream (providers don't understand the slug)
let model_name_only = if let Some((_, model)) = resolved_model.split_once('/') {
let model_name_only = if let Some((_, model)) = alias_resolved_model.split_once('/') {
model.to_string()
} else {
resolved_model.clone()
alias_resolved_model.clone()
};
// Extract tool names and user message preview for span attributes
let tool_names = client_request.get_tool_names();
let user_message_preview = client_request
let _tool_names = client_request.get_tool_names();
let _user_message_preview = client_request
.get_recent_user_message()
.map(|msg| truncate_message(&msg, 50));
// Extract messages for signal analysis (clone before moving client_request)
let messages_for_signals = client_request.get_messages();
let messages_for_signals = Some(client_request.get_messages());
// Set the model to just the model name (without provider prefix)
// This ensures upstream receives "gpt-4" not "openai/gpt-4"
client_request.set_model(model_name_only.clone());
if client_request.remove_metadata_key("archgw_preference_config") {
debug!(
"[PLANO_REQ_ID:{}] Removed archgw_preference_config from metadata",
request_id
);
debug!("removed archgw_preference_config from metadata");
}
// === v1/responses state management: Determine upstream API and combine input if needed ===
@ -180,9 +211,9 @@ pub async fn llm_chat(
// Get the upstream path and check if it's ResponsesAPI
let upstream_path = get_upstream_path(
&llm_providers,
&resolved_model,
&alias_resolved_model,
&request_path,
&resolved_model,
&alias_resolved_model,
is_streaming_request,
)
.await;
@ -209,14 +240,17 @@ pub async fn llm_chat(
// Update both the request and original_input_items
responses_req.input = InputParam::Items(combined_input.clone());
original_input_items = combined_input;
info!("[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Updated request with conversation history ({} items)", request_id, original_input_items.len());
info!(
items = original_input_items.len(),
"updated request with conversation history"
);
}
Err(StateStorageError::NotFound(_)) => {
// Return 409 Conflict when previous_response_id not found
warn!("[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Previous response_id not found: {}", request_id, prev_resp_id);
warn!(previous_response_id = %prev_resp_id, "previous response_id not found");
let err_msg = format!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Conversation state not found for previous_response_id: {}",
request_id, prev_resp_id
"Conversation state not found for previous_response_id: {}",
prev_resp_id
);
let mut conflict_response = Response::new(full(err_msg));
*conflict_response.status_mut() = StatusCode::CONFLICT;
@ -225,8 +259,9 @@ pub async fn llm_chat(
Err(e) => {
// Log warning but continue on other storage errors
warn!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to retrieve conversation state for {}: {}",
request_id, prev_resp_id, e
previous_response_id = %prev_resp_id,
error = %e,
"failed to retrieve conversation state"
);
// Restore original_input_items since we passed ownership
original_input_items = extract_input_items(&responses_req.input);
@ -234,10 +269,7 @@ pub async fn llm_chat(
}
}
} else {
debug!(
"[PLANO_REQ_ID:{}] | BRIGHT_STAFF | Upstream supports ResponsesAPI natively.",
request_id
);
debug!("upstream supports ResponsesAPI natively");
}
}
}
@ -246,14 +278,29 @@ pub async fn llm_chat(
let client_request_bytes_for_upstream = ProviderRequestType::to_bytes(&client_request).unwrap();
// Determine routing using the dedicated router_chat module
let routing_result = match router_chat_get_upstream_model(
router_service,
client_request, // Pass the original request - router_chat will convert it
trace_collector.clone(),
&traceparent,
&request_path,
&request_id,
)
// This gets its own span for latency and error tracking
let routing_span = info_span!(
"routing",
component = "routing",
http.method = "POST",
http.target = %request_path,
model.requested = %model_from_request,
model.alias_resolved = %alias_resolved_model,
route.selected_model = tracing::field::Empty,
routing.determination_ms = tracing::field::Empty,
);
let routing_result = match async {
set_service_name(operation_component::ROUTING);
router_chat_get_upstream_model(
router_service,
client_request, // Pass the original request - router_chat will convert it
&traceparent,
&request_path,
&request_id,
)
.await
}
.instrument(routing_span)
.await
{
Ok(result) => result,
@ -267,22 +314,36 @@ pub async fn llm_chat(
// Determine final model to use
// Router returns "none" as a sentinel value when it doesn't select a specific model
let router_selected_model = routing_result.model_name;
let model_name = if router_selected_model != "none" {
let resolved_model = if router_selected_model != "none" {
// Router selected a specific model via routing preferences
router_selected_model
} else {
// Router returned "none" sentinel, use validated resolved_model from request
resolved_model.clone()
alias_resolved_model.clone()
};
let span_name = if model_from_request == resolved_model {
format!("POST {} {}", request_path, resolved_model)
} else {
format!(
"POST {} {} -> {}",
request_path, model_from_request, resolved_model
)
};
get_active_span(|span| {
span.update_name(span_name.clone());
});
debug!(
"[PLANO_REQ_ID:{}] | ARCH_ROUTER URL | {}, Provider Hint: {}, Model for upstream: {}",
request_id, full_qualified_llm_provider_url, model_name, model_name_only
url = %full_qualified_llm_provider_url,
provider_hint = %resolved_model,
upstream_model = %model_name_only,
"Routing to upstream"
);
request_headers.insert(
ARCH_PROVIDER_HINT_HEADER,
header::HeaderValue::from_str(&model_name).unwrap(),
header::HeaderValue::from_str(&resolved_model).unwrap(),
);
request_headers.insert(
@ -292,12 +353,18 @@ pub async fn llm_chat(
// remove content-length header if it exists
request_headers.remove(header::CONTENT_LENGTH);
// Inject current LLM span's trace context so upstream spans are children of plano(llm)
global::get_text_map_propagator(|propagator| {
let cx = tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
propagator.inject_context(&cx, &mut HeaderInjector(&mut request_headers));
});
// Capture start time right before sending request to upstream
let request_start_time = std::time::Instant::now();
let request_start_system_time = std::time::SystemTime::now();
let _request_start_system_time = std::time::SystemTime::now();
let llm_response = match reqwest::Client::new()
.post(full_qualified_llm_provider_url)
.post(&full_qualified_llm_provider_url)
.headers(request_headers)
.body(client_request_bytes_for_upstream)
.send()
@ -324,29 +391,12 @@ pub async fn llm_chat(
// Build LLM span with actual status code using constants
let byte_stream = llm_response.bytes_stream();
// Build the LLM span (will be finalized after streaming completes)
let llm_span = build_llm_span(
&traceparent,
&request_path,
&resolved_model,
&model_name,
upstream_status.as_u16(),
is_streaming_request,
request_start_system_time,
tool_names,
user_message_preview,
temperature,
&llm_providers,
)
.await;
// Create base processor for metrics and tracing
let base_processor = ObservableStreamProcessor::new(
trace_collector,
operation_component::LLM,
llm_span,
span_name,
request_start_time,
Some(messages_for_signals),
messages_for_signals,
);
// === v1/responses state management: Wrap with ResponsesStateProcessor ===
@ -367,8 +417,8 @@ pub async fn llm_chat(
base_processor,
state_store,
original_input_items,
alias_resolved_model.clone(),
resolved_model.clone(),
model_name.clone(),
is_streaming_request,
false, // Not OpenAI upstream since should_manage_state is true
content_encoding,
@ -409,88 +459,6 @@ fn resolve_model_alias(
model_from_request.to_string()
}
/// Builds the LLM span with all required and optional attributes.
#[allow(clippy::too_many_arguments)]
async fn build_llm_span(
traceparent: &str,
request_path: &str,
resolved_model: &str,
model_name: &str,
status_code: u16,
is_streaming: bool,
start_time: std::time::SystemTime,
tool_names: Option<Vec<String>>,
user_message_preview: Option<String>,
temperature: Option<f32>,
llm_providers: &Arc<RwLock<LlmProviders>>,
) -> common::traces::Span {
use crate::tracing::{http, llm, OperationNameBuilder};
use common::traces::{parse_traceparent, SpanBuilder, SpanKind};
// Calculate the upstream path based on provider configuration
let upstream_path = get_upstream_path(
llm_providers,
model_name,
request_path,
resolved_model,
is_streaming,
)
.await;
// Build operation name showing path transformation if different
let operation_name = if request_path != upstream_path {
OperationNameBuilder::new()
.with_method("POST")
.with_path(format!("{} >> {}", request_path, upstream_path))
.with_target(resolved_model)
.build()
} else {
OperationNameBuilder::new()
.with_method("POST")
.with_path(request_path)
.with_target(resolved_model)
.build()
};
let (trace_id, parent_span_id) = parse_traceparent(traceparent);
let mut span_builder = SpanBuilder::new(&operation_name)
.with_trace_id(&trace_id)
.with_kind(SpanKind::Client)
.with_start_time(start_time)
.with_attribute(http::METHOD, "POST")
.with_attribute(http::STATUS_CODE, status_code.to_string())
.with_attribute(http::TARGET, request_path.to_string())
.with_attribute(http::UPSTREAM_TARGET, upstream_path)
.with_attribute(llm::MODEL_NAME, resolved_model.to_string())
.with_attribute(llm::IS_STREAMING, is_streaming.to_string());
// Only set parent span ID if it exists (not a root span)
if let Some(parent) = parent_span_id {
span_builder = span_builder.with_parent_span_id(&parent);
}
// Add optional attributes
if let Some(temp) = temperature {
span_builder = span_builder.with_attribute(llm::TEMPERATURE, temp.to_string());
}
if let Some(tools) = tool_names {
let formatted_tools = tools
.iter()
.map(|name| format!("{}(...)", name))
.collect::<Vec<_>>()
.join("\n");
span_builder = span_builder.with_attribute(llm::TOOLS, formatted_tools);
}
if let Some(preview) = user_message_preview {
span_builder = span_builder.with_attribute(llm::USER_MESSAGE_PREVIEW, preview);
}
span_builder.build()
}
/// Calculates the upstream path for the provider based on the model name.
/// Looks up provider configuration, gets the ProviderId and base_url_path_prefix,
/// then uses target_endpoint_for_provider to calculate the correct upstream path.

View file

@ -4,20 +4,18 @@ use common::configuration::{Agent, AgentFilterChain};
use common::consts::{
ARCH_UPSTREAM_HOST_HEADER, BRIGHT_STAFF_SERVICE_NAME, ENVOY_RETRY_HEADER, TRACE_PARENT_HEADER,
};
use common::traces::{generate_random_span_id, SpanBuilder, SpanKind};
use hermesllm::apis::openai::Message;
use hermesllm::{ProviderRequest, ProviderRequestType};
use hyper::header::HeaderMap;
use std::time::{Instant, SystemTime};
use tracing::{debug, info, warn};
use crate::tracing::operation_component::{self};
use crate::tracing::{http, OperationNameBuilder};
use opentelemetry::global;
use opentelemetry_http::HeaderInjector;
use tracing::{debug, info, instrument, warn};
use crate::handlers::jsonrpc::{
JsonRpcId, JsonRpcNotification, JsonRpcRequest, JsonRpcResponse, JSON_RPC_VERSION,
MCP_INITIALIZE, MCP_INITIALIZE_NOTIFICATION, TOOL_CALL_METHOD,
};
use crate::tracing::{operation_component, set_service_name};
use uuid::Uuid;
/// Errors that can occur during pipeline processing
@ -81,115 +79,14 @@ impl PipelineProcessor {
}
}
/// Record a span for filter execution
#[allow(clippy::too_many_arguments)]
fn record_filter_span(
&self,
collector: &std::sync::Arc<common::traces::TraceCollector>,
agent_name: &str,
tool_name: &str,
start_time: SystemTime,
end_time: SystemTime,
elapsed: std::time::Duration,
trace_id: String,
parent_span_id: String,
span_id: String,
) -> String {
// let (trace_id, parent_span_id) = self.extract_trace_context();
// Build operation name: POST /agents/* {filter_name}
// Using generic path since we don't have access to specific endpoint here
let operation_name = OperationNameBuilder::new()
.with_method("POST")
.with_path("/agents/*")
.with_target(agent_name)
.build();
let mut span_builder = SpanBuilder::new(&operation_name)
.with_span_id(span_id.clone())
.with_kind(SpanKind::Client)
.with_start_time(start_time)
.with_end_time(end_time)
.with_attribute(http::METHOD, "POST")
.with_attribute(http::TARGET, "/agents/*")
.with_attribute("filter.name", agent_name.to_string())
.with_attribute("filter.tool_name", tool_name.to_string())
.with_attribute(
"duration_ms",
format!("{:.2}", elapsed.as_secs_f64() * 1000.0),
);
if !trace_id.is_empty() {
span_builder = span_builder.with_trace_id(trace_id);
}
if !parent_span_id.is_empty() {
span_builder = span_builder.with_parent_span_id(parent_span_id);
}
let span = span_builder.build();
// Use plano(filter) as service name for filter execution spans
collector.record_span(operation_component::AGENT_FILTER, span);
span_id.clone()
}
/// Record a span for MCP protocol interactions
#[allow(clippy::too_many_arguments)]
fn record_agent_filter_span(
&self,
collector: &std::sync::Arc<common::traces::TraceCollector>,
operation: &str,
agent_id: &str,
start_time: SystemTime,
end_time: SystemTime,
elapsed: std::time::Duration,
additional_attrs: Option<HashMap<&str, String>>,
trace_id: String,
parent_span_id: String,
span_id: Option<String>,
) {
// let (trace_id, parent_span_id) = self.extract_trace_context();
// Build operation name: POST /mcp {agent_id}
let operation_name = OperationNameBuilder::new()
.with_method("POST")
.with_path("/mcp")
.with_operation(operation)
.with_target(agent_id)
.build();
let mut span_builder = SpanBuilder::new(&operation_name)
.with_span_id(span_id.unwrap_or_else(generate_random_span_id))
.with_kind(SpanKind::Client)
.with_start_time(start_time)
.with_end_time(end_time)
.with_attribute(http::METHOD, "POST")
.with_attribute(http::TARGET, format!("/mcp ({})", operation))
.with_attribute("mcp.operation", operation.to_string())
.with_attribute("mcp.agent_id", agent_id.to_string())
.with_attribute(
"duration_ms",
format!("{:.2}", elapsed.as_secs_f64() * 1000.0),
);
if let Some(attrs) = additional_attrs {
for (key, value) in attrs {
span_builder = span_builder.with_attribute(key, value);
}
}
if !trace_id.is_empty() {
span_builder = span_builder.with_trace_id(trace_id);
}
if !parent_span_id.is_empty() {
span_builder = span_builder.with_parent_span_id(parent_span_id);
}
let span = span_builder.build();
// MCP spans also use plano(filter) service name as they are part of filter operations
collector.record_span(operation_component::AGENT_FILTER, span);
}
/// Process the filter chain of agents (all except the terminal agent)
// /// Process the filter chain of agents (all except the terminal agent)
// #[instrument(
// skip(self, chat_history, agent_filter_chain, agent_map, request_headers),
// fields(
// filter_count = agent_filter_chain.filter_chain.as_ref().map(|fc| fc.len()).unwrap_or(0),
// message_count = chat_history.len()
// )
// )]
#[allow(clippy::too_many_arguments)]
pub async fn process_filter_chain(
&mut self,
@ -197,9 +94,6 @@ impl PipelineProcessor {
agent_filter_chain: &AgentFilterChain,
agent_map: &HashMap<String, Agent>,
request_headers: &HeaderMap,
trace_collector: Option<&std::sync::Arc<common::traces::TraceCollector>>,
trace_id: String,
parent_span_id: String,
) -> Result<Vec<Message>, PipelineError> {
let mut chat_history_updated = chat_history.to_vec();
@ -210,7 +104,7 @@ impl PipelineProcessor {
};
for agent_name in filter_chain {
debug!("Processing filter agent: {}", agent_name);
debug!(agent = %agent_name, "processing filter agent");
let agent = agent_map
.get(agent_name)
@ -219,68 +113,29 @@ impl PipelineProcessor {
let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
info!(
"executing filter: {}/{}, url: {}, type: {}, conversation length: {}",
agent_name,
tool_name,
agent.url,
agent.agent_type.as_deref().unwrap_or("mcp"),
chat_history.len()
agent = %agent_name,
tool = %tool_name,
url = %agent.url,
agent_type = %agent.agent_type.as_deref().unwrap_or("mcp"),
conversation_len = chat_history.len(),
"executing filter"
);
let start_time = SystemTime::now();
let start_instant = Instant::now();
// Generate filter span ID before execution so MCP spans can use it as parent
let filter_span_id = generate_random_span_id();
if agent.agent_type.as_deref().unwrap_or("mcp") == "mcp" {
chat_history_updated = self
.execute_mcp_filter(
&chat_history_updated,
agent,
request_headers,
trace_collector,
trace_id.clone(),
filter_span_id.clone(),
)
.execute_mcp_filter(&chat_history_updated, agent, request_headers)
.await?;
} else {
chat_history_updated = self
.execute_http_filter(
&chat_history_updated,
agent,
request_headers,
trace_collector,
trace_id.clone(),
filter_span_id.clone(),
)
.execute_http_filter(&chat_history_updated, agent, request_headers)
.await?;
}
let end_time = SystemTime::now();
let elapsed = start_instant.elapsed();
info!(
"Filter '{}' completed in {:.2}ms, updated conversation length: {}",
agent_name,
elapsed.as_secs_f64() * 1000.0,
chat_history_updated.len()
agent = %agent_name,
updated_len = chat_history_updated.len(),
"filter completed"
);
// Record span for this filter execution
if let Some(collector) = trace_collector {
self.record_filter_span(
collector,
agent_name,
tool_name,
start_time,
end_time,
elapsed,
trace_id.clone(),
parent_span_id.clone(),
filter_span_id,
);
}
}
Ok(chat_history_updated)
@ -292,18 +147,17 @@ impl PipelineProcessor {
request_headers: &HeaderMap,
agent_id: &str,
session_id: Option<&str>,
trace_id: String,
parent_span_id: String,
) -> Result<HeaderMap, PipelineError> {
let trace_parent = format!("00-{}-{}-01", trace_id, parent_span_id);
let mut headers = request_headers.clone();
headers.remove(hyper::header::CONTENT_LENGTH);
// Inject OpenTelemetry trace context automatically
headers.remove(TRACE_PARENT_HEADER);
headers.insert(
TRACE_PARENT_HEADER,
hyper::header::HeaderValue::from_str(&trace_parent).unwrap(),
);
global::get_text_map_propagator(|propagator| {
let cx =
tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
propagator.inject_context(&cx, &mut HeaderInjector(&mut headers));
});
headers.insert(
ARCH_UPSTREAM_HOST_HEADER,
@ -348,9 +202,9 @@ impl PipelineProcessor {
// Validate SSE format: first line should be "event: message"
if lines.is_empty() || lines[0] != "event: message" {
warn!(
"Invalid SSE response format from agent {}: expected 'event: message' as first line, got: {:?}",
agent_id,
lines.first()
agent = %agent_id,
first_line = ?lines.first(),
"invalid SSE response format"
);
return Err(PipelineError::NoContentInResponse(format!(
"Invalid SSE response format from agent {}: expected 'event: message' as first line",
@ -367,9 +221,9 @@ impl PipelineProcessor {
if data_lines.len() != 1 {
warn!(
"Expected exactly one 'data:' line from agent {}, found {}",
agent_id,
data_lines.len()
agent = %agent_id,
found = data_lines.len(),
"expected exactly one 'data:' line"
);
return Err(PipelineError::NoContentInResponse(format!(
"Expected exactly one 'data:' line from agent {}, found {}",
@ -429,27 +283,34 @@ impl PipelineProcessor {
}
/// Send request to a specific agent and return the response content
#[instrument(
skip(self, messages, agent, request_headers),
fields(
agent_id = %agent.id,
filter_name = %agent.id,
message_count = messages.len()
)
)]
async fn execute_mcp_filter(
&mut self,
messages: &[Message],
agent: &Agent,
request_headers: &HeaderMap,
trace_collector: Option<&std::sync::Arc<common::traces::TraceCollector>>,
trace_id: String,
filter_span_id: String,
) -> Result<Vec<Message>, PipelineError> {
// Set service name for this filter span
set_service_name(operation_component::AGENT_FILTER);
// Update current span name to include filter name
use opentelemetry::trace::get_active_span;
get_active_span(|span| {
span.update_name(format!("execute_mcp_filter ({})", agent.id));
});
// Get or create MCP session
let mcp_session_id = if let Some(session_id) = self.agent_id_session_map.get(&agent.id) {
session_id.clone()
} else {
let session_id = self
.get_new_session_id(
&agent.id,
trace_id.clone(),
filter_span_id.clone(),
request_headers,
)
.await;
let session_id = self.get_new_session_id(&agent.id, request_headers).await;
self.agent_id_session_map
.insert(agent.id.clone(), session_id.clone());
session_id
@ -464,21 +325,9 @@ impl PipelineProcessor {
let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
let json_rpc_request = self.build_tool_call_request(tool_name, messages)?;
// Generate span ID for this MCP tool call (child of filter span)
let mcp_span_id = generate_random_span_id();
// Build headers
let agent_headers = self.build_mcp_headers(
request_headers,
&agent.id,
Some(&mcp_session_id),
trace_id.clone(),
mcp_span_id.clone(),
)?;
// Send request with tracing
let start_time = SystemTime::now();
let start_instant = Instant::now();
let agent_headers =
self.build_mcp_headers(request_headers, &agent.id, Some(&mcp_session_id))?;
let response = self
.send_mcp_request(&json_rpc_request, &agent_headers, &agent.id)
@ -486,31 +335,6 @@ impl PipelineProcessor {
let http_status = response.status();
let response_bytes = response.bytes().await?;
let end_time = SystemTime::now();
let elapsed = start_instant.elapsed();
// Record MCP tool call span
if let Some(collector) = trace_collector {
let mut attrs = HashMap::new();
attrs.insert("mcp.method", "tools/call".to_string());
attrs.insert("mcp.tool_name", tool_name.to_string());
attrs.insert("mcp.session_id", mcp_session_id.clone());
attrs.insert("http.status_code", http_status.as_u16().to_string());
self.record_agent_filter_span(
collector,
"tool_call",
&agent.id,
start_time,
end_time,
elapsed,
Some(attrs),
trace_id.clone(),
filter_span_id.clone(),
Some(mcp_span_id),
);
}
// Handle HTTP errors
if !http_status.is_success() {
let error_body = String::from_utf8_lossy(&response_bytes).to_string();
@ -611,8 +435,6 @@ impl PipelineProcessor {
&self,
agent_id: &str,
session_id: &str,
trace_id: String,
parent_span_id: String,
request_headers: &HeaderMap,
) -> Result<(), PipelineError> {
let initialized_notification = JsonRpcNotification {
@ -622,15 +444,9 @@ impl PipelineProcessor {
};
let notification_body = serde_json::to_string(&initialized_notification)?;
debug!("Sending initialized notification for agent {}", agent_id);
debug!("sending initialized notification for agent {}", agent_id);
let headers = self.build_mcp_headers(
request_headers,
agent_id,
Some(session_id),
trace_id.clone(),
parent_span_id.clone(),
)?;
let headers = self.build_mcp_headers(request_headers, agent_id, Some(session_id))?;
let response = self
.client
@ -641,31 +457,19 @@ impl PipelineProcessor {
.await?;
info!(
"Initialized notification response status: {}",
"initialized notification response status: {}",
response.status()
);
Ok(())
}
async fn get_new_session_id(
&self,
agent_id: &str,
trace_id: String,
parent_span_id: String,
request_headers: &HeaderMap,
) -> String {
info!("Initializing MCP session for agent {}", agent_id);
async fn get_new_session_id(&self, agent_id: &str, request_headers: &HeaderMap) -> String {
info!("initializing MCP session for agent {}", agent_id);
let initialize_request = self.build_initialize_request();
let headers = self
.build_mcp_headers(
request_headers,
agent_id,
None,
trace_id.clone(),
parent_span_id.clone(),
)
.build_mcp_headers(request_headers, agent_id, None)
.expect("Failed to build headers for initialization");
let response = self
@ -673,7 +477,7 @@ impl PipelineProcessor {
.await
.expect("Failed to initialize MCP session");
info!("Initialize response status: {}", response.status());
info!("initialize response status: {}", response.status());
let session_id = response
.headers()
@ -683,49 +487,54 @@ impl PipelineProcessor {
.to_string();
info!(
"Created new MCP session for agent {}: {}",
"created new MCP session for agent {}: {}",
agent_id, session_id
);
// Send initialized notification
self.send_initialized_notification(
agent_id,
&session_id,
trace_id.clone(),
parent_span_id.clone(),
&headers,
)
.await
.expect("Failed to send initialized notification");
self.send_initialized_notification(agent_id, &session_id, &headers)
.await
.expect("Failed to send initialized notification");
session_id
}
/// Execute a HTTP-based filter agent
#[instrument(
skip(self, messages, agent, request_headers),
fields(
agent_id = %agent.id,
agent_url = %agent.url,
filter_name = %agent.id,
message_count = messages.len()
)
)]
async fn execute_http_filter(
&mut self,
messages: &[Message],
agent: &Agent,
request_headers: &HeaderMap,
trace_collector: Option<&std::sync::Arc<common::traces::TraceCollector>>,
trace_id: String,
filter_span_id: String,
) -> Result<Vec<Message>, PipelineError> {
let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
// Set service name for this filter span
set_service_name(operation_component::AGENT_FILTER);
// Generate span ID for this HTTP call (child of filter span)
let http_span_id = generate_random_span_id();
// Update current span name to include filter name
use opentelemetry::trace::get_active_span;
get_active_span(|span| {
span.update_name(format!("execute_http_filter ({})", agent.id));
});
// Build headers
let trace_parent = format!("00-{}-{}-01", trace_id, http_span_id);
let mut agent_headers = request_headers.clone();
agent_headers.remove(hyper::header::CONTENT_LENGTH);
// Inject OpenTelemetry trace context automatically
agent_headers.remove(TRACE_PARENT_HEADER);
agent_headers.insert(
TRACE_PARENT_HEADER,
hyper::header::HeaderValue::from_str(&trace_parent).unwrap(),
);
global::get_text_map_propagator(|propagator| {
let cx =
tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
propagator.inject_context(&cx, &mut HeaderInjector(&mut agent_headers));
});
agent_headers.insert(
ARCH_UPSTREAM_HOST_HEADER,
@ -748,10 +557,6 @@ impl PipelineProcessor {
hyper::header::HeaderValue::from_static("application/json"),
);
// Send request with tracing
let start_time = SystemTime::now();
let start_instant = Instant::now();
debug!(
"Sending HTTP request to agent {} at URL: {}",
agent.id, agent.url
@ -769,30 +574,6 @@ impl PipelineProcessor {
let http_status = response.status();
let response_bytes = response.bytes().await?;
let end_time = SystemTime::now();
let elapsed = start_instant.elapsed();
// Record HTTP call span
if let Some(collector) = trace_collector {
let mut attrs = HashMap::new();
attrs.insert("http.tool_name", tool_name.to_string());
attrs.insert("http.url", agent.url.clone());
attrs.insert("http.status_code", http_status.as_u16().to_string());
self.record_agent_filter_span(
collector,
"http_call",
&agent.id,
start_time,
end_time,
elapsed,
Some(attrs),
trace_id.clone(),
filter_span_id.clone(),
Some(http_span_id),
);
}
// Handle HTTP errors
if !http_status.is_success() {
let error_body = String::from_utf8_lossy(&response_bytes).to_string();
@ -825,34 +606,34 @@ impl PipelineProcessor {
}
/// Send request to terminal agent and return the raw response for streaming
/// Note: The caller is responsible for creating the plano(agent) span that wraps
/// both this call and the subsequent response consumption.
pub async fn invoke_agent(
&self,
messages: &[Message],
mut original_request: ProviderRequestType,
terminal_agent: &Agent,
request_headers: &HeaderMap,
trace_id: String,
agent_span_id: String,
) -> Result<reqwest::Response, PipelineError> {
// let mut request = original_request.clone();
original_request.set_messages(messages);
let request_url = "/v1/chat/completions";
let request_body = ProviderRequestType::to_bytes(&original_request).unwrap();
// let request_body = serde_json::to_string(&request)?;
debug!("Sending request to terminal agent {}", terminal_agent.id);
debug!("sending request to terminal agent {}", terminal_agent.id);
let mut agent_headers = request_headers.clone();
agent_headers.remove(hyper::header::CONTENT_LENGTH);
// Set traceparent header to make the egress span a child of the agent span
if !trace_id.is_empty() && !agent_span_id.is_empty() {
let trace_parent = format!("00-{}-{}-01", trace_id, agent_span_id);
agent_headers.remove(TRACE_PARENT_HEADER);
agent_headers.insert(
TRACE_PARENT_HEADER,
hyper::header::HeaderValue::from_str(&trace_parent).unwrap(),
);
}
// Inject OpenTelemetry trace context automatically
agent_headers.remove(TRACE_PARENT_HEADER);
global::get_text_map_propagator(|propagator| {
let cx =
tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
propagator.inject_context(&cx, &mut HeaderInjector(&mut agent_headers));
});
agent_headers.insert(
ARCH_UPSTREAM_HOST_HEADER,
@ -867,7 +648,7 @@ impl PipelineProcessor {
let response = self
.client
.post(format!("{}/v1/chat/completions", self.url))
.post(format!("{}{}", self.url, request_url))
.headers(agent_headers)
.body(request_body)
.send()
@ -914,15 +695,7 @@ mod tests {
let pipeline = create_test_pipeline(vec!["nonexistent-agent", "terminal-agent"]);
let result = processor
.process_filter_chain(
&messages,
&pipeline,
&agent_map,
&request_headers,
None,
String::new(),
String::new(),
)
.process_filter_chain(&messages, &pipeline, &agent_map, &request_headers)
.await;
assert!(result.is_err());
@ -956,14 +729,7 @@ mod tests {
let request_headers = HeaderMap::new();
let result = processor
.execute_mcp_filter(
&messages,
&agent,
&request_headers,
None,
"trace-123".to_string(),
"span-123".to_string(),
)
.execute_mcp_filter(&messages, &agent, &request_headers)
.await;
match result {
@ -1002,14 +768,7 @@ mod tests {
let request_headers = HeaderMap::new();
let result = processor
.execute_mcp_filter(
&messages,
&agent,
&request_headers,
None,
"trace-456".to_string(),
"span-456".to_string(),
)
.execute_mcp_filter(&messages, &agent, &request_headers)
.await;
match result {
@ -1061,14 +820,7 @@ mod tests {
let request_headers = HeaderMap::new();
let result = processor
.execute_mcp_filter(
&messages,
&agent,
&request_headers,
None,
"trace-789".to_string(),
"span-789".to_string(),
)
.execute_mcp_filter(&messages, &agent, &request_headers)
.await;
match result {

View file

@ -9,7 +9,7 @@ use hyper::{Response, StatusCode};
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
use tokio_stream::StreamExt;
use tracing::{info, warn};
use tracing::{info, warn, Instrument};
/// Errors that can occur during response handling
#[derive(Debug, thiserror::Error)]
@ -69,10 +69,14 @@ impl ResponseHandler {
response
}
/// Create a streaming response from a reqwest response
/// Create a streaming response from a reqwest response.
/// The spawned streaming task is instrumented with both `agent_span` and `orchestrator_span`
/// so their durations reflect the actual time spent streaming to the client.
pub async fn create_streaming_response(
&self,
llm_response: reqwest::Response,
agent_span: tracing::Span,
orchestrator_span: tracing::Span,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ResponseError> {
// Copy headers from the original response
let response_headers = llm_response.headers();
@ -89,25 +93,30 @@ impl ResponseHandler {
// Create channel for async streaming
let (tx, rx) = mpsc::channel::<Bytes>(16);
// Spawn task to stream data
tokio::spawn(async move {
let mut byte_stream = llm_response.bytes_stream();
// Spawn streaming task instrumented with both spans (nested) so both
// remain entered for the full streaming duration.
tokio::spawn(
async move {
let mut byte_stream = llm_response.bytes_stream();
while let Some(item) = byte_stream.next().await {
let chunk = match item {
Ok(chunk) => chunk,
Err(err) => {
warn!("Error receiving chunk: {:?}", err);
while let Some(item) = byte_stream.next().await {
let chunk = match item {
Ok(chunk) => chunk,
Err(err) => {
warn!(error = ?err, "error receiving chunk");
break;
}
};
if tx.send(chunk).await.is_err() {
warn!("receiver dropped");
break;
}
};
if tx.send(chunk).await.is_err() {
warn!("Receiver dropped");
break;
}
}
});
.instrument(agent_span)
.instrument(orchestrator_span),
);
let stream = ReceiverStream::new(rx).map(|chunk| Ok::<_, hyper::Error>(Frame::data(chunk)));
let stream_body = BoxBody::new(StreamBody::new(stream));
@ -164,11 +173,11 @@ impl ResponseHandler {
if let Some(content) = provider_response.content_delta() {
accumulated_text.push_str(content);
} else {
info!("No content delta in provider response");
info!("no content delta in provider response");
}
}
Err(e) => {
warn!("Failed to parse provider response: {:?}", e);
warn!(error = ?e, "failed to parse provider response");
}
}
}
@ -248,7 +257,13 @@ mod tests {
let llm_response = client.get(&(server.url() + "/test")).send().await.unwrap();
let handler = ResponseHandler::new();
let result = handler.create_streaming_response(llm_response).await;
let result = handler
.create_streaming_response(
llm_response,
tracing::Span::current(),
tracing::Span::current(),
)
.await;
mock.assert_async().await;
assert!(result.is_ok());

View file

@ -1,14 +1,12 @@
use common::configuration::ModelUsagePreference;
use common::traces::{parse_traceparent, SpanBuilder, SpanKind, TraceCollector};
use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
use hermesllm::{ProviderRequest, ProviderRequestType};
use hyper::StatusCode;
use std::collections::HashMap;
use std::sync::Arc;
use tracing::{debug, info, warn};
use crate::router::llm_router::RouterService;
use crate::tracing::{http, operation_component, routing, OperationNameBuilder};
use crate::tracing::routing;
pub struct RoutingResult {
pub model_name: String,
@ -36,7 +34,6 @@ impl RoutingError {
pub async fn router_chat_get_upstream_model(
router_service: Arc<RouterService>,
client_request: ProviderRequestType,
trace_collector: Arc<TraceCollector>,
traceparent: &str,
request_path: &str,
request_id: &str,
@ -56,14 +53,14 @@ pub async fn router_chat_get_upstream_model(
| ProviderRequestType::BedrockConverseStream(_)
| ProviderRequestType::ResponsesAPIRequest(_),
) => {
warn!("Unexpected: got non-ChatCompletions request after converting to OpenAI format");
warn!("unexpected: got non-ChatCompletions request after converting to OpenAI format");
return Err(RoutingError::internal_error(
"Request conversion failed".to_string(),
));
}
Err(err) => {
warn!(
"Failed to convert request to ChatCompletionsRequest: {}",
"failed to convert request to ChatCompletionsRequest: {}",
err
);
return Err(RoutingError::internal_error(format!(
@ -74,9 +71,8 @@ pub async fn router_chat_get_upstream_model(
};
debug!(
"[PLANO_REQ_ID: {:?}]: ROUTER_REQ: {}",
request_id,
&serde_json::to_string(&chat_request).unwrap()
request = %serde_json::to_string(&chat_request).unwrap(),
"router request"
);
// Extract usage preferences from metadata
@ -112,16 +108,14 @@ pub async fn router_chat_get_upstream_model(
};
info!(
"[PLANO_REQ_ID: {:?}] | ROUTER_REQ | Usage preferences from request: {}, request_path: {}, latest message: {}",
request_id,
usage_preferences.is_some(),
request_path,
latest_message_for_log
has_usage_preferences = usage_preferences.is_some(),
path = %request_path,
latest_message = %latest_message_for_log,
"processing router request"
);
// Capture start time for routing span
let routing_start_time = std::time::Instant::now();
let routing_start_system_time = std::time::SystemTime::now();
// Attempt to determine route using the router service
let routing_result = router_service
@ -133,41 +127,21 @@ pub async fn router_chat_get_upstream_model(
)
.await;
let determination_ms = routing_start_time.elapsed().as_millis() as i64;
let current_span = tracing::Span::current();
current_span.record(routing::ROUTE_DETERMINATION_MS, determination_ms);
match routing_result {
Ok(route) => match route {
Some((_, model_name)) => {
// Record successful routing span
let mut attrs: HashMap<String, String> = HashMap::new();
attrs.insert("route.selected_model".to_string(), model_name.clone());
record_routing_span(
trace_collector,
traceparent,
routing_start_time,
routing_start_system_time,
attrs,
)
.await;
current_span.record("route.selected_model", model_name.as_str());
Ok(RoutingResult { model_name })
}
None => {
// No route determined, return sentinel value "none"
// This signals to llm.rs to use the original validated request model
info!(
"[PLANO_REQ_ID: {}] | ROUTER_REQ | No route determined, returning sentinel 'none'",
request_id
);
let mut attrs = HashMap::new();
attrs.insert("route.selected_model".to_string(), "none".to_string());
record_routing_span(
trace_collector,
traceparent,
routing_start_time,
routing_start_system_time,
attrs,
)
.await;
current_span.record("route.selected_model", "none");
info!("no route determined, using default model");
Ok(RoutingResult {
model_name: "none".to_string(),
@ -175,19 +149,7 @@ pub async fn router_chat_get_upstream_model(
}
},
Err(err) => {
// Record failed routing span
let mut attrs = HashMap::new();
attrs.insert("route.selected_model".to_string(), "unknown".to_string());
attrs.insert("error.message".to_string(), err.to_string());
record_routing_span(
trace_collector,
traceparent,
routing_start_time,
routing_start_system_time,
attrs,
)
.await;
current_span.record("route.selected_model", "unknown");
Err(RoutingError::internal_error(format!(
"Failed to determine route: {}",
err
@ -195,53 +157,3 @@ pub async fn router_chat_get_upstream_model(
}
}
}
/// Helper function to record a routing span with the given attributes.
/// Reduces code duplication across different routing outcomes.
async fn record_routing_span(
trace_collector: Arc<TraceCollector>,
traceparent: &str,
start_time: std::time::Instant,
start_system_time: std::time::SystemTime,
attrs: HashMap<String, String>,
) {
// The routing always uses OpenAI Chat Completions format internally,
// so we log that as the actual API being used for routing
let routing_api_path = "/v1/chat/completions";
let routing_operation_name = OperationNameBuilder::new()
.with_method("POST")
.with_path(routing_api_path)
.with_target("Arch-Router-1.5B")
.build();
let (trace_id, parent_span_id) = parse_traceparent(traceparent);
// Build the routing span directly using constants
let mut span_builder = SpanBuilder::new(&routing_operation_name)
.with_trace_id(&trace_id)
.with_kind(SpanKind::Client)
.with_start_time(start_system_time)
.with_end_time(std::time::SystemTime::now())
.with_attribute(http::METHOD, "POST")
.with_attribute(http::TARGET, routing_api_path.to_string())
.with_attribute(
routing::ROUTE_DETERMINATION_MS,
start_time.elapsed().as_millis().to_string(),
);
// Only set parent span ID if it exists (not a root span)
if let Some(parent) = parent_span_id {
span_builder = span_builder.with_parent_span_id(&parent);
}
// Add all custom attributes
for (key, value) in attrs {
span_builder = span_builder.with_attribute(key, value);
}
let span = span_builder.build();
// Record the span directly to the collector
trace_collector.record_span(operation_component::ROUTING, span);
}

View file

@ -1,18 +1,18 @@
use bytes::Bytes;
use common::traces::{Attribute, AttributeValue, Event, Span, TraceCollector};
use http_body_util::combinators::BoxBody;
use http_body_util::StreamBody;
use hyper::body::Frame;
use std::sync::Arc;
use std::time::{Instant, SystemTime};
use opentelemetry::trace::TraceContextExt;
use opentelemetry::KeyValue;
use std::time::Instant;
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
use tokio_stream::StreamExt;
use tracing::warn;
use tracing::{info, warn, Instrument};
use tracing_opentelemetry::OpenTelemetrySpanExt;
// Import tracing constants and signals
use crate::signals::{InteractionQuality, SignalAnalyzer, TextBasedSignalAnalyzer, FLAG_MARKER};
use crate::tracing::{error, llm, signals as signal_constants};
use crate::tracing::{llm, set_service_name, signals as signal_constants};
use hermesllm::apis::openai::Message;
/// Trait for processing streaming chunks
@ -31,11 +31,10 @@ pub trait StreamProcessor: Send + 'static {
fn on_error(&mut self, _error: &str) {}
}
/// A processor that tracks streaming metrics and finalizes the span
/// A processor that tracks streaming metrics
pub struct ObservableStreamProcessor {
collector: Arc<TraceCollector>,
service_name: String,
span: Span,
operation_name: String,
total_bytes: usize,
chunk_count: usize,
start_time: Instant,
@ -47,22 +46,28 @@ impl ObservableStreamProcessor {
/// Create a new passthrough processor
///
/// # Arguments
/// * `collector` - The trace collector to record the span to
/// * `service_name` - The service name for this span (e.g., "archgw(llm)")
/// * `span` - The span to finalize after streaming completes
/// * `service_name` - The service name for this span (e.g., "plano(llm)")
/// This will be set as the `service.name.override` attribute on the current span,
/// allowing the ServiceNameOverrideExporter to route spans to different services.
/// * `operation_name` - The current span operation name (e.g., "POST /v1/chat/completions gpt-4")
/// Used to append the flag marker when concerning signals are detected.
/// * `start_time` - When the request started (for duration calculation)
/// * `messages` - Optional conversation messages for signal analysis
pub fn new(
collector: Arc<TraceCollector>,
service_name: impl Into<String>,
span: Span,
operation_name: impl Into<String>,
start_time: Instant,
messages: Option<Vec<Message>>,
) -> Self {
let service_name = service_name.into();
// Set the service name override on the current span for OpenTelemetry export
// This allows the ServiceNameOverrideExporter to route this span to the correct service
set_service_name(&service_name);
Self {
collector,
service_name: service_name.into(),
span,
service_name,
operation_name: operation_name.into(),
total_bytes: 0,
chunk_count: 0,
start_time,
@ -87,89 +92,81 @@ impl StreamProcessor for ObservableStreamProcessor {
}
fn on_complete(&mut self) {
// Update span with streaming metrics and end time
let end_time_nanos = SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap_or_default()
.as_nanos();
self.span.end_time_unix_nano = format!("{}", end_time_nanos);
// Add streaming metrics as attributes using constants
self.span.attributes.push(Attribute {
key: llm::RESPONSE_BYTES.to_string(),
value: AttributeValue {
string_value: Some(self.total_bytes.to_string()),
},
});
self.span.attributes.push(Attribute {
key: llm::DURATION_MS.to_string(),
value: AttributeValue {
string_value: Some(self.start_time.elapsed().as_millis().to_string()),
},
});
// Add time to first token if available (streaming only)
// Record time-to-first-token as an OTel span attribute + event (streaming only)
if let Some(ttft) = self.time_to_first_token {
self.span.attributes.push(Attribute {
key: llm::TIME_TO_FIRST_TOKEN_MS.to_string(),
value: AttributeValue {
string_value: Some(ttft.to_string()),
},
});
// Add time to first token as a span event
// Calculate the timestamp by adding ttft duration to span start time
if let Ok(start_time_nanos) = self.span.start_time_unix_nano.parse::<u128>() {
// Convert ttft from milliseconds to nanoseconds and add to start time
let event_timestamp = start_time_nanos + (ttft * 1_000_000);
let mut event =
Event::new(llm::TIME_TO_FIRST_TOKEN_MS.to_string(), event_timestamp);
event.add_attribute(llm::TIME_TO_FIRST_TOKEN_MS.to_string(), ttft.to_string());
// Initialize events vector if needed
if self.span.events.is_none() {
self.span.events = Some(Vec::new());
}
if let Some(ref mut events) = self.span.events {
events.push(event);
}
}
let span = tracing::Span::current();
let otel_context = span.context();
let otel_span = otel_context.span();
otel_span.set_attribute(KeyValue::new(llm::TIME_TO_FIRST_TOKEN_MS, ttft as i64));
otel_span.add_event(
llm::TIME_TO_FIRST_TOKEN_MS,
vec![KeyValue::new(llm::TIME_TO_FIRST_TOKEN_MS, ttft as i64)],
);
}
// Analyze signals if messages are available and add to span attributes
// Analyze signals if messages are available and record as span attributes
if let Some(ref messages) = self.messages {
let analyzer: Box<dyn SignalAnalyzer> = Box::new(TextBasedSignalAnalyzer::new());
let report = analyzer.analyze(messages);
// Get the current OTel span to set signal attributes
let span = tracing::Span::current();
let otel_context = span.context();
let otel_span = otel_context.span();
// Add overall quality
self.span.attributes.push(Attribute {
key: signal_constants::QUALITY.to_string(),
value: AttributeValue {
string_value: Some(format!("{:?}", report.overall_quality)),
},
});
otel_span.set_attribute(KeyValue::new(
signal_constants::QUALITY,
format!("{:?}", report.overall_quality),
));
// Add repair/follow-up metrics if concerning
if report.follow_up.is_concerning || report.follow_up.repair_count > 0 {
self.span.attributes.push(Attribute {
key: signal_constants::REPAIR_COUNT.to_string(),
value: AttributeValue {
string_value: Some(report.follow_up.repair_count.to_string()),
},
});
self.span.attributes.push(Attribute {
key: signal_constants::REPAIR_RATIO.to_string(),
value: AttributeValue {
string_value: Some(format!("{:.3}", report.follow_up.repair_ratio)),
},
});
otel_span.set_attribute(KeyValue::new(
signal_constants::REPAIR_COUNT,
report.follow_up.repair_count as i64,
));
otel_span.set_attribute(KeyValue::new(
signal_constants::REPAIR_RATIO,
format!("{:.3}", report.follow_up.repair_ratio),
));
}
// Add flag marker to operation name if any concerning signal is detected
// Add frustration metrics
if report.frustration.has_frustration {
otel_span.set_attribute(KeyValue::new(
signal_constants::FRUSTRATION_COUNT,
report.frustration.frustration_count as i64,
));
otel_span.set_attribute(KeyValue::new(
signal_constants::FRUSTRATION_SEVERITY,
report.frustration.severity as i64,
));
}
// Add repetition metrics
if report.repetition.has_looping {
otel_span.set_attribute(KeyValue::new(
signal_constants::REPETITION_COUNT,
report.repetition.repetition_count as i64,
));
}
// Add escalation metrics
if report.escalation.escalation_requested {
otel_span
.set_attribute(KeyValue::new(signal_constants::ESCALATION_REQUESTED, true));
}
// Add positive feedback metrics
if report.positive_feedback.has_positive_feedback {
otel_span.set_attribute(KeyValue::new(
signal_constants::POSITIVE_FEEDBACK_COUNT,
report.positive_feedback.positive_count as i64,
));
}
// Flag the span name if any concerning signal is detected
let should_flag = report.frustration.has_frustration
|| report.repetition.has_looping
|| report.escalation.escalation_requested
@ -179,94 +176,27 @@ impl StreamProcessor for ObservableStreamProcessor {
);
if should_flag {
// Prepend flag marker to the operation name
self.span.name = format!("{} {}", self.span.name, FLAG_MARKER);
}
// Add key signal metrics
if report.frustration.has_frustration {
self.span.attributes.push(Attribute {
key: signal_constants::FRUSTRATION_COUNT.to_string(),
value: AttributeValue {
string_value: Some(report.frustration.frustration_count.to_string()),
},
});
self.span.attributes.push(Attribute {
key: signal_constants::FRUSTRATION_SEVERITY.to_string(),
value: AttributeValue {
string_value: Some(report.frustration.severity.to_string()),
},
});
}
if report.repetition.has_looping {
self.span.attributes.push(Attribute {
key: signal_constants::REPETITION_COUNT.to_string(),
value: AttributeValue {
string_value: Some(report.repetition.repetition_count.to_string()),
},
});
}
if report.escalation.escalation_requested {
self.span.attributes.push(Attribute {
key: signal_constants::ESCALATION_REQUESTED.to_string(),
value: AttributeValue {
string_value: Some("true".to_string()),
},
});
}
if report.positive_feedback.has_positive_feedback {
self.span.attributes.push(Attribute {
key: signal_constants::POSITIVE_FEEDBACK_COUNT.to_string(),
value: AttributeValue {
string_value: Some(report.positive_feedback.positive_count.to_string()),
},
});
otel_span.update_name(format!("{} {}", self.operation_name, FLAG_MARKER));
}
}
// Record the finalized span
self.collector
.record_span(&self.service_name, self.span.clone());
info!(
service = %self.service_name,
total_bytes = self.total_bytes,
chunk_count = self.chunk_count,
duration_ms = self.start_time.elapsed().as_millis(),
time_to_first_token_ms = ?self.time_to_first_token,
"streaming completed"
);
}
fn on_error(&mut self, error_msg: &str) {
warn!("Stream error in PassthroughProcessor: {}", error_msg);
// Update span with error info and end time
let end_time_nanos = SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap_or_default()
.as_nanos();
self.span.end_time_unix_nano = format!("{}", end_time_nanos);
self.span.attributes.push(Attribute {
key: error::ERROR.to_string(),
value: AttributeValue {
string_value: Some("true".to_string()),
},
});
self.span.attributes.push(Attribute {
key: error::MESSAGE.to_string(),
value: AttributeValue {
string_value: Some(error_msg.to_string()),
},
});
self.span.attributes.push(Attribute {
key: llm::DURATION_MS.to_string(),
value: AttributeValue {
string_value: Some(self.start_time.elapsed().as_millis().to_string()),
},
});
// Record the error span
self.collector
.record_span(&self.service_name, self.span.clone());
warn!(
service = %self.service_name,
error = error_msg,
duration_ms = self.start_time.elapsed().as_millis(),
"stream error"
);
}
}
@ -287,49 +217,55 @@ where
{
let (tx, rx) = mpsc::channel::<Bytes>(buffer_size);
// Capture the current span so the spawned task inherits the request context
let current_span = tracing::Span::current();
// Spawn a task to process and forward chunks
let processor_handle = tokio::spawn(async move {
let mut is_first_chunk = true;
let processor_handle = tokio::spawn(
async move {
let mut is_first_chunk = true;
while let Some(item) = byte_stream.next().await {
let chunk = match item {
Ok(chunk) => chunk,
Err(err) => {
let err_msg = format!("Error receiving chunk: {:?}", err);
warn!("{}", err_msg);
processor.on_error(&err_msg);
break;
while let Some(item) = byte_stream.next().await {
let chunk = match item {
Ok(chunk) => chunk,
Err(err) => {
let err_msg = format!("Error receiving chunk: {:?}", err);
warn!(error = %err_msg, "stream error");
processor.on_error(&err_msg);
break;
}
};
// Call on_first_bytes for the first chunk
if is_first_chunk {
processor.on_first_bytes();
is_first_chunk = false;
}
};
// Call on_first_bytes for the first chunk
if is_first_chunk {
processor.on_first_bytes();
is_first_chunk = false;
}
// Process the chunk
match processor.process_chunk(chunk) {
Ok(Some(processed_chunk)) => {
if tx.send(processed_chunk).await.is_err() {
warn!("Receiver dropped");
// Process the chunk
match processor.process_chunk(chunk) {
Ok(Some(processed_chunk)) => {
if tx.send(processed_chunk).await.is_err() {
warn!("receiver dropped");
break;
}
}
Ok(None) => {
// Skip this chunk
continue;
}
Err(err) => {
warn!("processor error: {}", err);
processor.on_error(&err);
break;
}
}
Ok(None) => {
// Skip this chunk
continue;
}
Err(err) => {
warn!("Processor error: {}", err);
processor.on_error(&err);
break;
}
}
}
processor.on_complete();
});
processor.on_complete();
}
.instrument(current_span),
);
// Convert channel receiver to HTTP stream
let stream = ReceiverStream::new(rx).map(|chunk| Ok::<_, hyper::Error>(Frame::data(chunk)));

View file

@ -14,7 +14,6 @@ use common::consts::{
CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH, PLANO_ORCHESTRATOR_MODEL_NAME,
};
use common::llm_providers::LlmProviders;
use common::traces::TraceCollector;
use http_body_util::{combinators::BoxBody, BodyExt, Empty};
use hyper::body::Incoming;
use hyper::server::conn::http1;
@ -51,13 +50,12 @@ fn empty() -> BoxBody<Bytes, hyper::Error> {
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let _tracer_provider = init_tracer();
let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| BIND_ADDRESS.to_string());
// loading arch_config.yaml file
// loading arch_config.yaml file (before tracing init so we can read tracing config)
let arch_config_path = env::var("ARCH_CONFIG_PATH_RENDERED")
.unwrap_or_else(|_| "./arch_config_rendered.yaml".to_string());
info!("Loading arch_config.yaml from {}", arch_config_path);
eprintln!("loading arch_config.yaml from {}", arch_config_path);
let config_contents =
fs::read_to_string(&arch_config_path).expect("Failed to read arch_config.yaml");
@ -65,6 +63,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let config: Configuration =
serde_yaml::from_str(&config_contents).expect("Failed to parse arch_config.yaml");
// Initialize tracing using config.yaml tracing section
let _tracer_provider = init_tracer(config.tracing.as_ref());
info!(path = %arch_config_path, "loaded arch_config.yaml");
let arch_config = Arc::new(config);
// combine agents and filters into a single list of agents
@ -116,17 +118,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
// Initialize trace collector and start background flusher
// Tracing is enabled if the tracing config is present in arch_config.yaml
// Pass Some(true/false) to override, or None to use env var OTEL_TRACING_ENABLED
let tracing_enabled = if arch_config.tracing.is_some() {
info!("Tracing configuration found in arch_config.yaml");
Some(true)
} else {
info!(
"No tracing configuration in arch_config.yaml, will check OTEL_TRACING_ENABLED env var"
);
None
};
let trace_collector = Arc::new(TraceCollector::new(tracing_enabled));
let _flusher_handle = trace_collector.clone().start_background_flusher();
// OpenTelemetry automatic instrumentation is configured in utils/tracing.rs
// Initialize conversation state storage for v1/responses
// Configurable via arch_config.yaml state_storage section
@ -136,7 +128,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
if let Some(storage_config) = &arch_config.state_storage {
let storage: Arc<dyn StateStorage> = match storage_config.storage_type {
common::configuration::StateStorageType::Memory => {
info!("Initialized conversation state storage: Memory");
info!(
storage_type = "memory",
"initialized conversation state storage"
);
Arc::new(MemoryConversationalStorage::new())
}
common::configuration::StateStorageType::Postgres => {
@ -145,8 +140,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
.as_ref()
.expect("connection_string is required for postgres state_storage");
debug!("Postgres connection string (full): {}", connection_string);
info!("Initializing conversation state storage: Postgres");
debug!(connection_string = %connection_string, "postgres connection");
info!(
storage_type = "postgres",
"initializing conversation state storage"
);
Arc::new(
PostgreSQLConversationStorage::new(connection_string.clone())
.await
@ -156,7 +154,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
};
Some(storage)
} else {
info!("No state_storage configured - conversation state management disabled");
info!("no state_storage configured, conversation state management disabled");
None
};
@ -175,7 +173,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let llm_providers = llm_providers.clone();
let agents_list = combined_agents_filters_list.clone();
let listeners = listeners.clone();
let trace_collector = trace_collector.clone();
let state_storage = state_storage.clone();
let service = service_fn(move |req| {
let router_service = Arc::clone(&router_service);
@ -186,7 +183,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let model_aliases = Arc::clone(&model_aliases);
let agents_list = agents_list.clone();
let listeners = listeners.clone();
let trace_collector = trace_collector.clone();
let state_storage = state_storage.clone();
async move {
@ -206,7 +202,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
fully_qualified_url,
agents_list,
listeners,
trace_collector,
)
.with_context(parent_cx)
.await;
@ -224,7 +219,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
fully_qualified_url,
model_aliases,
llm_providers,
trace_collector,
state_storage,
)
.with_context(parent_cx)
@ -265,7 +259,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
Ok(response)
}
_ => {
debug!("No route for {} {}", req.method(), req.uri().path());
debug!(method = %req.method(), path = %req.uri().path(), "no route found");
let mut not_found = Response::new(empty());
*not_found.status_mut() = StatusCode::NOT_FOUND;
Ok(not_found)
@ -275,13 +269,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
});
tokio::task::spawn(async move {
debug!("Accepted connection from {:?}", peer_addr);
debug!(peer = ?peer_addr, "accepted connection");
if let Err(err) = http1::Builder::new()
// .serve_connection(io, service_fn(chat_completion))
.serve_connection(io, service)
.await
{
warn!("Error serving connection: {:?}", err);
warn!(error = ?err, "error serving connection");
}
});
}

View file

@ -96,14 +96,14 @@ impl RouterService {
.generate_request(messages, &usage_preferences);
debug!(
"sending request to arch-router model: {}, endpoint: {}",
self.router_model.get_model_name(),
self.router_url
model = %self.router_model.get_model_name(),
endpoint = %self.router_url,
"sending request to arch-router"
);
debug!(
"arch request body: {}",
&serde_json::to_string(&router_request).unwrap(),
body = %serde_json::to_string(&router_request).unwrap(),
"arch router request"
);
let mut llm_route_request_headers = header::HeaderMap::new();
@ -148,9 +148,9 @@ impl RouterService {
Ok(response) => response,
Err(err) => {
warn!(
"Failed to parse JSON: {}. Body: {}",
err,
&serde_json::to_string(&body).unwrap()
error = %err,
body = %serde_json::to_string(&body).unwrap(),
"failed to parse json response"
);
return Err(RoutingError::JsonError(
err,
@ -160,7 +160,7 @@ impl RouterService {
};
if chat_completion_response.choices.is_empty() {
warn!("No choices in router response: {}", body);
warn!(body = %body, "no choices in router response");
return Ok(None);
}
@ -169,10 +169,10 @@ impl RouterService {
.router_model
.parse_response(content, &usage_preferences)?;
info!(
"arch-router determined route: {}, selected_model: {:?}, response time: {}ms",
content.replace("\n", "\\n"),
parsed_response,
router_response_time.as_millis()
content = %content.replace("\n", "\\n"),
selected_model = ?parsed_response,
response_time_ms = router_response_time.as_millis(),
"arch-router determined route"
);
if let Some(ref parsed_response) = parsed_response {

View file

@ -197,12 +197,12 @@ impl OrchestratorModel for OrchestratorModelV1 {
token_count += message_token_count;
if token_count > self.max_token_length {
debug!(
"OrchestratorModelV1: token count {} exceeds max token length {}, truncating conversation, selected message count {}, total message count: {}",
token_count,
self.max_token_length
, selected_messsage_count,
messages_vec.len()
);
token_count = token_count,
max_tokens = self.max_token_length,
selected = selected_messsage_count,
total = messages_vec.len(),
"token count exceeds max, truncating conversation"
);
if message.role == Role::User {
// If message that exceeds max token length is from user, we need to keep it
selected_messages_list_reversed.push(message);
@ -214,9 +214,7 @@ impl OrchestratorModel for OrchestratorModelV1 {
}
if selected_messages_list_reversed.is_empty() {
debug!(
"OrchestratorModelV1: no messages selected, using the last message in the conversation"
);
debug!("no messages selected, using last message");
if let Some(last_message) = messages_vec.last() {
selected_messages_list_reversed.push(last_message);
}
@ -228,12 +226,12 @@ impl OrchestratorModel for OrchestratorModelV1 {
// - last() is the first message in the original conversation
if let Some(first_message) = selected_messages_list_reversed.first() {
if first_message.role != Role::User {
warn!("OrchestratorModelV1: last message in the conversation is not from user, this may lead to incorrect orchestration");
warn!("last message is not from user, may lead to incorrect orchestration");
}
}
if let Some(last_message) = selected_messages_list_reversed.last() {
if last_message.role != Role::User {
warn!("OrchestratorModelV1: first message in the selected conversation is not from user, this may lead to incorrect orchestration");
warn!("first message is not from user, may lead to incorrect orchestration");
}
}
@ -323,8 +321,9 @@ impl OrchestratorModel for OrchestratorModelV1 {
result.push((selected_route, model_name));
} else {
warn!(
"No matching model found for route: {}, usage preferences: {:?}",
selected_route, usage_preferences
route = %selected_route,
preferences = ?usage_preferences,
"no matching model found for route"
);
}
}
@ -339,8 +338,9 @@ impl OrchestratorModel for OrchestratorModelV1 {
result.push((selected_route, model));
} else {
warn!(
"No model found for route: {}, orchestrator model preferences: {:?}",
selected_route, self.agent_orchestration_to_model_map
route = %selected_route,
preferences = ?self.agent_orchestration_to_model_map,
"no model found for route"
);
}
}

View file

@ -2,13 +2,12 @@ use std::{collections::HashMap, sync::Arc};
use common::{
configuration::{AgentUsagePreference, OrchestrationPreference},
consts::{
ARCH_PROVIDER_HINT_HEADER, PLANO_ORCHESTRATOR_MODEL_NAME, REQUEST_ID_HEADER,
TRACE_PARENT_HEADER,
},
consts::{ARCH_PROVIDER_HINT_HEADER, PLANO_ORCHESTRATOR_MODEL_NAME, REQUEST_ID_HEADER},
};
use hermesllm::apis::openai::{ChatCompletionsResponse, Message};
use hyper::header;
use opentelemetry::global;
use opentelemetry_http::HeaderInjector;
use thiserror::Error;
use tracing::{debug, info, warn};
@ -57,7 +56,6 @@ impl OrchestratorService {
pub async fn determine_orchestration(
&self,
messages: &[Message],
trace_parent: Option<String>,
usage_preferences: Option<Vec<AgentUsagePreference>>,
request_id: Option<String>,
) -> Result<Option<Vec<(String, String)>>> {
@ -75,14 +73,14 @@ impl OrchestratorService {
.generate_request(messages, &usage_preferences);
debug!(
"sending request to arch-orchestrator model: {}, endpoint: {}",
self.orchestrator_model.get_model_name(),
self.orchestrator_url
model = %self.orchestrator_model.get_model_name(),
endpoint = %self.orchestrator_url,
"sending request to arch-orchestrator"
);
debug!(
"arch orchestrator request body: {}",
&serde_json::to_string(&orchestrator_request).unwrap(),
body = %serde_json::to_string(&orchestrator_request).unwrap(),
"arch orchestrator request"
);
let mut orchestration_request_headers = header::HeaderMap::new();
@ -96,12 +94,12 @@ impl OrchestratorService {
header::HeaderValue::from_str(PLANO_ORCHESTRATOR_MODEL_NAME).unwrap(),
);
if let Some(trace_parent) = trace_parent {
orchestration_request_headers.insert(
header::HeaderName::from_static(TRACE_PARENT_HEADER),
header::HeaderValue::from_str(&trace_parent).unwrap(),
);
}
// Inject OpenTelemetry trace context from current span
global::get_text_map_propagator(|propagator| {
let cx =
tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
propagator.inject_context(&cx, &mut HeaderInjector(&mut orchestration_request_headers));
});
if let Some(request_id) = request_id {
orchestration_request_headers.insert(
@ -131,9 +129,9 @@ impl OrchestratorService {
Ok(response) => response,
Err(err) => {
warn!(
"Failed to parse JSON: {}. Body: {}",
err,
&serde_json::to_string(&body).unwrap()
error = %err,
body = %serde_json::to_string(&body).unwrap(),
"failed to parse json response"
);
return Err(OrchestrationError::JsonError(
err,
@ -143,7 +141,7 @@ impl OrchestratorService {
};
if chat_completion_response.choices.is_empty() {
warn!("No choices in orchestrator response: {}", body);
warn!(body = %body, "no choices in orchestrator response");
return Ok(None);
}
@ -152,10 +150,10 @@ impl OrchestratorService {
.orchestrator_model
.parse_response(content, &usage_preferences)?;
info!(
"arch-orchestrator determined routes: {}, selected_routes: {:?}, response time: {}ms",
content.replace("\n", "\\n"),
parsed_response,
orchestrator_response_time.as_millis()
content = %content.replace("\n", "\\n"),
selected_routes = ?parsed_response,
response_time_ms = orchestrator_response_time.as_millis(),
"arch-orchestrator determined routes"
);
if let Some(ref parsed_response) = parsed_response {

View file

@ -94,12 +94,12 @@ impl RouterModel for RouterModelV1 {
token_count += message_token_count;
if token_count > self.max_token_length {
debug!(
"RouterModelV1: token count {} exceeds max token length {}, truncating conversation, selected message count {}, total message count: {}",
token_count,
self.max_token_length
, selected_messsage_count,
messages_vec.len()
);
token_count = token_count,
max_tokens = self.max_token_length,
selected = selected_messsage_count,
total = messages_vec.len(),
"token count exceeds max, truncating conversation"
);
if message.role == Role::User {
// If message that exceeds max token length is from user, we need to keep it
selected_messages_list_reversed.push(message);
@ -111,9 +111,7 @@ impl RouterModel for RouterModelV1 {
}
if selected_messages_list_reversed.is_empty() {
debug!(
"RouterModelV1: no messages selected, using the last message in the conversation"
);
debug!("no messages selected, using last message");
if let Some(last_message) = messages_vec.last() {
selected_messages_list_reversed.push(last_message);
}
@ -122,12 +120,12 @@ impl RouterModel for RouterModelV1 {
// ensure that first and last selected message is from user
if let Some(first_message) = selected_messages_list_reversed.first() {
if first_message.role != Role::User {
warn!("RouterModelV1: last message in the conversation is not from user, this may lead to incorrect routing");
warn!("last message is not from user, may lead to incorrect routing");
}
}
if let Some(last_message) = selected_messages_list_reversed.last() {
if last_message.role != Role::User {
warn!("RouterModelV1: first message in the conversation is not from user, this may lead to incorrect routing");
warn!("first message is not from user, may lead to incorrect routing");
}
}
@ -206,8 +204,9 @@ impl RouterModel for RouterModelV1 {
return Ok(Some((selected_route, model_name)));
} else {
warn!(
"No matching model found for route: {}, usage preferences: {:?}",
selected_route, usage_preferences
route = %selected_route,
preferences = ?usage_preferences,
"no matching model found for route"
);
return Ok(None);
}
@ -219,8 +218,9 @@ impl RouterModel for RouterModelV1 {
}
warn!(
"No model found for route: {}, router model preferences: {:?}",
selected_route, self.llm_route_to_model_map
route = %selected_route,
preferences = ?self.llm_route_to_model_map,
"no model found for route"
);
Ok(None)

View file

@ -92,18 +92,16 @@ impl<P: StreamProcessor> ResponsesStateProcessor<P> {
match decoder.read_to_end(&mut decompressed) {
Ok(_) => {
debug!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Successfully decompressed {} bytes to {} bytes",
self.request_id,
self.chunk_buffer.len(),
decompressed.len()
original_bytes = self.chunk_buffer.len(),
decompressed_bytes = decompressed.len(),
"Successfully decompressed response"
);
decompressed
}
Err(e) => {
warn!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to decompress gzip buffer: {}",
self.request_id,
e
error = %e,
"Failed to decompress gzip buffer"
);
self.chunk_buffer.clone()
}
@ -111,9 +109,8 @@ impl<P: StreamProcessor> ResponsesStateProcessor<P> {
}
Some(encoding) => {
warn!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Unsupported Content-Encoding: {}. Only gzip is currently supported.",
self.request_id,
encoding
encoding = %encoding,
"Unsupported Content-Encoding, only gzip is supported"
);
self.chunk_buffer.clone()
}
@ -143,10 +140,9 @@ impl<P: StreamProcessor> ResponsesStateProcessor<P> {
serde_json::from_str::<ResponsesAPIStreamEvent>(data_str)
{
info!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Captured streaming response.completed: response_id={}, output_items={}",
self.request_id,
response.id,
response.output.len()
response_id = %response.id,
output_items = response.output.len(),
"Captured streaming response"
);
self.response_id = Some(response.id.clone());
self.output_items = Some(response.output.clone());
@ -175,24 +171,20 @@ impl<P: StreamProcessor> ResponsesStateProcessor<P> {
) {
Ok(response) => {
info!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Captured non-streaming response: response_id={}, output_items={}",
self.request_id,
response.id,
response.output.len()
response_id = %response.id,
output_items = response.output.len(),
"Captured non-streaming response"
);
self.response_id = Some(response.id.clone());
self.output_items = Some(response.output.clone());
}
Err(e) => {
// Log parse error with chunk preview for debugging
let chunk_preview = String::from_utf8_lossy(&decompressed);
let preview_len = chunk_preview.len().min(200);
warn!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to parse non-streaming ResponsesAPIResponse: {}. Decompressed preview (first {} bytes): {}",
self.request_id,
e,
preview_len,
&chunk_preview[..preview_len]
error = %e,
preview = %&chunk_preview[..preview_len],
"Failed to parse non-streaming ResponsesAPIResponse"
);
}
}
@ -221,10 +213,7 @@ impl<P: StreamProcessor> StreamProcessor for ResponsesStateProcessor<P> {
// Skip storage for OpenAI upstream
if self.is_openai_upstream {
debug!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Skipping state storage for OpenAI upstream provider",
self.request_id
);
debug!("Skipping state storage for OpenAI upstream");
return;
}
@ -234,8 +223,9 @@ impl<P: StreamProcessor> StreamProcessor for ResponsesStateProcessor<P> {
let output_as_inputs = outputs_to_inputs(output_items);
debug!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Converting outputs to inputs: output_items_count={}, converted_input_items_count={}",
self.request_id, output_items.len(), output_as_inputs.len()
output_items = output_items.len(),
converted_items = output_as_inputs.len(),
"Converting outputs to inputs"
);
// Combine original input + output as new input history
@ -243,11 +233,9 @@ impl<P: StreamProcessor> StreamProcessor for ResponsesStateProcessor<P> {
combined_input.extend(output_as_inputs);
debug!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Storing state: original_input_count={}, combined_input_count={}, combined_json={}",
self.request_id,
self.original_input.len(),
combined_input.len(),
serde_json::to_string(&combined_input).unwrap_or_else(|_| "serialization_error".to_string())
original_input = self.original_input.len(),
combined_input = combined_input.len(),
"Storing conversation state"
);
let state = OpenAIConversationState {
@ -270,28 +258,27 @@ impl<P: StreamProcessor> StreamProcessor for ResponsesStateProcessor<P> {
match storage.put(state).await {
Ok(()) => {
info!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Successfully stored conversation state for response_id: {}, items_count={}",
request_id,
response_id_clone,
items_count
request_id = %request_id,
response_id = %response_id_clone,
items = items_count,
"Stored conversation state"
);
}
Err(e) => {
warn!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to store conversation state for response_id {}: {}",
request_id,
response_id_clone,
e
request_id = %request_id,
response_id = %response_id_clone,
error = %e,
"Failed to store conversation state"
);
}
}
});
} else {
warn!(
"[PLANO_REQ_ID:{}] | STATE_PROCESSOR | No response_id captured from upstream response - cannot store conversation state. response_id present: {}, output present: {}",
self.request_id,
self.response_id.is_some(),
self.output_items.is_some()
has_response_id = self.response_id.is_some(),
has_output = self.output_items.is_some(),
"No response_id captured, cannot store conversation state"
);
}
}

View file

@ -1,5 +1,35 @@
mod constants;
mod service_name_exporter;
pub use constants::{
error, http, llm, operation_component, routing, signals, OperationNameBuilder,
};
pub use service_name_exporter::{ServiceNameOverrideExporter, SERVICE_NAME_OVERRIDE_KEY};
use opentelemetry::trace::get_active_span;
use opentelemetry::KeyValue;
/// Sets the service name override on the current active OpenTelemetry span.
///
/// This function adds the `service.name.override` attribute to the active
/// OpenTelemetry span, which allows observability backends to filter and group
/// spans by their logical service (e.g., `plano(llm)`, `plano(filter)`).
///
/// # Arguments
/// * `service_name` - The service name to use (e.g., `operation_component::LLM`)
///
/// # Example
/// ```rust,ignore
/// use brightstaff::tracing::{set_service_name, operation_component};
///
/// // Inside a traced function:
/// set_service_name(operation_component::LLM);
/// ```
pub fn set_service_name(service_name: &str) {
get_active_span(|span| {
span.set_attribute(KeyValue::new(
SERVICE_NAME_OVERRIDE_KEY,
service_name.to_string(),
));
});
}

View file

@ -0,0 +1,187 @@
//! Service Name Override Exporter
//!
//! This module provides a custom SpanExporter that allows per-span service.name overrides.
//! In OpenTelemetry, `service.name` is part of the Resource, which is tied to the TracerProvider.
//! However, if you need different service names for different spans (e.g., `plano(orchestrator)`,
//! `plano(filter)`, `plano(llm)`) within the same provider, this exporter handles that by:
//!
//! 1. Looking for a special span attribute `service.name.override`
//! 2. Grouping spans by their effective service name
//! 3. Exporting each group via a dedicated OTLP exporter whose Resource has the correct
//! `service.name`
//!
//! All per-service exporters are created eagerly at construction time so that no tonic
//! channel creation happens later inside `futures_executor::block_on` (which the
//! `BatchSpanProcessor` uses and which lacks a tokio runtime).
//!
//! # Usage
//!
//! ```rust
//! use brightstaff::tracing::{set_service_name, operation_component};
//!
//! // In your instrumented code, set the service name override:
//! set_service_name(operation_component::LLM);
//! ```
use opentelemetry::Key;
use opentelemetry_otlp::WithExportConfig;
use opentelemetry_sdk::error::OTelSdkResult;
use opentelemetry_sdk::trace::{SpanData, SpanExporter};
use opentelemetry_sdk::Resource;
use std::collections::HashMap;
use std::time::Duration;
use tokio::sync::Mutex;
use super::operation_component;
/// The attribute key used to override the service name for a specific span.
/// Set this as a span attribute to route the span to a different service.
pub const SERVICE_NAME_OVERRIDE_KEY: &str = "service.name.override";
/// Default service name used when no override is set on a span.
const DEFAULT_SERVICE_NAME: &str = "plano";
/// All known service names that will have dedicated exporters.
const ALL_SERVICE_NAMES: &[&str] = &[
DEFAULT_SERVICE_NAME,
operation_component::INBOUND,
operation_component::ROUTING,
operation_component::ORCHESTRATOR,
operation_component::AGENT_FILTER,
operation_component::AGENT,
operation_component::LLM,
];
/// Span attribute keys to remove before export.
const FILTERED_ATTR_KEYS: &[&str] = &[
"busy_ns",
"idle_ns",
"thread.id",
"thread.name",
"code.file.path",
"code.line.number",
"code.module.name",
"target",
];
/// A SpanExporter that supports per-span `service.name` overrides.
///
/// Internally it holds one OTLP exporter per known service name. Each exporter
/// has its own `Resource` with the correct `service.name`, so backends like
/// Jaeger see the spans under the right service.
pub struct ServiceNameOverrideExporter {
/// Map from service name → pre-created OTLP exporter (behind tokio Mutex
/// because `SpanExporter::export` takes `&self` and the future must be Send).
exporters: HashMap<String, Mutex<opentelemetry_otlp::SpanExporter>>,
}
// Manual Debug because `opentelemetry_otlp::SpanExporter` doesn't implement Debug
impl std::fmt::Debug for ServiceNameOverrideExporter {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ServiceNameOverrideExporter")
.field("services", &self.exporters.keys().collect::<Vec<_>>())
.finish()
}
}
impl ServiceNameOverrideExporter {
/// Create a new `ServiceNameOverrideExporter`.
///
/// This eagerly creates one OTLP gRPC exporter per known service name so
/// that the tonic channel is established while a tokio runtime is available.
///
/// # Arguments
/// * `endpoint` The OTLP collector endpoint URL (e.g. `http://localhost:4317`)
pub fn new(endpoint: &str) -> Self {
let mut exporters = HashMap::new();
for &service_name in ALL_SERVICE_NAMES {
let resource = Resource::builder_empty()
.with_service_name(service_name)
.build();
let mut exporter = opentelemetry_otlp::SpanExporter::builder()
.with_tonic()
.with_endpoint(endpoint)
.build()
.expect("Failed to create OTLP span exporter");
exporter.set_resource(&resource);
exporters.insert(service_name.to_string(), Mutex::new(exporter));
}
Self { exporters }
}
}
impl SpanExporter for ServiceNameOverrideExporter {
fn export(
&self,
batch: Vec<SpanData>,
) -> impl std::future::Future<Output = OTelSdkResult> + Send {
let override_key = Key::new(SERVICE_NAME_OVERRIDE_KEY);
// Group spans by their effective service name
let mut spans_by_service: HashMap<String, Vec<SpanData>> = HashMap::new();
let should_filter = !tracing::enabled!(tracing::Level::DEBUG);
for span in batch {
let mut span = span;
if should_filter {
span.attributes
.retain(|kv| !FILTERED_ATTR_KEYS.contains(&kv.key.as_str()));
}
let service_name = span
.attributes
.iter()
.find(|kv| kv.key == override_key)
.map(|kv| kv.value.to_string())
.unwrap_or_else(|| DEFAULT_SERVICE_NAME.to_string());
spans_by_service.entry(service_name).or_default().push(span);
}
// Collect grouped spans into a Vec so the async block owns the data.
let results: Vec<(String, Vec<SpanData>)> = spans_by_service.into_iter().collect();
async move {
for (service_name, spans) in results {
// Look up the pre-created exporter; fall back to default if
// the service name isn't one of the known ones.
let key = if self.exporters.contains_key(&service_name) {
service_name.clone()
} else {
DEFAULT_SERVICE_NAME.to_string()
};
if let Some(exporter_mutex) = self.exporters.get(&key) {
let exporter = exporter_mutex.lock().await;
if let Err(e) = exporter.export(spans).await {
tracing::warn!(
service = %service_name,
error = ?e,
"Failed to export spans"
);
}
}
}
Ok(())
}
}
fn shutdown_with_timeout(&mut self, timeout: Duration) -> OTelSdkResult {
for (_, exporter_mutex) in self.exporters.iter() {
if let Ok(mut exporter) = exporter_mutex.try_lock() {
let _ = exporter.shutdown_with_timeout(timeout);
}
}
Ok(())
}
fn set_resource(&mut self, _resource: &Resource) {
// Each inner exporter already has its own resource set at creation time.
// Nothing to propagate.
}
}

View file

@ -3,12 +3,17 @@ use std::sync::OnceLock;
use opentelemetry::global;
use opentelemetry_sdk::{propagation::TraceContextPropagator, trace::SdkTracerProvider};
use opentelemetry_stdout::SpanExporter;
use time::macros::format_description;
use tracing::{Event, Subscriber};
use tracing_subscriber::fmt::{format, time::FormatTime, FmtContext, FormatEvent, FormatFields};
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::registry::LookupSpan;
use tracing_subscriber::util::SubscriberInitExt;
use tracing_subscriber::EnvFilter;
use crate::tracing::ServiceNameOverrideExporter;
use common::configuration::Tracing;
struct BracketedTime;
impl FormatTime for BracketedTime {
@ -29,7 +34,7 @@ struct BracketedFormatter;
impl<S, N> FormatEvent<S, N> for BracketedFormatter
where
S: Subscriber + for<'a> tracing_subscriber::registry::LookupSpan<'a>,
S: Subscriber + for<'a> LookupSpan<'a>,
N: for<'a> FormatFields<'a> + 'static,
{
fn format_event(
@ -43,36 +48,116 @@ where
write!(
writer,
"[{}] ",
"[{}]",
event.metadata().level().to_string().to_lowercase()
)?;
// Extract request_id from span context if present
if let Some(scope) = ctx.event_scope() {
for span in scope.from_root() {
let extensions = span.extensions();
if let Some(fields) = extensions.get::<FormattedFields<N>>() {
let fields_str = fields.fields.as_str();
// Look for request_id in the formatted fields
if let Some(start) = fields_str.find("request_id=") {
let rest = &fields_str[start + 11..]; // Skip "request_id="
let end = rest.find(|c: char| c.is_whitespace()).unwrap_or(rest.len());
let rid = &rest[..end];
write!(writer, " request_id={}", rid)?;
break;
}
}
}
}
write!(writer, " ")?;
ctx.field_format().format_fields(writer.by_ref(), event)?;
writeln!(writer)
}
}
use tracing_subscriber::fmt::FormattedFields;
static INIT_LOGGER: OnceLock<SdkTracerProvider> = OnceLock::new();
pub fn init_tracer() -> &'static SdkTracerProvider {
pub fn init_tracer(tracing_config: Option<&Tracing>) -> &'static SdkTracerProvider {
INIT_LOGGER.get_or_init(|| {
global::set_text_map_propagator(TraceContextPropagator::new());
// Install stdout exporter pipeline to be able to retrieve the collected spans.
// For the demonstration, use `Sampler::AlwaysOn` sampler to sample all traces.
let provider = SdkTracerProvider::builder()
.with_simple_exporter(SpanExporter::default())
.build();
global::set_tracer_provider(provider.clone());
// Get OTEL endpoint and sampling from config.yaml tracing section
let otel_endpoint = tracing_config.and_then(|t| t.opentracing_grpc_endpoint.clone());
tracing_subscriber::fmt()
.with_env_filter(
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")),
)
.event_format(BracketedFormatter)
.init();
let random_sampling = tracing_config.and_then(|t| t.random_sampling).unwrap_or(0);
provider
let tracing_enabled = random_sampling > 0 && otel_endpoint.is_some();
eprintln!(
"initializing tracing: tracing_enabled={}, otel_endpoint={:?}, random_sampling={}",
tracing_enabled, otel_endpoint, random_sampling
);
// Create OTLP exporter to send spans to collector
if tracing_enabled {
// Set service name via environment if not already set
if std::env::var("OTEL_SERVICE_NAME").is_err() {
std::env::set_var("OTEL_SERVICE_NAME", "plano");
}
// Create ServiceNameOverrideExporter to support per-span service names
// This allows spans to have different service names (e.g., plano(orchestrator),
// plano(filter), plano(llm)) by setting the "service.name.override" attribute
let exporter = ServiceNameOverrideExporter::new(otel_endpoint.as_ref().unwrap());
let provider = SdkTracerProvider::builder()
.with_batch_exporter(exporter)
.build();
global::set_tracer_provider(provider.clone());
// Create OpenTelemetry tracing layer using TracerProvider trait
use opentelemetry::trace::TracerProvider as _;
let telemetry_layer =
tracing_opentelemetry::layer().with_tracer(provider.tracer("brightstaff"));
// Combine the OpenTelemetry layer with fmt layer using the registry
let env_filter =
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
// Create fmt layer with span field formatting enabled (no ANSI to keep fields parseable)
let fmt_layer = tracing_subscriber::fmt::layer()
.event_format(BracketedFormatter)
.fmt_fields(format::DefaultFields::new())
.with_ansi(false);
let subscriber = tracing_subscriber::registry()
.with(telemetry_layer)
.with(env_filter)
.with(fmt_layer);
tracing::subscriber::set_global_default(subscriber)
.expect("Failed to set tracing subscriber");
provider
} else {
// Tracing disabled - use no-op provider
let provider = SdkTracerProvider::builder().build();
global::set_tracer_provider(provider.clone());
let env_filter =
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
// Create fmt layer with span field formatting enabled (no ANSI to keep fields parseable)
let fmt_layer = tracing_subscriber::fmt::layer()
.event_format(BracketedFormatter)
.fmt_fields(format::DefaultFields::new())
.with_ansi(false);
tracing_subscriber::registry()
.with(env_filter)
.with(fmt_layer)
.init();
provider
}
})
}

View file

@ -21,14 +21,8 @@ url = "2.5.4"
hermesllm = { version = "0.1.0", path = "../hermesllm" }
serde_with = "3.13.0"
# Optional dependencies for trace collection (not available in WASM)
tokio = { version = "1.44", features = ["sync", "time"], optional = true }
reqwest = { version = "0.12", features = ["json"], optional = true }
tracing = { version = "0.1", optional = true }
[features]
default = []
trace-collection = ["tokio", "reqwest", "tracing"]
[dev-dependencies]
pretty_assertions = "1.4.1"

View file

@ -90,6 +90,8 @@ pub struct Overrides {
pub struct Tracing {
pub sampling_rate: Option<f64>,
pub trace_arch_internal: Option<bool>,
pub random_sampling: Option<u32>,
pub opentracing_grpc_endpoint: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Default)]

View file

@ -30,7 +30,6 @@ pub const ARCH_MODEL_PREFIX: &str = "Arch";
pub const HALLUCINATION_TEMPLATE: &str =
"It seems I'm missing some information. Could you provide the following details ";
pub const OTEL_COLLECTOR_HTTP: &str = "opentelemetry_collector_http";
pub const OTEL_POST_PATH: &str = "/v1/traces";
pub const LLM_ROUTE_HEADER: &str = "x-arch-llm-route";
pub const ENVOY_RETRY_HEADER: &str = "x-envoy-max-retries";
pub const BRIGHT_STAFF_SERVICE_NAME: &str = "brightstaff";

View file

@ -1,292 +0,0 @@
use super::resource_span_builder::ResourceSpanBuilder;
use super::shapes::Span;
use std::collections::{HashMap, VecDeque};
use std::sync::Arc;
use tokio::sync::Mutex;
use tokio::time::{interval, Duration};
use tracing::{debug, error, warn};
/// Parse W3C traceparent header into trace_id and parent_span_id
/// Format: "00-{trace_id}-{parent_span_id}-01"
///
/// Returns (trace_id, Option<parent_span_id>)
/// - parent_span_id is None if it's all zeros (0000000000000000), indicating a root span
pub fn parse_traceparent(traceparent: &str) -> (String, Option<String>) {
let parts: Vec<&str> = traceparent.split('-').collect();
if parts.len() == 4 {
let trace_id = parts[1].to_string();
let parent_span_id = parts[2].to_string();
// If parent_span_id is all zeros, this is a root span with no parent
let parent = if parent_span_id == "0000000000000000" {
None
} else {
Some(parent_span_id)
};
(trace_id, parent)
} else {
warn!("Invalid traceparent format: {}", traceparent);
// Return empty trace ID and None for parent if parsing fails
(String::new(), None)
}
}
/// Collects and batches spans, flushing them to an OTEL collector
///
/// Supports multiple services, with each service (e.g., "archgw(routing)", "archgw(llm)")
/// maintaining its own span queue. Flushes all services together periodically.
///
/// Tracing can be enabled/disabled in two ways:
/// 1. Via arch_config.yaml: presence of `tracing` configuration section
/// 2. Via environment variable: `OTEL_TRACING_ENABLED=true/false`
///
/// When disabled, span recording and flushing are no-ops.
pub struct TraceCollector {
/// Spans grouped by service name
/// Key: service name (e.g., "archgw(routing)", "archgw(llm)")
/// Value: queue of spans for that service
spans_by_service: Arc<Mutex<HashMap<String, VecDeque<Span>>>>,
flush_interval: Duration,
otel_url: String,
/// Whether tracing is enabled
enabled: bool,
}
impl TraceCollector {
/// Create a new trace collector
///
/// # Arguments
/// * `enabled` - Whether tracing is enabled
/// - `Some(true)` - Force enable tracing
/// - `Some(false)` - Force disable tracing
/// - `None` - Check `OTEL_TRACING_ENABLED` env var (defaults to true if not set)
///
/// Other parameters are read from environment variables:
/// - `TRACE_FLUSH_INTERVAL_MS` - Flush interval in milliseconds (default: 1000)
/// - `OTEL_COLLECTOR_URL` - OTEL collector endpoint (default: http://localhost:9903/v1/traces)
pub fn new(enabled: Option<bool>) -> Self {
let flush_interval_ms = std::env::var("TRACE_FLUSH_INTERVAL_MS")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(1000);
let otel_url = std::env::var("OTEL_COLLECTOR_URL")
.unwrap_or_else(|_| "http://localhost:9903/v1/traces".to_string());
// Determine if tracing is enabled:
// 1. Use explicit parameter if provided
// 2. Otherwise check OTEL_TRACING_ENABLED env var
// 3. Default to false if neither is set (tracing opt-in, not opt-out)
let enabled = enabled.unwrap_or_else(|| {
std::env::var("OTEL_TRACING_ENABLED")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(false)
});
debug!(
"TraceCollector initialized: flush_interval={}ms, url={}, enabled={}",
flush_interval_ms, otel_url, enabled
);
Self {
spans_by_service: Arc::new(Mutex::new(HashMap::new())),
flush_interval: Duration::from_millis(flush_interval_ms),
otel_url,
enabled,
}
}
/// Record a span for a specific service
///
/// # Arguments
/// * `service_name` - Name of the service (e.g., "archgw(routing)", "archgw(llm)")
/// * `span` - The span to record
pub fn record_span(&self, service_name: impl Into<String>, span: Span) {
// Skip recording if tracing is disabled
if !self.enabled {
return;
}
let service_name = service_name.into();
// Use try_lock to avoid blocking in async contexts
// If the lock is held, we skip recording (telemetry shouldn't block the app)
if let Ok(mut spans_by_service) = self.spans_by_service.try_lock() {
// Get or create the queue for this service
let spans = spans_by_service
.entry(service_name)
.or_insert_with(VecDeque::new);
spans.push_back(span);
} else {
// Lock contention - skip recording this span
debug!("Skipped span recording due to lock contention");
}
// Flushing is handled by the periodic background flusher (see `start_background_flusher`).
}
/// Flush all buffered spans to the OTEL collector
/// Builds ResourceSpans for each service with spans
pub async fn flush(&self) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
// Skip flushing if tracing is disabled
if !self.enabled {
return Ok(());
}
let mut spans_by_service = self.spans_by_service.lock().await;
if spans_by_service.is_empty() {
return Ok(());
}
// Snapshot and drain all services' spans
let service_batches: Vec<(String, Vec<Span>)> = spans_by_service
.iter_mut()
.filter_map(|(service_name, spans)| {
if spans.is_empty() {
None
} else {
Some((service_name.clone(), spans.drain(..).collect()))
}
})
.collect();
drop(spans_by_service); // Release lock before HTTP call
if service_batches.is_empty() {
return Ok(());
}
let total_spans: usize = service_batches.iter().map(|(_, spans)| spans.len()).sum();
debug!(
"Flushing {} spans across {} services to OTEL collector",
total_spans,
service_batches.len()
);
// Build canonical OTEL payload structure - one ResourceSpan per service
let resource_spans = self.build_resource_spans(service_batches);
match self.send_to_otel(resource_spans).await {
Ok(_) => {
debug!("Successfully flushed {} spans", total_spans);
Ok(())
}
Err(e) => {
warn!("Failed to send spans to OTEL collector: {:?}", e);
Err(e)
}
}
}
/// Build OTEL-compliant resource spans from collected spans, one ResourceSpan per service
fn build_resource_spans(
&self,
service_batches: Vec<(String, Vec<Span>)>,
) -> Vec<super::shapes::ResourceSpan> {
service_batches
.into_iter()
.map(|(service_name, spans)| {
ResourceSpanBuilder::new(&service_name)
.add_spans(spans)
.build()
})
.collect()
}
/// Send resource spans to OTEL collector
/// Serializes as {"resourceSpans": [...]} per OTEL spec
async fn send_to_otel(
&self,
resource_spans: Vec<super::shapes::ResourceSpan>,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let client = reqwest::Client::new();
// Create OTEL payload with proper structure
let payload = serde_json::json!({
"resourceSpans": resource_spans
});
let response = client
.post(&self.otel_url)
.header("Content-Type", "application/json")
.json(&payload)
.timeout(Duration::from_secs(5))
.send()
.await?;
if !response.status().is_success() {
warn!(
"OTEL collector returned non-success status: {}",
response.status()
);
return Err(format!("OTEL collector error: {}", response.status()).into());
}
Ok(())
}
/// Start a background task that periodically flushes traces
/// Returns a join handle that can be used to stop the flusher
pub fn start_background_flusher(self: Arc<Self>) -> tokio::task::JoinHandle<()> {
let flush_interval = self.flush_interval;
tokio::spawn(async move {
let mut ticker = interval(flush_interval);
loop {
ticker.tick().await;
if let Err(e) = self.flush().await {
error!("Background trace flush failed: {:?}", e);
}
}
})
}
/// Get current number of buffered spans across all services (for testing/monitoring)
pub async fn buffered_count(&self) -> usize {
self.spans_by_service
.lock()
.await
.values()
.map(|spans| spans.len())
.sum()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::traces::SpanBuilder;
#[tokio::test]
async fn test_collector_basic() {
let collector = TraceCollector::new(Some(true));
let span = SpanBuilder::new("test_operation")
.with_trace_id("abc123")
.build();
collector.record_span("test-service", span);
assert_eq!(collector.buffered_count().await, 1);
}
#[tokio::test]
async fn test_collector_auto_flush() {
// Since batch-triggered flush behavior was removed, record two spans and verify both are buffered
let collector = Arc::new(TraceCollector::new(Some(true)));
let span1 = SpanBuilder::new("test1").build();
let span2 = SpanBuilder::new("test2").build();
collector.record_span("test-service", span1);
collector.record_span("test-service", span2);
// With no batch-triggered flush, both spans should remain buffered
assert_eq!(collector.buffered_count().await, 2);
}
}

View file

@ -5,12 +5,6 @@ mod constants;
mod resource_span_builder;
mod span_builder;
#[cfg(feature = "trace-collection")]
mod collector;
#[cfg(all(test, feature = "trace-collection"))]
mod tests;
// Re-export original types
pub use shapes::{
Attribute, AttributeValue, Event, Resource, ResourceSpan, Scope, ScopeSpan, Span, Traceparent,
@ -21,6 +15,3 @@ pub use shapes::{
pub use constants::*;
pub use resource_span_builder::ResourceSpanBuilder;
pub use span_builder::{generate_random_span_id, SpanBuilder, SpanKind};
#[cfg(feature = "trace-collection")]
pub use collector::{parse_traceparent, TraceCollector};

View file

@ -1,96 +0,0 @@
//! Mock OTEL Collector for testing trace output
//!
//! This module provides a simple HTTP server that mimics an OTEL collector.
//! It exposes three endpoints:
//! - POST /v1/traces: Capture incoming OTLP JSON payloads
//! - GET /v1/traces: Return all captured payloads as JSON array
//! - DELETE /v1/traces: Clear all captured payloads
//!
//! Each test creates its own MockOtelCollector instance.
use axum::{
extract::State,
http::StatusCode,
routing::{delete, get, post},
Json, Router,
};
use serde_json::Value;
use std::sync::Arc;
use tokio::sync::RwLock;
type SharedTraces = Arc<RwLock<Vec<Value>>>;
/// POST /v1/traces - capture incoming OTLP payload
async fn post_traces(State(traces): State<SharedTraces>, Json(payload): Json<Value>) -> StatusCode {
traces.write().await.push(payload);
StatusCode::OK
}
/// GET /v1/traces - return all captured payloads
async fn get_traces(State(traces): State<SharedTraces>) -> Json<Vec<Value>> {
Json(traces.read().await.clone())
}
/// DELETE /v1/traces - clear all captured payloads
async fn delete_traces(State(traces): State<SharedTraces>) -> StatusCode {
traces.write().await.clear();
StatusCode::NO_CONTENT
}
/// Mock OTEL collector server
pub struct MockOtelCollector {
address: String,
client: reqwest::Client,
#[allow(dead_code)]
server_handle: tokio::task::JoinHandle<()>,
}
impl MockOtelCollector {
/// Create and start a new mock collector on a random port
pub async fn start() -> Self {
let traces = Arc::new(RwLock::new(Vec::new()));
let app = Router::new()
.route("/v1/traces", post(post_traces))
.route("/v1/traces", get(get_traces))
.route("/v1/traces", delete(delete_traces))
.with_state(traces.clone());
let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
.await
.expect("Failed to bind to random port");
let addr = listener.local_addr().expect("Failed to get local address");
let address = format!("http://127.0.0.1:{}", addr.port());
let server_handle = tokio::spawn(async move {
axum::serve(listener, app).await.expect("Server failed");
});
// Give server a moment to start
tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;
Self {
address,
client: reqwest::Client::new(),
server_handle,
}
}
/// Get the address of the collector
pub fn address(&self) -> &str {
&self.address
}
/// GET /v1/traces - fetch all captured payloads
pub async fn get_traces(&self) -> Vec<Value> {
self.client
.get(format!("{}/v1/traces", self.address))
.send()
.await
.expect("Failed to GET traces")
.json()
.await
.expect("Failed to parse traces JSON")
}
}

View file

@ -1,4 +0,0 @@
mod mock_otel_collector;
mod trace_integration_test;
pub use mock_otel_collector::MockOtelCollector;

View file

@ -1,342 +0,0 @@
//! Integration tests for OpenTelemetry tracing in router.rs
//!
//! These tests validate that the spans created for LLM requests contain
//! all expected attributes and events by checking the raw JSON payloads
//! sent to the mock OTEL collector.
//!
//! ## Test Design
//! Each test creates its own MockOtelCollector and TraceCollector:
//! 1. Start MockOtelCollector on random port
//! 2. Create TraceCollector with 500ms flush interval
//! 3. Record spans using TraceCollector
//! 4. Flush and wait (500ms + 200ms buffer = 700ms total) for spans to arrive
//! 5. Get raw JSON payloads (GET /v1/traces) and validate structure
//! 6. Test cleanup happens automatically when collectors are dropped
//!
//! ## Serial Execution
//! Tests use the `#[serial]` attribute to run sequentially because they
//! use global environment variables (OTEL_COLLECTOR_URL, OTEL_TRACING_ENABLED,
//! TRACE_FLUSH_INTERVAL_MS). This ensures test isolation without requiring
//! the `--test-threads=1` command line flag.
const FLUSH_INTERVAL_MS: u64 = 50;
const FLUSH_BUFFER_MS: u64 = 50;
const TOTAL_WAIT_MS: u64 = FLUSH_INTERVAL_MS + FLUSH_BUFFER_MS;
use crate::traces::{SpanBuilder, SpanKind, TraceCollector};
use serde_json::Value;
use serial_test::serial;
use std::sync::Arc;
use super::MockOtelCollector;
/// Helper to extract all spans from OTLP JSON payloads
fn extract_spans(payloads: &[Value]) -> Vec<&Value> {
let mut spans = Vec::new();
for payload in payloads {
if let Some(resource_spans) = payload.get("resourceSpans").and_then(|v| v.as_array()) {
for resource_span in resource_spans {
if let Some(scope_spans) =
resource_span.get("scopeSpans").and_then(|v| v.as_array())
{
for scope_span in scope_spans {
if let Some(span_list) = scope_span.get("spans").and_then(|v| v.as_array())
{
spans.extend(span_list.iter());
}
}
}
}
}
}
spans
}
/// Helper to get string attribute value from a span
fn get_string_attr<'a>(span: &'a Value, key: &str) -> Option<&'a str> {
span.get("attributes")
.and_then(|attrs| attrs.as_array())
.and_then(|attrs| {
attrs
.iter()
.find(|attr| attr.get("key").and_then(|k| k.as_str()) == Some(key))
})
.and_then(|attr| attr.get("value"))
.and_then(|v| v.get("stringValue"))
.and_then(|v| v.as_str())
}
#[tokio::test]
#[serial]
async fn test_llm_span_contains_basic_attributes() {
// Start mock OTEL collector
let mock_collector = MockOtelCollector::start().await;
// Create TraceCollector pointing to mock with 500ms flush intervalc
std::env::set_var(
"OTEL_COLLECTOR_URL",
format!("{}/v1/traces", mock_collector.address()),
);
std::env::set_var("OTEL_TRACING_ENABLED", "true");
std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500");
let trace_collector = Arc::new(TraceCollector::new(Some(true)));
// Create a test span simulating router.rs behavior
let span = SpanBuilder::new("POST /v1/chat/completions >> /v1/chat/completions")
.with_kind(SpanKind::Client)
.with_trace_id("test-trace-123")
.with_attribute("http.method", "POST")
.with_attribute("http.target", "/v1/chat/completions")
.with_attribute("http.upstream_target", "/v1/chat/completions")
.with_attribute("llm.model", "gpt-4o")
.with_attribute("llm.provider", "openai")
.with_attribute("llm.is_streaming", "true")
.with_attribute("llm.temperature", "0.7")
.build();
trace_collector.record_span("archgw(llm)", span);
// Flush and wait for spans to arrive (500ms flush interval + 200ms buffer)
trace_collector.flush().await.expect("Failed to flush");
tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await;
let payloads = mock_collector.get_traces().await;
let spans = extract_spans(&payloads);
assert_eq!(spans.len(), 1, "Expected exactly one span");
let span = spans[0];
// Validate HTTP attributes
assert_eq!(get_string_attr(span, "http.method"), Some("POST"));
assert_eq!(
get_string_attr(span, "http.target"),
Some("/v1/chat/completions")
);
// Validate LLM attributes
assert_eq!(get_string_attr(span, "llm.model"), Some("gpt-4o"));
assert_eq!(get_string_attr(span, "llm.provider"), Some("openai"));
assert_eq!(get_string_attr(span, "llm.is_streaming"), Some("true"));
assert_eq!(get_string_attr(span, "llm.temperature"), Some("0.7"));
}
#[tokio::test]
#[serial]
async fn test_llm_span_contains_tool_information() {
let mock_collector = MockOtelCollector::start().await;
std::env::set_var(
"OTEL_COLLECTOR_URL",
format!("{}/v1/traces", mock_collector.address()),
);
std::env::set_var("OTEL_TRACING_ENABLED", "true");
std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500");
let trace_collector = Arc::new(TraceCollector::new(Some(true)));
let tools_formatted = "get_weather(...)\nsearch_web(...)\ncalculate(...)";
let span = SpanBuilder::new("POST /v1/chat/completions")
.with_trace_id("test-trace-tools")
.with_attribute("llm.request.tools", tools_formatted)
.with_attribute("llm.model", "gpt-4o")
.build();
trace_collector.record_span("archgw(llm)", span);
trace_collector.flush().await.expect("Failed to flush");
tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await;
let payloads = mock_collector.get_traces().await;
let spans = extract_spans(&payloads);
assert!(!spans.is_empty(), "No spans captured");
let span = spans[0];
let tools = get_string_attr(span, "llm.request.tools");
assert!(tools.is_some(), "Tools attribute missing");
assert!(tools.unwrap().contains("get_weather(...)"));
assert!(tools.unwrap().contains("search_web(...)"));
assert!(tools.unwrap().contains("calculate(...)"));
assert!(
tools.unwrap().contains('\n'),
"Tools should be newline-separated"
);
}
#[tokio::test]
#[serial]
async fn test_llm_span_contains_user_message_preview() {
let mock_collector = MockOtelCollector::start().await;
std::env::set_var(
"OTEL_COLLECTOR_URL",
format!("{}/v1/traces", mock_collector.address()),
);
std::env::set_var("OTEL_TRACING_ENABLED", "true");
std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500");
let trace_collector = Arc::new(TraceCollector::new(Some(true)));
let long_message =
"This is a very long user message that should be truncated to 50 characters in the span";
let preview = if long_message.len() > 50 {
format!("{}...", &long_message[..50])
} else {
long_message.to_string()
};
let span = SpanBuilder::new("POST /v1/messages")
.with_trace_id("test-trace-preview")
.with_attribute("llm.request.user_message_preview", &preview)
.build();
trace_collector.record_span("archgw(llm)", span);
trace_collector.flush().await.expect("Failed to flush");
tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await;
let payloads = mock_collector.get_traces().await;
let spans = extract_spans(&payloads);
let span = spans[0];
let message_preview = get_string_attr(span, "llm.request.user_message_preview");
assert!(message_preview.is_some());
assert!(message_preview.unwrap().len() <= 53); // 50 chars + "..."
assert!(message_preview.unwrap().contains("..."));
}
#[tokio::test]
#[serial]
async fn test_llm_span_contains_time_to_first_token() {
let mock_collector = MockOtelCollector::start().await;
std::env::set_var(
"OTEL_COLLECTOR_URL",
format!("{}/v1/traces", mock_collector.address()),
);
std::env::set_var("OTEL_TRACING_ENABLED", "true");
std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500");
let trace_collector = Arc::new(TraceCollector::new(Some(true)));
let ttft_ms = "245"; // milliseconds as string
let span = SpanBuilder::new("POST /v1/chat/completions")
.with_trace_id("test-trace-ttft")
.with_attribute("llm.is_streaming", "true")
.with_attribute("llm.time_to_first_token_ms", ttft_ms)
.build();
trace_collector.record_span("archgw(llm)", span);
trace_collector.flush().await.expect("Failed to flush");
tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await;
let payloads = mock_collector.get_traces().await;
let spans = extract_spans(&payloads);
let span = spans[0];
// Check TTFT attribute
let ttft_attr = get_string_attr(span, "llm.time_to_first_token_ms");
assert_eq!(ttft_attr, Some("245"));
}
#[tokio::test]
#[serial]
async fn test_llm_span_contains_upstream_path() {
let mock_collector = MockOtelCollector::start().await;
std::env::set_var(
"OTEL_COLLECTOR_URL",
format!("{}/v1/traces", mock_collector.address()),
);
std::env::set_var("OTEL_TRACING_ENABLED", "true");
std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500");
let trace_collector = Arc::new(TraceCollector::new(Some(true)));
// Test Zhipu provider with path transformation
let span = SpanBuilder::new("POST /v1/chat/completions >> /api/paas/v4/chat/completions")
.with_trace_id("test-trace-upstream")
.with_attribute("http.upstream_target", "/api/paas/v4/chat/completions")
.with_attribute("llm.provider", "zhipu")
.with_attribute("llm.model", "glm-4")
.build();
trace_collector.record_span("archgw(llm)", span);
trace_collector.flush().await.expect("Failed to flush");
tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await;
let payloads = mock_collector.get_traces().await;
let spans = extract_spans(&payloads);
let span = spans[0];
// Operation name should show the transformation
let name = span.get("name").and_then(|v| v.as_str());
assert!(name.is_some());
assert!(
name.unwrap().contains(">>"),
"Operation name should show path transformation"
);
// Check upstream target attribute
let upstream = get_string_attr(span, "http.upstream_target");
assert_eq!(upstream, Some("/api/paas/v4/chat/completions"));
}
#[tokio::test]
#[serial]
async fn test_llm_span_multiple_services() {
let mock_collector = MockOtelCollector::start().await;
std::env::set_var(
"OTEL_COLLECTOR_URL",
format!("{}/v1/traces", mock_collector.address()),
);
std::env::set_var("OTEL_TRACING_ENABLED", "true");
std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500");
let trace_collector = Arc::new(TraceCollector::new(Some(true)));
// Create spans for different services
let llm_span = SpanBuilder::new("LLM Request")
.with_trace_id("test-multi")
.with_attribute("service", "llm")
.build();
let routing_span = SpanBuilder::new("Routing Decision")
.with_trace_id("test-multi")
.with_attribute("service", "routing")
.build();
trace_collector.record_span("archgw(llm)", llm_span);
trace_collector.record_span("archgw(routing)", routing_span);
trace_collector.flush().await.expect("Failed to flush");
tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await;
let payloads = mock_collector.get_traces().await;
let all_spans = extract_spans(&payloads);
assert_eq!(all_spans.len(), 2, "Should have captured both spans");
}
#[tokio::test]
#[serial]
async fn test_tracing_disabled_produces_no_spans() {
let mock_collector = MockOtelCollector::start().await;
// Create TraceCollector with tracing DISABLED
std::env::set_var(
"OTEL_COLLECTOR_URL",
format!("{}/v1/traces", mock_collector.address()),
);
std::env::set_var("OTEL_TRACING_ENABLED", "false");
std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500");
let trace_collector = Arc::new(TraceCollector::new(Some(false)));
let span = SpanBuilder::new("Test Span")
.with_trace_id("test-disabled")
.build();
trace_collector.record_span("archgw(llm)", span);
trace_collector.flush().await.ok(); // Should be no-op when disabled
tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await;
let payloads = mock_collector.get_traces().await;
let all_spans = extract_spans(&payloads);
assert_eq!(
all_spans.len(),
0,
"No spans should be captured when tracing is disabled"
);
}

View file

@ -74,7 +74,7 @@ impl RootContext for FilterContext {
fn create_http_context(&self, context_id: u32) -> Option<Box<dyn HttpContext>> {
trace!(
"||| create_http_context called with context_id: {:?} |||",
"create_http_context called with context_id: {:?}",
context_id
);
@ -107,10 +107,7 @@ impl Context for FilterContext {
_body_size: usize,
_num_trailers: usize,
) {
trace!(
"||| on_http_call_response called with token_id: {:?} |||",
token_id
);
trace!("on_http_call_response called with token_id: {:?}", token_id);
let _callout_data = self
.callouts
@ -119,7 +116,7 @@ impl Context for FilterContext {
.expect("invalid token_id");
if let Some(status) = self.get_http_call_response_header(":status") {
trace!("trace response status: {:?}", status);
trace!("response status: {:?}", status);
};
}
}

View file

@ -97,7 +97,7 @@ impl StreamContext {
.as_ref()
.filter(|id| !id.is_empty())
.cloned()
.unwrap_or_else(|| "NO_REQUEST_ID".to_string())
.unwrap_or_else(|| "no_request_id".to_string())
}
fn llm_provider(&self) -> &LlmProvider {
self.llm_provider
@ -145,14 +145,14 @@ impl StreamContext {
match self.llm_providers.default() {
Some(default_provider) => {
info!(
"[PLANO_REQ_ID:{}] Provider selection failed, using default provider",
"request_id={}: provider selection failed, using default provider",
self.request_identifier()
);
default_provider
}
None => {
error!(
"[PLANO_REQ_ID:{}] PROVIDER_SELECTION_FAILED: Error='{}' and no default provider configured",
"request_id={}: provider selection failed, error='{}' and no default provider configured",
self.request_identifier(),
err
);
@ -165,7 +165,7 @@ impl StreamContext {
self.llm_provider = Some(provider);
info!(
"[PLANO_REQ_ID:{}] PROVIDER_SELECTION: Hint='{}' -> Selected='{}'",
"request_id={}: provider selected, hint='{}' selected='{}'",
self.request_identifier(),
self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
.unwrap_or("none".to_string()),
@ -180,12 +180,12 @@ impl StreamContext {
// Check if client provided an Authorization header
if self.get_http_request_header("Authorization").is_none() {
warn!(
"[PLANO_REQ_ID:{}] AUTH_PASSTHROUGH: passthrough_auth enabled but no Authorization header present in client request",
"request_id={}: passthrough_auth enabled but no authorization header present in client request",
self.request_identifier()
);
} else {
debug!(
"[PLANO_REQ_ID:{}] AUTH_PASSTHROUGH: preserving client Authorization header for provider '{}'",
"request_id={}: preserving client authorization header for provider '{}'",
self.request_identifier(),
self.llm_provider().name
);
@ -249,7 +249,11 @@ impl StreamContext {
}
fn send_server_error(&self, error: ServerError, override_status_code: Option<StatusCode>) {
warn!("server error occurred: {}", error);
warn!(
"request_id={}: server error occurred: {}",
self.request_identifier(),
error
);
self.send_http_response(
override_status_code
.unwrap_or(StatusCode::INTERNAL_SERVER_ERROR)
@ -269,7 +273,7 @@ impl StreamContext {
let token_count = tokenizer::token_count(model, json_string).unwrap_or(0);
debug!(
"[PLANO_REQ_ID:{}] TOKEN_COUNT: model='{}' input_tokens={}",
"request_id={}: token count, model='{}' input_tokens={}",
self.request_identifier(),
model,
token_count
@ -283,7 +287,7 @@ impl StreamContext {
// Check if rate limiting needs to be applied.
if let Some(selector) = self.ratelimit_selector.take() {
info!(
"[PLANO_REQ_ID:{}] RATELIMIT_CHECK: model='{}' selector='{}:{}'",
"request_id={}: ratelimit check, model='{}' selector='{}:{}'",
self.request_identifier(),
model,
selector.key,
@ -296,7 +300,7 @@ impl StreamContext {
)?;
} else {
debug!(
"[PLANO_REQ_ID:{}] RATELIMIT_SKIP: model='{}' (no selector)",
"request_id={}: ratelimit skip, model='{}' (no selector)",
self.request_identifier(),
model
);
@ -315,7 +319,7 @@ impl StreamContext {
Ok(duration) => {
let duration_ms = duration.as_millis();
info!(
"[PLANO_REQ_ID:{}] TIME_TO_FIRST_TOKEN: {}ms",
"request_id={}: time to first token {}ms",
self.request_identifier(),
duration_ms
);
@ -324,7 +328,7 @@ impl StreamContext {
}
Err(e) => {
warn!(
"[PLANO_REQ_ID:{}] TIME_MEASUREMENT_ERROR: {:?}",
"request_id={}: time measurement error: {:?}",
self.request_identifier(),
e
);
@ -340,7 +344,7 @@ impl StreamContext {
// Convert the duration to milliseconds
let duration_ms = duration.as_millis();
info!(
"[PLANO_REQ_ID:{}] REQUEST_COMPLETE: latency={}ms tokens={}",
"request_id={}: request complete, latency={}ms tokens={}",
self.request_identifier(),
duration_ms,
self.response_tokens
@ -356,7 +360,7 @@ impl StreamContext {
self.metrics.time_per_output_token.record(tpot);
info!(
"[PLANO_REQ_ID:{}] TOKEN_THROUGHPUT: time_per_token={}ms tokens_per_second={}",
"request_id={}: token throughput, time_per_token={}ms tokens_per_second={}",
self.request_identifier(),
tpot,
1000 / tpot
@ -366,7 +370,11 @@ impl StreamContext {
}
}
Err(e) => {
warn!("SystemTime error: {:?}", e);
warn!(
"request_id={}: system time error: {:?}",
self.request_identifier(),
e
);
}
}
// Record the output sequence length
@ -379,7 +387,7 @@ impl StreamContext {
if self.streaming_response {
let chunk_size = body_size;
debug!(
"[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_CHUNK: streaming=true chunk_size={}",
"request_id={}: upstream response chunk, streaming=true chunk_size={}",
self.request_identifier(),
chunk_size
);
@ -387,7 +395,7 @@ impl StreamContext {
Some(chunk) => chunk,
None => {
warn!(
"[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_ERROR: empty chunk, size={}",
"request_id={}: upstream response error, empty chunk size={}",
self.request_identifier(),
chunk_size
);
@ -397,7 +405,7 @@ impl StreamContext {
if streaming_chunk.len() != chunk_size {
warn!(
"[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_MISMATCH: expected={} actual={}",
"request_id={}: upstream response size mismatch, expected={} actual={}",
self.request_identifier(),
chunk_size,
streaming_chunk.len()
@ -409,14 +417,17 @@ impl StreamContext {
return Err(Action::Continue);
}
debug!(
"[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_COMPLETE: streaming=false body_size={}",
"request_id={}: upstream response complete, streaming=false body_size={}",
self.request_identifier(),
body_size
);
match self.get_http_response_body(0, body_size) {
Some(body) => Ok(body),
None => {
warn!("non streaming response body empty");
warn!(
"request_id={}: non streaming response body empty",
self.request_identifier()
);
Err(Action::Continue)
}
}
@ -429,7 +440,7 @@ impl StreamContext {
provider_id: ProviderId,
) -> Result<Vec<u8>, Action> {
debug!(
"[PLANO_REQ_ID:{}] STREAMING_PROCESS: client={:?} provider_id={:?} chunk_size={}",
"request_id={}: streaming process, client={:?} provider_id={:?} chunk_size={}",
self.request_identifier(),
self.client_api,
provider_id,
@ -460,7 +471,11 @@ impl StreamContext {
{
Ok(buffer) => Some(buffer),
Err(e) => {
warn!("Failed to create SSE buffer: {}", e);
warn!(
"request_id={}: failed to create sse buffer: {}",
self.request_identifier(),
e
);
return Err(Action::Continue);
}
};
@ -477,7 +492,7 @@ impl StreamContext {
Ok(events) => {
if has_buffered {
debug!(
"[PLANO_REQ_ID:{}] SSE_INCOMPLETE_BUFFERED: {} bytes buffered for next chunk",
"request_id={}: sse incomplete buffered, {} bytes buffered for next chunk",
self.request_identifier(),
buffered_size
);
@ -486,7 +501,7 @@ impl StreamContext {
}
Err(e) => {
warn!(
"[PLANO_REQ_ID:{}] SSE_CHUNK_PROCESS_ERROR: {}",
"request_id={}: sse chunk process error: {}",
self.request_identifier(),
e
);
@ -495,7 +510,10 @@ impl StreamContext {
}
}
None => {
warn!("SSE chunk processor unexpectedly missing");
warn!(
"request_id={}: sse chunk processor unexpectedly missing",
self.request_identifier()
);
return Err(Action::Continue);
}
};
@ -510,7 +528,7 @@ impl StreamContext {
if provider_response.is_final() {
debug!(
"[PLANO_REQ_ID:{}] STREAMING_FINAL_CHUNK: total_tokens={}",
"request_id={}: streaming final chunk, total_tokens={}",
self.request_identifier(),
self.response_tokens
);
@ -520,7 +538,7 @@ impl StreamContext {
let estimated_tokens = content.len() / 4;
self.response_tokens += estimated_tokens.max(1);
debug!(
"[PLANO_REQ_ID:{}] STREAMING_TOKEN_UPDATE: delta_chars={} estimated_tokens={} total_tokens={}",
"request_id={}: streaming token update, delta_chars={} estimated_tokens={} total_tokens={}",
self.request_identifier(),
content.len(),
estimated_tokens.max(1),
@ -530,7 +548,7 @@ impl StreamContext {
}
Err(e) => {
warn!(
"[PLANO_REQ_ID:{}] STREAMING_CHUNK_ERROR: {}",
"request_id={}: streaming chunk error: {}",
self.request_identifier(),
e
);
@ -552,7 +570,7 @@ impl StreamContext {
if !bytes.is_empty() {
let content = String::from_utf8_lossy(&bytes);
debug!(
"[PLANO_REQ_ID:{}] UPSTREAM_TRANSFORMED_CLIENT_RESPONSE: size={} content={}",
"request_id={}: upstream transformed client response, size={} content={}",
self.request_identifier(),
bytes.len(),
content
@ -561,13 +579,19 @@ impl StreamContext {
Ok(bytes)
}
None => {
warn!("SSE buffer unexpectedly missing after initialization");
warn!(
"request_id={}: sse buffer unexpectedly missing after initialization",
self.request_identifier()
);
Err(Action::Continue)
}
}
}
None => {
warn!("Missing client_api for non-streaming response");
warn!(
"request_id={}: missing client_api for non-streaming response",
self.request_identifier()
);
Err(Action::Continue)
}
}
@ -590,7 +614,7 @@ impl StreamContext {
Ok(buffer) => Some(buffer),
Err(e) => {
warn!(
"[PLANO_REQ_ID:{}] BEDROCK_BUFFER_INIT_ERROR: {}",
"request_id={}: bedrock buffer init error: {}",
self.request_identifier(),
e
);
@ -620,7 +644,7 @@ impl StreamContext {
let estimated_tokens = content.len() / 4;
self.response_tokens += estimated_tokens.max(1);
debug!(
"[PLANO_REQ_ID:{}] BEDROCK_TOKEN_UPDATE: delta_chars={} estimated_tokens={} total_tokens={}",
"request_id={}: bedrock token update, delta_chars={} estimated_tokens={} total_tokens={}",
self.request_identifier(),
content.len(),
estimated_tokens.max(1),
@ -638,7 +662,7 @@ impl StreamContext {
}
Err(e) => {
warn!(
"[PLANO_REQ_ID:{}] BEDROCK_FRAME_CONVERSION_ERROR: {}",
"request_id={}: bedrock frame conversion error: {}",
self.request_identifier(),
e
);
@ -648,7 +672,7 @@ impl StreamContext {
Some(DecodedFrame::Incomplete) => {
// Incomplete frame - buffer retains partial data, wait for more bytes
debug!(
"[PLANO_REQ_ID:{}] BEDROCK_INCOMPLETE_FRAME: waiting for more data",
"request_id={}: bedrock incomplete frame, waiting for more data",
self.request_identifier()
);
break;
@ -656,7 +680,7 @@ impl StreamContext {
None => {
// Decode error
warn!(
"[PLANO_REQ_ID:{}] BEDROCK_DECODE_ERROR",
"request_id={}: bedrock decode error",
self.request_identifier()
);
return Err(Action::Continue);
@ -671,7 +695,7 @@ impl StreamContext {
if !bytes.is_empty() {
let content = String::from_utf8_lossy(&bytes);
debug!(
"[PLANO_REQ_ID:{}] UPSTREAM_TRANSFORMED_CLIENT_RESPONSE: size={} content={}",
"request_id={}: upstream transformed client response, size={} content={}",
self.request_identifier(),
bytes.len(),
content
@ -681,7 +705,7 @@ impl StreamContext {
}
None => {
warn!(
"[PLANO_REQ_ID:{}] BEDROCK_BUFFER_MISSING",
"request_id={}: bedrock buffer missing",
self.request_identifier()
);
Err(Action::Continue)
@ -695,7 +719,7 @@ impl StreamContext {
provider_id: ProviderId,
) -> Result<Vec<u8>, Action> {
debug!(
"[PLANO_REQ_ID:{}] NON_STREAMING_PROCESS: provider_id={:?} body_size={}",
"request_id={}: non-streaming process, provider_id={:?} body_size={}",
self.request_identifier(),
provider_id,
body.len()
@ -707,7 +731,7 @@ impl StreamContext {
Ok(response) => response,
Err(e) => {
warn!(
"[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_PARSE_ERROR: {} | body: {}",
"request_id={}: upstream response parse error: {} | body: {}",
self.request_identifier(),
e,
String::from_utf8_lossy(body)
@ -722,7 +746,7 @@ impl StreamContext {
}
None => {
warn!(
"[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_ERROR: missing client_api",
"request_id={}: upstream response error, missing client_api",
self.request_identifier()
);
return Err(Action::Continue);
@ -734,7 +758,7 @@ impl StreamContext {
response.extract_usage_counts()
{
debug!(
"[PLANO_REQ_ID:{}] RESPONSE_USAGE: prompt_tokens={} completion_tokens={} total_tokens={}",
"request_id={}: response usage, prompt_tokens={} completion_tokens={} total_tokens={}",
self.request_identifier(),
prompt_tokens,
completion_tokens,
@ -743,7 +767,7 @@ impl StreamContext {
self.response_tokens = completion_tokens;
} else {
warn!(
"[PLANO_REQ_ID:{}] RESPONSE_USAGE: no usage information found",
"request_id={}: response usage, no usage information found",
self.request_identifier()
);
}
@ -751,14 +775,18 @@ impl StreamContext {
match serde_json::to_vec(&response) {
Ok(bytes) => {
debug!(
"[PLANO_REQ_ID:{}] CLIENT_RESPONSE_PAYLOAD: {}",
"request_id={}: client response payload: {}",
self.request_identifier(),
String::from_utf8_lossy(&bytes)
);
Ok(bytes)
}
Err(e) => {
warn!("Failed to serialize normalized response: {}", e);
warn!(
"request_id={}: failed to serialize normalized response: {}",
self.request_identifier(),
e
);
self.send_server_error(
ServerError::LogicError(format!("Response serialization error: {}", e)),
Some(StatusCode::INTERNAL_SERVER_ERROR),
@ -819,7 +847,7 @@ impl HttpContext for StreamContext {
Some(provider_id.compatible_api_for_client(api, self.streaming_response));
debug!(
"[PLANO_REQ_ID:{}] ROUTING_INFO: provider='{}' client_api={:?} resolved_api={:?} request_path='{}'",
"request_id={}: routing info, provider='{}' client_api={:?} resolved_api={:?} request_path='{}'",
self.request_identifier(),
provider.to_provider_id(),
api,
@ -863,7 +891,7 @@ impl HttpContext for StreamContext {
fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
debug!(
"[PLANO_REQ_ID:{}] REQUEST_BODY_CHUNK: bytes={} end_stream={}",
"request_id={}: request body chunk, bytes={} end_stream={}",
self.request_identifier(),
body_size,
end_of_stream
@ -902,14 +930,14 @@ impl HttpContext for StreamContext {
let mut deserialized_client_request: ProviderRequestType = match self.client_api.as_ref() {
Some(the_client_api) => {
info!(
"[PLANO_REQ_ID:{}] CLIENT_REQUEST_RECEIVED: api={:?} body_size={}",
"request_id={}: client request received, api={:?} body_size={}",
self.request_identifier(),
the_client_api,
body_bytes.len()
);
debug!(
"[PLANO_REQ_ID:{}] CLIENT_REQUEST_PAYLOAD: {}",
"request_id={}: client request payload: {}",
self.request_identifier(),
String::from_utf8_lossy(&body_bytes)
);
@ -918,7 +946,7 @@ impl HttpContext for StreamContext {
Ok(deserialized) => deserialized,
Err(e) => {
warn!(
"[PLANO_REQ_ID:{}] CLIENT_REQUEST_PARSE_ERROR: {} | body: {}",
"request_id={}: client request parse error: {} | body: {}",
self.request_identifier(),
e,
String::from_utf8_lossy(&body_bytes)
@ -953,7 +981,7 @@ impl HttpContext for StreamContext {
Some(model_name) => model_name,
None => {
warn!(
"[PLANO_REQ_ID:{}] MODEL_RESOLUTION_ERROR: no model specified | req_model='{}' provider='{}' config_model={:?}",
"request_id={}: model resolution error, no model specified | req_model='{}' provider='{}' config_model={:?}",
self.request_identifier(),
model_requested,
self.llm_provider().name,
@ -981,7 +1009,7 @@ impl HttpContext for StreamContext {
self.user_message = deserialized_client_request.get_recent_user_message();
info!(
"[PLANO_REQ_ID:{}] MODEL_RESOLUTION: req_model='{}' -> resolved_model='{}' provider='{}' streaming={}",
"request_id={}: model resolved, req_model='{}' -> resolved_model='{}' provider='{}' streaming={}",
self.request_identifier(),
model_requested,
resolved_model,
@ -1008,56 +1036,68 @@ impl HttpContext for StreamContext {
}
// Convert chat completion request to llm provider specific request using provider interface
let serialized_body_bytes_upstream =
match self.resolved_api.as_ref() {
Some(upstream) => {
info!(
"[PLANO_REQ_ID:{}] UPSTREAM_TRANSFORM: client_api={:?} -> upstream_api={:?}",
self.request_identifier(), self.client_api, upstream
let serialized_body_bytes_upstream = match self.resolved_api.as_ref() {
Some(upstream) => {
info!(
"request_id={}: upstream transform, client_api={:?} -> upstream_api={:?}",
self.request_identifier(),
self.client_api,
upstream
);
match ProviderRequestType::try_from((deserialized_client_request, upstream)) {
Ok(request) => {
debug!(
"[PLANO_REQ_ID:{}] UPSTREAM_REQUEST_PAYLOAD: {}",
self.request_identifier(),
String::from_utf8_lossy(&request.to_bytes().unwrap_or_default())
);
match ProviderRequestType::try_from((deserialized_client_request, upstream)) {
Ok(request) => {
debug!(
"request_id={}: upstream request payload: {}",
self.request_identifier(),
String::from_utf8_lossy(&request.to_bytes().unwrap_or_default())
);
match request.to_bytes() {
Ok(bytes) => bytes,
Err(e) => {
warn!("Failed to serialize request body: {}", e);
self.send_server_error(
ServerError::LogicError(format!(
"Request serialization error: {}",
e
)),
Some(StatusCode::BAD_REQUEST),
);
return Action::Pause;
}
match request.to_bytes() {
Ok(bytes) => bytes,
Err(e) => {
warn!(
"request_id={}: failed to serialize request body: {}",
self.request_identifier(),
e
);
self.send_server_error(
ServerError::LogicError(format!(
"Request serialization error: {}",
e
)),
Some(StatusCode::BAD_REQUEST),
);
return Action::Pause;
}
}
Err(e) => {
warn!("Failed to create provider request: {}", e);
self.send_server_error(
ServerError::LogicError(format!("Provider request error: {}", e)),
Some(StatusCode::BAD_REQUEST),
);
return Action::Pause;
}
}
Err(e) => {
warn!(
"request_id={}: failed to create provider request: {}",
self.request_identifier(),
e
);
self.send_server_error(
ServerError::LogicError(format!("Provider request error: {}", e)),
Some(StatusCode::BAD_REQUEST),
);
return Action::Pause;
}
}
None => {
warn!("No upstream API resolved");
self.send_server_error(
ServerError::LogicError("No upstream API resolved".into()),
Some(StatusCode::BAD_REQUEST),
);
return Action::Pause;
}
};
}
None => {
warn!(
"request_id={}: no upstream api resolved",
self.request_identifier()
);
self.send_server_error(
ServerError::LogicError("No upstream API resolved".into()),
Some(StatusCode::BAD_REQUEST),
);
return Action::Pause;
}
};
self.set_http_request_body(0, body_size, &serialized_body_bytes_upstream);
Action::Continue
@ -1070,7 +1110,7 @@ impl HttpContext for StreamContext {
self.upstream_status_code = StatusCode::from_u16(status_code).ok();
debug!(
"[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_STATUS: {}",
"request_id={}: upstream response status: {}",
self.request_identifier(),
status_code
);
@ -1090,14 +1130,17 @@ impl HttpContext for StreamContext {
fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
if self.request_body_sent_time.is_none() {
debug!("on_http_response_body: request body not sent, not doing any processing in llm filter");
debug!(
"request_id={}: request body not sent, skipping processing in llm filter",
self.request_identifier()
);
return Action::Continue;
}
let current_time = get_current_time().unwrap();
if end_of_stream && body_size == 0 {
debug!(
"[PLANO_REQ_ID:{}] RESPONSE_BODY_COMPLETE: total_bytes={}",
"request_id={}: response body complete, total_bytes={}",
self.request_identifier(),
body_size
);
@ -1109,7 +1152,7 @@ impl HttpContext for StreamContext {
if let Some(status_code) = &self.upstream_status_code {
if status_code.is_client_error() || status_code.is_server_error() {
info!(
"[PLANO_REQ_ID:{}] UPSTREAM_ERROR_RESPONSE: status={} body_size={}",
"request_id={}: upstream error response, status={} body_size={}",
self.request_identifier(),
status_code.as_u16(),
body_size
@ -1119,7 +1162,7 @@ impl HttpContext for StreamContext {
if body_size > 0 {
if let Ok(body) = self.read_raw_response_body(body_size) {
debug!(
"[PLANO_REQ_ID:{}] UPSTREAM_ERROR_BODY: {}",
"request_id={}: upstream error body: {}",
self.request_identifier(),
String::from_utf8_lossy(&body)
);
@ -1141,7 +1184,7 @@ impl HttpContext for StreamContext {
None => "None".to_string(),
};
info!(
"[PLANO_REQ_ID:{}], UNSUPPORTED API: {}",
"request_id={}: unsupported api: {}",
self.request_identifier(),
api_info
);
@ -1155,7 +1198,7 @@ impl HttpContext for StreamContext {
};
debug!(
"[PLANO_REQ_ID:{}] UPSTREAM_RAW_RESPONSE: body_size={} content={}",
"request_id={}: upstream raw response, body_size={} content={}",
self.request_identifier(),
body.len(),
String::from_utf8_lossy(&body)

View file

@ -47,15 +47,15 @@ This brings up:
- RAG Agent REST server on port 10505
- Plano listener on port 8001 (and gateway on 12000)
- Jaeger UI for viewing traces at http://localhost:16686
- Open WebUI at http://localhost:8080 for interactive queries
- AnythingLLM at http://localhost:3001 for interactive queries
> Set `OPENAI_API_KEY` in your environment before running; `LLM_GATEWAY_ENDPOINT` defaults to `http://host.docker.internal:12000/v1`.
### 2. Test the system
**Option A: Using Open WebUI (recommended)**
**Option A: Using AnythingLLM (recommended)**
Navigate to http://localhost:8080 and send queries through the chat interface.
Navigate to http://localhost:3001 and send queries through the chat interface.
**Option B: Using curl**
```bash

View file

@ -31,12 +31,17 @@ services:
- "16686:16686"
- "4317:4317"
- "4318:4318"
open-web-ui:
image: dyrnq/open-webui:main
anythingllm:
image: mintplexlabs/anythingllm
restart: always
ports:
- "8080:8080"
- "3001:3001"
cap_add:
- SYS_ADMIN
environment:
- DEFAULT_MODEL=gpt-4o-mini
- ENABLE_OPENAI_API=true
- OPENAI_API_BASE_URL=http://host.docker.internal:8001/v1
- STORAGE_DIR=/app/server/storage
- LLM_PROVIDER=generic-openai
- GENERIC_OPEN_AI_BASE_PATH=http://plano:8001/v1
- GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
- GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
- GENERIC_OPEN_AI_API_KEY=sk-placeholder

View file

@ -1,15 +1,34 @@
services:
plano:
build:
context: ../../../
dockerfile: Dockerfile
ports:
- "12000:12000"
- "12001:12001"
environment:
- ARCH_CONFIG_PATH=/app/arch_config.yaml
- OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
- OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317
volumes:
- ./config.yaml:/app/arch_config.yaml:ro
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
open-web-ui:
image: dyrnq/open-webui:main
anythingllm:
image: mintplexlabs/anythingllm
restart: always
ports:
- "8080:8080"
- "3001:3001"
cap_add:
- SYS_ADMIN
environment:
- DEFAULT_MODEL=gpt-4o-mini
- ENABLE_OPENAI_API=true
- OPENAI_API_BASE_URL=http://host.docker.internal:12000/v1
- STORAGE_DIR=/app/server/storage
- LLM_PROVIDER=generic-openai
- GENERIC_OPEN_AI_BASE_PATH=http://plano:12000/v1
- GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
- GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
- GENERIC_OPEN_AI_API_KEY=sk-placeholder
jaeger:
build:

View file

@ -47,15 +47,15 @@ This brings up:
- RAG Agent REST server on port 10505
- Plano listener on port 8001 (and gateway on 12000)
- Jaeger UI for viewing traces at http://localhost:16686
- Open WebUI at http://localhost:8080 for interactive queries
- AnythingLLM at http://localhost:3001 for interactive queries
> Set `OPENAI_API_KEY` in your environment before running; `LLM_GATEWAY_ENDPOINT` defaults to `http://host.docker.internal:12000/v1`.
### 2. Test the system
**Option A: Using Open WebUI (recommended)**
**Option A: Using AnythingLLM (recommended)**
Navigate to http://localhost:8080 and send queries through the chat interface.
Navigate to http://localhost:3001 and send queries through the chat interface.
**Option B: Using curl**
```bash

View file

@ -16,6 +16,8 @@ services:
context: ../../../
dockerfile: Dockerfile
ports:
- "11000:11000"
- "12001:12001"
- "12000:12000"
- "8001:8001"
environment:
@ -31,12 +33,17 @@ services:
- "16686:16686"
- "4317:4317"
- "4318:4318"
open-web-ui:
image: dyrnq/open-webui:main
anythingllm:
image: mintplexlabs/anythingllm
restart: always
ports:
- "8080:8080"
- "3001:3001"
cap_add:
- SYS_ADMIN
environment:
- DEFAULT_MODEL=gpt-4o-mini
- ENABLE_OPENAI_API=true
- OPENAI_API_BASE_URL=http://host.docker.internal:8001/v1
- STORAGE_DIR=/app/server/storage
- LLM_PROVIDER=generic-openai
- GENERIC_OPEN_AI_BASE_PATH=http://plano:8001/v1
- GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
- GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
- GENERIC_OPEN_AI_API_KEY=sk-placeholder

View file

@ -47,13 +47,13 @@ This starts:
- **Plano** (ports 12000, 8001) - routing and orchestration
- **CrewAI Flight Agent** (port 10520) - flight search
- **LangChain Weather Agent** (port 10510) - weather forecasts
- **Open WebUI** (port 8080) - chat interface
- **AnythingLLM** (port 3001) - chat interface
- **Jaeger** (port 16686) - distributed tracing
### Try It Out
1. **Open the Chat Interface**
- Navigate to [http://localhost:8080](http://localhost:8080)
- Navigate to [http://localhost:3001](http://localhost:3001)
- Create an account (stored locally)
2. **Ask Multi-Agent Questions**
@ -75,9 +75,9 @@ This starts:
## Architecture
```
┌─────────────┐
Open WebUI │ (Chat Interface)
└──────┬──────┘
┌─────────────
AnythingLLM │ (Chat Interface)
└──────┬──────
v
┌─────────────┐

View file

@ -55,3 +55,4 @@ listeners:
tracing:
random_sampling: 100
opentracing_grpc_endpoint: http://jaeger:4317

View file

@ -9,7 +9,8 @@ services:
environment:
- ARCH_CONFIG_PATH=/app/arch_config.yaml
- OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
- OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
- OTEL_TRACING_GRPC_ENDPOINT=http://jaeger:4317
- LOG_LEVEL=${LOG_LEVEL:-info}
volumes:
- ./config.yaml:/app/arch_config.yaml:ro
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
@ -36,15 +37,20 @@ services:
- LLM_GATEWAY_ENDPOINT=http://plano:12000/v1
command: ["python", "-u", "langchain/weather_agent.py"]
open-web-ui:
image: dyrnq/open-webui:main
anythingllm:
image: mintplexlabs/anythingllm
restart: always
ports:
- "8080:8080"
- "3001:3001"
cap_add:
- SYS_ADMIN
environment:
- DEFAULT_MODEL=gpt-4o-mini
- ENABLE_OPENAI_API=true
- OPENAI_API_BASE_URL=http://plano:8001/v1
- STORAGE_DIR=/app/server/storage
- LLM_PROVIDER=generic-openai
- GENERIC_OPEN_AI_BASE_PATH=http://plano:8001/v1
- GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
- GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
- GENERIC_OPEN_AI_API_KEY=sk-placeholder
jaeger:
build:
@ -53,4 +59,3 @@ services:
ports:
- "16686:16686" # Jaeger UI
- "4317:4317" # OTLP gRPC receiver
- "4318:4318" # OTLP HTTP receiver

View file

@ -6,7 +6,7 @@ This demo shows how you can use user preferences to route user prompts to approp
Make sure your machine is up to date with [latest version of plano]([url](https://github.com/katanemo/plano/tree/main?tab=readme-ov-file#prerequisites)). And you have activated the virtual environment.
1. start the openwebui
1. start anythingllm
```bash
(venv) $ cd demos/use_cases/preference_based_routing
(venv) $ docker compose up -d
@ -24,7 +24,7 @@ Make sure your machine is up to date with [latest version of plano]([url](https:
...
```
3. open openwebui http://localhost:8080/
3. open AnythingLLM http://localhost:3001/
# Testing out preference based routing

View file

@ -1,14 +1,37 @@
services:
open-web-ui:
image: dyrnq/open-webui:main
plano:
build:
context: ../../../
dockerfile: Dockerfile
ports:
- "12000:12000"
- "12001:12001"
environment:
- ARCH_CONFIG_PATH=/app/arch_config.yaml
- OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:?ANTHROPIC_API_KEY environment variable is required but not set}
- OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317
- OTEL_TRACING_ENABLED=true
- RUST_LOG=debug
volumes:
- ./config.yaml:/app/arch_config.yaml:ro
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
anythingllm:
image: mintplexlabs/anythingllm
restart: always
ports:
- "8080:8080"
- "3001:3001"
cap_add:
- SYS_ADMIN
environment:
- DEFAULT_MODELS=gpt-4o-mini
- ENABLE_OPENAI_API=true
- OPENAI_API_BASE_URL=http://host.docker.internal:12000/v1
- STORAGE_DIR=/app/server/storage
- LLM_PROVIDER=generic-openai
- GENERIC_OPEN_AI_BASE_PATH=http://plano:12000/v1
- GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
- GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
- GENERIC_OPEN_AI_API_KEY=sk-placeholder
jaeger:
build:
@ -18,12 +41,12 @@ services:
- "4317:4317"
- "4318:4318"
prometheus:
build:
context: ../../shared/prometheus
# prometheus:
# build:
# context: ../../shared/prometheus
grafana:
build:
context: ../../shared/grafana
ports:
- "3000:3000"
# grafana:
# build:
# context: ../../shared/grafana
# ports:
# - "3000:3000"

View file

@ -130,5 +130,6 @@ prompt_targets:
required: true
type: int
tracing:
opentracing_grpc_endpoint: http://host.docker.internal:4317
random_sampling: 100
version: v0.3.0

View file

@ -16,4 +16,4 @@ services:
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:?error}
- OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
- OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317

View file

@ -1,31 +0,0 @@
POST http://localhost:4318/v1/traces
Content-Type: application/json
{
"resourceSpans": [
{
"resource": {
"attributes": [
{ "key": "service.name", "value": { "stringValue": "upstream-llm" } }
]
},
"scopeSpans": [
{
"scope": { "name": "default", "version": "1.0", "attributes": [] },
"spans": [
{
"traceId": "fa8f7c410c28092faafbd7d4a2f5e742",
"spanId": "4dc43055a07410d6",
"parentSpanId": "f0acd74216a5e179",
"name": "archgw",
"startTimeUnixNano": "1731363782228270000",
"endTimeUnixNano": "1731363787843156000",
"kind": 1,
"attributes": []
}
]
}
]
}
]
}