From 46de89590b2a682e26a4a8e432111bc459311a7c Mon Sep 17 00:00:00 2001
From: Adil Hafeez <adil.hafeez@gmail.com>
Date: Mon, 9 Feb 2026 13:33:27 -0800
Subject: [PATCH] use standard tracing and logging in brightstaff (#721)

---
 .github/workflows/rust_tests.yml              |   3 -
 Dockerfile                                    |   2 +-
 archgw.code-workspace                         |   3 +-
 cli/planoai/config_generator.py               |  27 +
 cli/planoai/consts.py                         |   1 +
 cli/planoai/docker_cli.py                     |   7 +
 cli/planoai/main.py                           |   3 +-
 config/arch_config_schema.yaml                |   2 +
 config/docker-compose.dev.yaml                |   2 +-
 config/envoy.template.yaml                    | 113 +----
 crates/.vscode/launch.json                    |   5 +-
 crates/Cargo.lock                             | 177 +++----
 crates/Cargo.toml                             |   4 -
 crates/brightstaff/Cargo.toml                 |  15 +-
 .../src/handlers/agent_chat_completions.rs    | 403 ++++++++-------
 .../src/handlers/agent_selector.rs            |  15 +-
 .../src/handlers/function_calling.rs          |  52 +-
 .../src/handlers/integration_tests.rs         |  10 +-
 crates/brightstaff/src/handlers/llm.rs        | 302 +++++-------
 .../src/handlers/pipeline_processor.rs        | 460 ++++--------------
 .../src/handlers/response_handler.rs          |  53 +-
 .../brightstaff/src/handlers/router_chat.rs   | 122 +----
 crates/brightstaff/src/handlers/utils.rs      | 342 ++++++-------
 crates/brightstaff/src/main.rs                |  46 +-
 crates/brightstaff/src/router/llm_router.rs   |  26 +-
 .../src/router/orchestrator_model_v1.rs       |  30 +-
 .../src/router/plano_orchestrator.rs          |  46 +-
 .../brightstaff/src/router/router_model_v1.rs |  30 +-
 .../src/state/response_state_processor.rs     |  81 ++-
 crates/brightstaff/src/tracing/mod.rs         |  30 ++
 .../src/tracing/service_name_exporter.rs      | 187 +++++++
 crates/brightstaff/src/utils/tracing.rs       | 119 ++++-
 crates/common/Cargo.toml                      |   6 -
 crates/common/src/configuration.rs            |   2 +
 crates/common/src/consts.rs                   |   1 -
 crates/common/src/traces/collector.rs         | 292 -----------
 crates/common/src/traces/mod.rs               |   9 -
 .../src/traces/tests/mock_otel_collector.rs   |  96 ----
 crates/common/src/traces/tests/mod.rs         |   4 -
 .../traces/tests/trace_integration_test.rs    | 342 -------------
 crates/llm_gateway/src/filter_context.rs      |   9 +-
 crates/llm_gateway/src/stream_context.rs      | 247 ++++++----
 demos/use_cases/http_filter/README.md         |   6 +-
 .../use_cases/http_filter/docker-compose.yaml |  17 +-
 .../use_cases/llm_routing/docker-compose.yaml |  31 +-
 demos/use_cases/mcp_filter/README.md          |   6 +-
 .../use_cases/mcp_filter/docker-compose.yaml  |  19 +-
 .../README.md                                 |  10 +-
 .../config.yaml                               |   1 +
 .../docker-compose.yaml                       |  21 +-
 .../preference_based_routing/README.md        |   4 +-
 .../docker-compose.yaml                       |  51 +-
 .../arch_config_full_reference_rendered.yaml  |   1 +
 tests/e2e/docker-compose.yaml                 |   2 +-
 tests/rest/tracing.rest                       |  31 --
 55 files changed, 1494 insertions(+), 2432 deletions(-)
 create mode 100644 crates/brightstaff/src/tracing/service_name_exporter.rs
 delete mode 100644 crates/common/src/traces/collector.rs
 delete mode 100644 crates/common/src/traces/tests/mock_otel_collector.rs
 delete mode 100644 crates/common/src/traces/tests/mod.rs
 delete mode 100644 crates/common/src/traces/tests/trace_integration_test.rs
 delete mode 100644 tests/rest/tracing.rest

diff --git a/.github/workflows/rust_tests.yml b/.github/workflows/rust_tests.yml
index e871745c..d72e7af4 100644
--- a/.github/workflows/rust_tests.yml
+++ b/.github/workflows/rust_tests.yml
@@ -29,6 +29,3 @@ jobs:
 
       - name: Run unit tests
         run: cargo test --lib
-
-      - name: Run trace integration tests
-        run: cargo test -p common --features trace-collection traces::tests::trace_integration_test
diff --git a/Dockerfile b/Dockerfile
index a9c7887f..2bd15377 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -40,7 +40,7 @@ COPY crates/brightstaff/src    brightstaff/src
 RUN find common hermesllm brightstaff -name "*.rs" -exec touch {} +
 RUN cargo build --release -p brightstaff
 
-FROM docker.io/envoyproxy/envoy:v1.36.4 AS envoy
+FROM docker.io/envoyproxy/envoy:v1.37.0 AS envoy
 
 FROM python:3.13.6-slim AS arch
 
diff --git a/archgw.code-workspace b/archgw.code-workspace
index bd42f64a..366191ef 100644
--- a/archgw.code-workspace
+++ b/archgw.code-workspace
@@ -25,8 +25,7 @@
     "[python]": {
       "editor.defaultFormatter": "ms-python.black-formatter",
       "editor.formatOnSave": true
-    },
-    "rust-analyzer.cargo.features": ["trace-collection"]
+    }
   },
   "extensions": {
     "recommendations": [
diff --git a/cli/planoai/config_generator.py b/cli/planoai/config_generator.py
index 2157b714..27768546 100644
--- a/cli/planoai/config_generator.py
+++ b/cli/planoai/config_generator.py
@@ -6,6 +6,7 @@ import yaml
 from jsonschema import validate
 from urllib.parse import urlparse
 from copy import deepcopy
+from planoai.consts import DEFAULT_OTEL_TRACING_GRPC_ENDPOINT
 
 
 SUPPORTED_PROVIDERS_WITH_BASE_URL = [
@@ -158,6 +159,32 @@ def validate_and_render_schema():
 
     arch_tracing = config_yaml.get("tracing", {})
 
+    # Resolution order: config yaml > OTEL_TRACING_GRPC_ENDPOINT env var > hardcoded default
+    opentracing_grpc_endpoint = arch_tracing.get(
+        "opentracing_grpc_endpoint",
+        os.environ.get(
+            "OTEL_TRACING_GRPC_ENDPOINT", DEFAULT_OTEL_TRACING_GRPC_ENDPOINT
+        ),
+    )
+    # resolve env vars in opentracing_grpc_endpoint if present
+    if opentracing_grpc_endpoint and "$" in opentracing_grpc_endpoint:
+        opentracing_grpc_endpoint = os.path.expandvars(opentracing_grpc_endpoint)
+        print(
+            f"Resolved opentracing_grpc_endpoint to {opentracing_grpc_endpoint} after expanding environment variables"
+        )
+    arch_tracing["opentracing_grpc_endpoint"] = opentracing_grpc_endpoint
+    # ensure that opentracing_grpc_endpoint is a valid URL if present and start with http and must not have any path
+    if opentracing_grpc_endpoint:
+        urlparse_result = urlparse(opentracing_grpc_endpoint)
+        if urlparse_result.scheme != "http":
+            raise Exception(
+                f"Invalid opentracing_grpc_endpoint {opentracing_grpc_endpoint}, scheme must be http"
+            )
+        if urlparse_result.path and urlparse_result.path != "/":
+            raise Exception(
+                f"Invalid opentracing_grpc_endpoint {opentracing_grpc_endpoint}, path must be empty"
+            )
+
     llms_with_endpoint = []
     llms_with_endpoint_cluster_names = set()
     updated_model_providers = []
diff --git a/cli/planoai/consts.py b/cli/planoai/consts.py
index 52c92130..e51466f3 100644
--- a/cli/planoai/consts.py
+++ b/cli/planoai/consts.py
@@ -3,3 +3,4 @@ import os
 SERVICE_NAME_ARCHGW = "plano"
 PLANO_DOCKER_NAME = "plano"
 PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.4")
+DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://host.docker.internal:4317"
diff --git a/cli/planoai/docker_cli.py b/cli/planoai/docker_cli.py
index 518606a6..599d57a1 100644
--- a/cli/planoai/docker_cli.py
+++ b/cli/planoai/docker_cli.py
@@ -116,11 +116,18 @@ def stream_gateway_logs(follow, service="plano"):
 
 
 def docker_validate_plano_schema(arch_config_file):
+    import os
+
+    env = os.environ.copy()
+    env.pop("PATH", None)
+    env_args = [item for key, value in env.items() for item in ["-e", f"{key}={value}"]]
+
     result = subprocess.run(
         [
             "docker",
             "run",
             "--rm",
+            *env_args,
             "-v",
             f"{arch_config_file}:/app/arch_config.yaml:ro",
             "--entrypoint",
diff --git a/cli/planoai/main.py b/cli/planoai/main.py
index a11945b7..3ba28d52 100644
--- a/cli/planoai/main.py
+++ b/cli/planoai/main.py
@@ -27,6 +27,7 @@ from planoai.core import (
     start_cli_agent,
 )
 from planoai.consts import (
+    DEFAULT_OTEL_TRACING_GRPC_ENDPOINT,
     PLANO_DOCKER_IMAGE,
     PLANO_DOCKER_NAME,
     SERVICE_NAME_ARCHGW,
@@ -156,7 +157,7 @@ def up(file, path, foreground):
 
     # Set the ARCH_CONFIG_FILE environment variable
     env_stage = {
-        "OTEL_TRACING_HTTP_ENDPOINT": "http://host.docker.internal:4318/v1/traces",
+        "OTEL_TRACING_GRPC_ENDPOINT": DEFAULT_OTEL_TRACING_GRPC_ENDPOINT,
     }
     env = os.environ.copy()
     # Remove PATH variable if present
diff --git a/config/arch_config_schema.yaml b/config/arch_config_schema.yaml
index 003bb9b4..0f3cefb7 100644
--- a/config/arch_config_schema.yaml
+++ b/config/arch_config_schema.yaml
@@ -382,6 +382,8 @@ properties:
         type: integer
       trace_arch_internal:
         type: boolean
+      opentracing_grpc_endpoint:
+        type: string
       additionalProperties: false
   mode:
     type: string
diff --git a/config/docker-compose.dev.yaml b/config/docker-compose.dev.yaml
index c683cba3..2e061939 100644
--- a/config/docker-compose.dev.yaml
+++ b/config/docker-compose.dev.yaml
@@ -21,4 +21,4 @@ services:
     environment:
       - OPENAI_API_KEY=${OPENAI_API_KEY:?error}
       - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
-      - OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
+      - OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317
diff --git a/config/envoy.template.yaml b/config/envoy.template.yaml
index ed9254ea..6fa623ac 100644
--- a/config/envoy.template.yaml
+++ b/config/envoy.template.yaml
@@ -54,6 +54,7 @@ static_resources:
                       service_name: plano(inbound)
                   random_sampling:
                     value: {{ arch_tracing.random_sampling }}
+                  operation: "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
                 {% endif %}
                 stat_prefix: plano(inbound)
                 codec_type: AUTO
@@ -206,7 +207,7 @@ static_resources:
     - name: outbound_api_traffic
       address:
         socket_address:
-          address: 127.0.0.1
+          address: 0.0.0.0
           port_value: 11000
       traffic_direction: OUTBOUND
       filter_chains:
@@ -303,6 +304,7 @@ static_resources:
                       service_name: plano(inbound)
                   random_sampling:
                     value: {{ arch_tracing.random_sampling }}
+                  operation: "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
                 {% endif %}
                 stat_prefix: {{ listener.name | replace(" ", "_") }}_traffic
                 codec_type: AUTO
@@ -388,21 +390,6 @@ static_resources:
             - name: envoy.filters.network.http_connection_manager
               typed_config:
                 "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
-                {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
-                generate_request_id: true
-                tracing:
-                  provider:
-                    name: envoy.tracers.opentelemetry
-                    typed_config:
-                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
-                      grpc_service:
-                        envoy_grpc:
-                          cluster_name: opentelemetry_collector
-                        timeout: 0.250s
-                      service_name: plano(outbound)
-                  random_sampling:
-                    value: {{ arch_tracing.random_sampling }}
-                {% endif %}
                 stat_prefix: egress_traffic
                 codec_type: AUTO
                 scheme_header_transformation:
@@ -470,50 +457,6 @@ static_resources:
                     typed_config:
                       "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
 
-{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
-    - name: otel_collector_proxy
-      address:
-        socket_address:
-          address: 127.0.0.1
-          port_value: 9903
-      traffic_direction: OUTBOUND
-      filter_chains:
-        - filters:
-            - name: envoy.filters.network.http_connection_manager
-              typed_config:
-                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
-                stat_prefix: otel_proxy
-                codec_type: AUTO
-                # access_log:
-                # - name: envoy.access_loggers.file
-                #   typed_config:
-                #     "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
-                #     path: "/var/log/access_otel.log"
-                #     format: |
-                #      [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%" "%UPSTREAM_CLUSTER%"
-                route_config:
-                  name: otel_route
-                  virtual_hosts:
-                    - name: otel_backend
-                      domains: ["*"]
-                      routes:
-                        - match:
-                            prefix: "/v1/traces"
-                          route:
-                            cluster: opentelemetry_collector_http
-                            timeout: 5s
-                            retry_policy:
-                              retry_on: "5xx,connect-failure,refused-stream,reset"
-                              num_retries: 3
-                              per_try_timeout: 2s
-                              host_selection_retry_max_attempts: 5
-                              retriable_status_codes: [500, 502, 503, 504]
-                http_filters:
-                  - name: envoy.filters.http.router
-                    typed_config:
-                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
-{% endif %}
-
     - name: egress_traffic_llm
       address:
         socket_address:
@@ -524,6 +467,22 @@ static_resources:
             - name: envoy.filters.network.http_connection_manager
               typed_config:
                 "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
+                {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
+                generate_request_id: true
+                tracing:
+                  provider:
+                    name: envoy.tracers.opentelemetry
+                    typed_config:
+                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
+                      grpc_service:
+                        envoy_grpc:
+                          cluster_name: opentelemetry_collector
+                        timeout: 0.250s
+                      service_name: plano(outbound)
+                  random_sampling:
+                    value: {{ arch_tracing.random_sampling }}
+                  operation: "%REQ(:METHOD)% %REQ(:AUTHORITY)%%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
+                {% endif %}
                 stat_prefix: egress_traffic
                 codec_type: AUTO
                 scheme_header_transformation:
@@ -1071,35 +1030,7 @@ static_resources:
           - endpoint:
               address:
                 socket_address:
-                  address: host.docker.internal
-                  port_value: 4317
-    - name: opentelemetry_collector_http
-      type: STRICT_DNS
-      dns_lookup_family: V4_ONLY
-      lb_policy: ROUND_ROBIN
-      typed_extension_protocol_options:
-      load_assignment:
-        cluster_name: opentelemetry_collector_http
-        endpoints:
-        - lb_endpoints:
-          - endpoint:
-              address:
-                socket_address:
-                  address: host.docker.internal
-                  port_value: 4318
-          # Circuit breaker configuration to prevent overwhelming OTEL collector
-      circuit_breakers:
-        thresholds:
-          - priority: DEFAULT
-            max_connections: 100
-            max_pending_requests: 100
-            max_requests: 100
-            max_retries: 3
-      # Health checking and outlier detection
-      outlier_detection:
-        consecutive_5xx: 5
-        interval: 10s
-        base_ejection_time: 30s
-        max_ejection_percent: 50
-        enforcing_consecutive_5xx: 100
+                  {% set _otel_endpoint = arch_tracing.opentracing_grpc_endpoint | default('host.docker.internal:4317') | replace("http://", "") | replace("https://", "") %}
+                  address: {{ _otel_endpoint.split(":") | first }}
+                  port_value: {{ _otel_endpoint.split(":") | last }}
 {% endif %}
diff --git a/crates/.vscode/launch.json b/crates/.vscode/launch.json
index 56a29b46..1dc48199 100644
--- a/crates/.vscode/launch.json
+++ b/crates/.vscode/launch.json
@@ -13,7 +13,10 @@
       "env": {
         "RUST_LOG": "debug",
         "RUST_BACKTRACE": "1",
-        "ARCH_CONFIG_PATH_RENDERED": "../demos/use_cases/preference_based_routing/arch_config_rendered.yaml"
+        "ARCH_CONFIG_PATH_RENDERED": "../demos/use_cases/multi_agent_with_crewai_langchain/config.yaml_rendered",
+        // "ARCH_CONFIG_PATH_RENDERED": "../demos/use_cases/preference_based_routing/config.yaml_rendered",
+        "OTEL_TRACING_GRPC_ENDPOINT": "http://localhost:4317",
+        "OTEL_TRACING_ENABLED": "true"
       },
       "preLaunchTask": "rust: cargo build"
     }
diff --git a/crates/Cargo.lock b/crates/Cargo.lock
index 1dab5467..f2744ad2 100644
--- a/crates/Cargo.lock
+++ b/crates/Cargo.lock
@@ -1,6 +1,6 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
-version = 3
+version = 4
 
 [[package]]
 name = "acap"
@@ -195,7 +195,7 @@ dependencies = [
  "serde_urlencoded",
  "sync_wrapper",
  "tokio",
- "tower 0.5.2",
+ "tower",
  "tower-layer",
  "tower-service",
  "tracing",
@@ -354,7 +354,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
 dependencies = [
  "memchr",
- "regex-automata 0.4.9",
+ "regex-automata",
  "serde",
 ]
 
@@ -445,7 +445,6 @@ dependencies = [
  "pretty_assertions",
  "proxy-wasm",
  "rand 0.8.5",
- "reqwest",
  "serde",
  "serde_json",
  "serde_with",
@@ -454,7 +453,6 @@ dependencies = [
  "thiserror 1.0.69",
  "tiktoken-rs",
  "tokio",
- "tracing",
  "url",
  "urlencoding",
 ]
@@ -946,12 +944,6 @@ version = "0.31.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
 
-[[package]]
-name = "glob"
-version = "0.3.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
-
 [[package]]
 name = "governor"
 version = "0.6.3"
@@ -1573,11 +1565,11 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
 
 [[package]]
 name = "matchers"
-version = "0.1.0"
+version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
+checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9"
 dependencies = [
- "regex-automata 0.1.10",
+ "regex-automata",
 ]
 
 [[package]]
@@ -1719,12 +1711,11 @@ checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21"
 
 [[package]]
 name = "nu-ansi-term"
-version = "0.46.0"
+version = "0.50.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
+checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
 dependencies = [
- "overload",
- "winapi",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -1812,9 +1803,9 @@ dependencies = [
 
 [[package]]
 name = "opentelemetry"
-version = "0.29.1"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e87237e2775f74896f9ad219d26a2081751187eb7c9f5c58dde20a23b95d16c"
+checksum = "b84bcd6ae87133e903af7ef497404dda70c60d0ea14895fc8a5e6722754fc2a0"
 dependencies = [
  "futures-core",
  "futures-sink",
@@ -1826,25 +1817,23 @@ dependencies = [
 
 [[package]]
 name = "opentelemetry-http"
-version = "0.29.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "46d7ab32b827b5b495bd90fa95a6cb65ccc293555dcc3199ae2937d2d237c8ed"
+checksum = "d7a6d09a73194e6b66df7c8f1b680f156d916a1a942abf2de06823dd02b7855d"
 dependencies = [
  "async-trait",
  "bytes",
  "http 1.3.1",
  "opentelemetry",
  "reqwest",
- "tracing",
 ]
 
 [[package]]
 name = "opentelemetry-otlp"
-version = "0.29.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d899720fe06916ccba71c01d04ecd77312734e2de3467fd30d9d580c8ce85656"
+checksum = "7a2366db2dca4d2ad033cad11e6ee42844fd727007af5ad04a1730f4cb8163bf"
 dependencies = [
- "futures-core",
  "http 1.3.1",
  "opentelemetry",
  "opentelemetry-http",
@@ -1860,44 +1849,43 @@ dependencies = [
 
 [[package]]
 name = "opentelemetry-proto"
-version = "0.29.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c40da242381435e18570d5b9d50aca2a4f4f4d8e146231adb4e7768023309b3"
+checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f"
 dependencies = [
  "opentelemetry",
  "opentelemetry_sdk",
  "prost",
  "tonic",
+ "tonic-prost",
 ]
 
 [[package]]
 name = "opentelemetry-stdout"
-version = "0.29.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a7e27d446dabd68610ef0b77d07b102ecde827a4596ea9c01a4d3811e945b286"
+checksum = "bc8887887e169414f637b18751487cce4e095be787d23fad13c454e2fb1b3811"
 dependencies = [
  "chrono",
- "futures-util",
  "opentelemetry",
  "opentelemetry_sdk",
 ]
 
 [[package]]
 name = "opentelemetry_sdk"
-version = "0.29.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "afdefb21d1d47394abc1ba6c57363ab141be19e27cc70d0e422b7f303e4d290b"
+checksum = "e14ae4f5991976fd48df6d843de219ca6d31b01daaab2dad5af2badeded372bd"
 dependencies = [
  "futures-channel",
  "futures-executor",
  "futures-util",
- "glob",
  "opentelemetry",
  "percent-encoding",
  "rand 0.9.2",
- "serde_json",
  "thiserror 2.0.12",
- "tracing",
+ "tokio",
+ "tokio-stream",
 ]
 
 [[package]]
@@ -1906,12 +1894,6 @@ version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e"
 
-[[package]]
-name = "overload"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
-
 [[package]]
 name = "parking_lot"
 version = "0.12.4"
@@ -2101,9 +2083,9 @@ dependencies = [
 
 [[package]]
 name = "prost"
-version = "0.13.5"
+version = "0.14.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5"
+checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568"
 dependencies = [
  "bytes",
  "prost-derive",
@@ -2111,9 +2093,9 @@ dependencies = [
 
 [[package]]
 name = "prost-derive"
-version = "0.13.5"
+version = "0.14.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
+checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b"
 dependencies = [
  "anyhow",
  "itertools",
@@ -2298,17 +2280,8 @@ checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
 dependencies = [
  "aho-corasick",
  "memchr",
- "regex-automata 0.4.9",
- "regex-syntax 0.8.5",
-]
-
-[[package]]
-name = "regex-automata"
-version = "0.1.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
-dependencies = [
- "regex-syntax 0.6.29",
+ "regex-automata",
+ "regex-syntax",
 ]
 
 [[package]]
@@ -2319,15 +2292,9 @@ checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
 dependencies = [
  "aho-corasick",
  "memchr",
- "regex-syntax 0.8.5",
+ "regex-syntax",
 ]
 
-[[package]]
-name = "regex-syntax"
-version = "0.6.29"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
-
 [[package]]
 name = "regex-syntax"
 version = "0.8.5"
@@ -2375,7 +2342,7 @@ dependencies = [
  "tokio-native-tls",
  "tokio-rustls 0.26.2",
  "tokio-util",
- "tower 0.5.2",
+ "tower",
  "tower-http",
  "tower-service",
  "url",
@@ -3205,9 +3172,9 @@ dependencies = [
 
 [[package]]
 name = "tonic"
-version = "0.12.3"
+version = "0.14.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
+checksum = "eb7613188ce9f7df5bfe185db26c5814347d110db17920415cf2fbcad85e7203"
 dependencies = [
  "async-trait",
  "base64 0.22.1",
@@ -3220,33 +3187,24 @@ dependencies = [
  "hyper-util",
  "percent-encoding",
  "pin-project",
- "prost",
+ "sync_wrapper",
  "tokio",
  "tokio-stream",
- "tower 0.4.13",
+ "tower",
  "tower-layer",
  "tower-service",
  "tracing",
 ]
 
 [[package]]
-name = "tower"
-version = "0.4.13"
+name = "tonic-prost"
+version = "0.14.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
+checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67"
 dependencies = [
- "futures-core",
- "futures-util",
- "indexmap 1.9.3",
- "pin-project",
- "pin-project-lite",
- "rand 0.8.5",
- "slab",
- "tokio",
- "tokio-util",
- "tower-layer",
- "tower-service",
- "tracing",
+ "bytes",
+ "prost",
+ "tonic",
 ]
 
 [[package]]
@@ -3257,9 +3215,12 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
 dependencies = [
  "futures-core",
  "futures-util",
+ "indexmap 2.9.0",
  "pin-project-lite",
+ "slab",
  "sync_wrapper",
  "tokio",
+ "tokio-util",
  "tower-layer",
  "tower-service",
  "tracing",
@@ -3278,7 +3239,7 @@ dependencies = [
  "http-body 1.0.1",
  "iri-string",
  "pin-project-lite",
- "tower 0.5.2",
+ "tower",
  "tower-layer",
  "tower-service",
 ]
@@ -3297,9 +3258,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
 
 [[package]]
 name = "tracing"
-version = "0.1.41"
+version = "0.1.44"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
+checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
 dependencies = [
  "log",
  "pin-project-lite",
@@ -3309,9 +3270,9 @@ dependencies = [
 
 [[package]]
 name = "tracing-attributes"
-version = "0.1.28"
+version = "0.1.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d"
+checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -3320,9 +3281,9 @@ dependencies = [
 
 [[package]]
 name = "tracing-core"
-version = "0.1.33"
+version = "0.1.36"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c"
+checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
 dependencies = [
  "once_cell",
  "valuable",
@@ -3341,14 +3302,12 @@ dependencies = [
 
 [[package]]
 name = "tracing-opentelemetry"
-version = "0.30.0"
+version = "0.32.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd8e764bd6f5813fd8bebc3117875190c5b0415be8f7f8059bffb6ecd979c444"
+checksum = "1ac28f2d093c6c477eaa76b23525478f38de514fa9aeb1285738d4b97a9552fc"
 dependencies = [
  "js-sys",
- "once_cell",
  "opentelemetry",
- "opentelemetry_sdk",
  "smallvec",
  "tracing",
  "tracing-core",
@@ -3359,14 +3318,14 @@ dependencies = [
 
 [[package]]
 name = "tracing-subscriber"
-version = "0.3.19"
+version = "0.3.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008"
+checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e"
 dependencies = [
  "matchers",
  "nu-ansi-term",
  "once_cell",
- "regex",
+ "regex-automata",
  "sharded-slab",
  "smallvec",
  "thread_local",
@@ -3684,28 +3643,6 @@ dependencies = [
  "web-sys",
 ]
 
-[[package]]
-name = "winapi"
-version = "0.3.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
-dependencies = [
- "winapi-i686-pc-windows-gnu",
- "winapi-x86_64-pc-windows-gnu",
-]
-
-[[package]]
-name = "winapi-i686-pc-windows-gnu"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
-
-[[package]]
-name = "winapi-x86_64-pc-windows-gnu"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
-
 [[package]]
 name = "windows-core"
 version = "0.61.2"
diff --git a/crates/Cargo.toml b/crates/Cargo.toml
index c22e252e..5cd6b29c 100644
--- a/crates/Cargo.toml
+++ b/crates/Cargo.toml
@@ -1,7 +1,3 @@
 [workspace]
 resolver = "2"
 members = ["llm_gateway", "prompt_gateway", "common", "brightstaff", "hermesllm"]
-
-[workspace.metadata.rust-analyzer]
-# Enable features for better IDE support
-cargo.features = ["trace-collection"]
diff --git a/crates/brightstaff/Cargo.toml b/crates/brightstaff/Cargo.toml
index f48a8d68..5d986ffa 100644
--- a/crates/brightstaff/Cargo.toml
+++ b/crates/brightstaff/Cargo.toml
@@ -8,7 +8,7 @@ async-openai = "0.30.1"
 async-trait = "0.1"
 bytes = "1.10.1"
 chrono = "0.4"
-common = { version = "0.1.0", path = "../common", features = ["trace-collection"] }
+common = { version = "0.1.0", path = "../common" }
 eventsource-client = "0.15.0"
 eventsource-stream = "0.2.3"
 flate2 = "1.0"
@@ -19,11 +19,11 @@ http-body = "1.0.1"
 http-body-util = "0.1.3"
 hyper = { version = "1.6.0", features = ["full"] }
 hyper-util = "0.1.11"
-opentelemetry = "0.29.1"
-opentelemetry-http = "0.29.0"
-opentelemetry-otlp = {version="0.29.0", features=["trace", "tonic", "grpc-tonic"]}
-opentelemetry-stdout = "0.29.0"
-opentelemetry_sdk = "0.29.0"
+opentelemetry = "0.31"
+opentelemetry-http = "0.31"
+opentelemetry-otlp = {version="0.31", features=["trace", "grpc-tonic"]}
+opentelemetry-stdout = "0.31"
+opentelemetry_sdk = { version = "0.31", features = ["rt-tokio"] }
 pretty_assertions = "1.4.1"
 rand = "0.9.2"
 reqwest = { version = "0.12.15", features = ["stream"] }
@@ -38,6 +38,7 @@ tokio-postgres = { version = "0.7", features = ["with-serde_json-1"] }
 tokio-stream = "0.1"
 time = { version = "0.3", features = ["formatting", "macros"] }
 tracing = "0.1"
+tracing-opentelemetry = "0.32.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 uuid = { version = "1.0", features = ["v4", "serde"] }
 
@@ -45,5 +46,5 @@ uuid = { version = "1.0", features = ["v4", "serde"] }
 mockito = "1.0"
 tokio-stream = "0.1.17"
 tracing = "0.1.41"
-tracing-opentelemetry = "0.30.0"
+tracing-opentelemetry = "0.32.1"
 tracing-subscriber = { version = "0.3.19", features = ["env-filter", "fmt", "time"] }
diff --git a/crates/brightstaff/src/handlers/agent_chat_completions.rs b/crates/brightstaff/src/handlers/agent_chat_completions.rs
index 5ced34c0..adfdce02 100644
--- a/crates/brightstaff/src/handlers/agent_chat_completions.rs
+++ b/crates/brightstaff/src/handlers/agent_chat_completions.rs
@@ -1,9 +1,7 @@
 use std::sync::Arc;
-use std::time::{Instant, SystemTime};
+use std::time::Instant;
 
 use bytes::Bytes;
-use common::consts::TRACE_PARENT_HEADER;
-use common::traces::{generate_random_span_id, parse_traceparent, SpanBuilder, SpanKind};
 use hermesllm::apis::OpenAIMessage;
 use hermesllm::clients::SupportedAPIsFromClient;
 use hermesllm::providers::request::ProviderRequest;
@@ -11,14 +9,15 @@ use hermesllm::ProviderRequestType;
 use http_body_util::combinators::BoxBody;
 use http_body_util::BodyExt;
 use hyper::{Request, Response};
+use opentelemetry::trace::get_active_span;
 use serde::ser::Error as SerError;
-use tracing::{debug, info, warn};
+use tracing::{debug, info, info_span, warn, Instrument};
 
 use super::agent_selector::{AgentSelectionError, AgentSelector};
 use super::pipeline_processor::{PipelineError, PipelineProcessor};
 use super::response_handler::ResponseHandler;
 use crate::router::plano_orchestrator::OrchestratorService;
-use crate::tracing::{http, operation_component, OperationNameBuilder};
+use crate::tracing::{operation_component, set_service_name};
 
 /// Main errors for agent chat completions
 #[derive(Debug, thiserror::Error)]
@@ -41,92 +40,122 @@ pub async fn agent_chat(
     _: String,
     agents_list: Arc<tokio::sync::RwLock<Option<Vec<common::configuration::Agent>>>>,
     listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
-    trace_collector: Arc<common::traces::TraceCollector>,
 ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
-    match handle_agent_chat(
-        request,
-        orchestrator_service,
-        agents_list,
-        listeners,
-        trace_collector,
-    )
-    .await
+    // Extract request_id from headers or generate a new one
+    let request_id: String = match request
+        .headers()
+        .get(common::consts::REQUEST_ID_HEADER)
+        .and_then(|h| h.to_str().ok())
+        .map(|s| s.to_string())
     {
-        Ok(response) => Ok(response),
-        Err(err) => {
-            // Check if this is a client error from the pipeline that should be cascaded
-            if let AgentFilterChainError::Pipeline(PipelineError::ClientError {
-                agent,
-                status,
-                body,
-            }) = &err
-            {
-                warn!(
-                    "Client error from agent '{}' (HTTP {}): {}",
-                    agent, status, body
-                );
+        Some(id) => id,
+        None => uuid::Uuid::new_v4().to_string(),
+    };
 
-                // Create error response with the original status code and body
+    // Create a span with request_id that will be included in all log lines
+    let request_span = info_span!(
+        "(orchestrator)",
+        component = "orchestrator",
+        request_id = %request_id,
+        http.method = %request.method(),
+        http.path = %request.uri().path()
+    );
+
+    // Execute the handler inside the span
+    async {
+        // Set service name for orchestrator operations
+        set_service_name(operation_component::ORCHESTRATOR);
+
+        match handle_agent_chat_inner(
+            request,
+            orchestrator_service,
+            agents_list,
+            listeners,
+            request_id,
+        )
+        .await
+        {
+            Ok(response) => Ok(response),
+            Err(err) => {
+                // Check if this is a client error from the pipeline that should be cascaded
+                if let AgentFilterChainError::Pipeline(PipelineError::ClientError {
+                    agent,
+                    status,
+                    body,
+                }) = &err
+                {
+                    warn!(
+                        agent = %agent,
+                        status = %status,
+                        body = %body,
+                        "client error from agent"
+                    );
+
+                    // Create error response with the original status code and body
+                    let error_json = serde_json::json!({
+                        "error": "ClientError",
+                        "agent": agent,
+                        "status": status,
+                        "agent_response": body
+                    });
+
+                    let json_string = error_json.to_string();
+                    let mut response =
+                        Response::new(ResponseHandler::create_full_body(json_string));
+                    *response.status_mut() = hyper::StatusCode::from_u16(*status)
+                        .unwrap_or(hyper::StatusCode::BAD_REQUEST);
+                    response.headers_mut().insert(
+                        hyper::header::CONTENT_TYPE,
+                        "application/json".parse().unwrap(),
+                    );
+                    return Ok(response);
+                }
+
+                // Print detailed error information with full error chain for other errors
+                let mut error_chain = Vec::new();
+                let mut current_error: &dyn std::error::Error = &err;
+
+                // Collect the full error chain
+                loop {
+                    error_chain.push(current_error.to_string());
+                    match current_error.source() {
+                        Some(source) => current_error = source,
+                        None => break,
+                    }
+                }
+
+                // Log the complete error chain
+                warn!(error_chain = ?error_chain, "agent chat error chain");
+                warn!(root_error = ?err, "root error");
+
+                // Create structured error response as JSON
                 let error_json = serde_json::json!({
-                    "error": "ClientError",
-                    "agent": agent,
-                    "status": status,
-                    "agent_response": body
+                    "error": {
+                        "type": "AgentFilterChainError",
+                        "message": err.to_string(),
+                        "error_chain": error_chain,
+                        "debug_info": format!("{:?}", err)
+                    }
                 });
 
-                let json_string = error_json.to_string();
-                let mut response = Response::new(ResponseHandler::create_full_body(json_string));
-                *response.status_mut() =
-                    hyper::StatusCode::from_u16(*status).unwrap_or(hyper::StatusCode::BAD_REQUEST);
-                response.headers_mut().insert(
-                    hyper::header::CONTENT_TYPE,
-                    "application/json".parse().unwrap(),
-                );
-                return Ok(response);
+                // Log the error for debugging
+                info!(error = %error_json, "structured error info");
+
+                // Return JSON error response
+                Ok(ResponseHandler::create_json_error_response(&error_json))
             }
-
-            // Print detailed error information with full error chain for other errors
-            let mut error_chain = Vec::new();
-            let mut current_error: &dyn std::error::Error = &err;
-
-            // Collect the full error chain
-            loop {
-                error_chain.push(current_error.to_string());
-                match current_error.source() {
-                    Some(source) => current_error = source,
-                    None => break,
-                }
-            }
-
-            // Log the complete error chain
-            warn!("Agent chat error chain: {:#?}", error_chain);
-            warn!("Root error: {:?}", err);
-
-            // Create structured error response as JSON
-            let error_json = serde_json::json!({
-                "error": {
-                    "type": "AgentFilterChainError",
-                    "message": err.to_string(),
-                    "error_chain": error_chain,
-                    "debug_info": format!("{:?}", err)
-                }
-            });
-
-            // Log the error for debugging
-            info!("Structured error info: {}", error_json);
-
-            // Return JSON error response
-            Ok(ResponseHandler::create_json_error_response(&error_json))
         }
     }
+    .instrument(request_span)
+    .await
 }
 
-async fn handle_agent_chat(
+async fn handle_agent_chat_inner(
     request: Request<hyper::body::Incoming>,
     orchestrator_service: Arc<OrchestratorService>,
     agents_list: Arc<tokio::sync::RwLock<Option<Vec<common::configuration::Agent>>>>,
     listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
-    trace_collector: Arc<common::traces::TraceCollector>,
+    request_id: String,
 ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, AgentFilterChainError> {
     // Initialize services
     let agent_selector = AgentSelector::new(orchestrator_service);
@@ -140,14 +169,18 @@ async fn handle_agent_chat(
         .and_then(|name| name.to_str().ok());
 
     // Find the appropriate listener
-    let listener = {
+    let listener: common::configuration::Listener = {
         let listeners = listeners.read().await;
         agent_selector
             .find_listener(listener_name, &listeners)
             .await?
     };
 
-    info!("Handling request for listener: {}", listener.name);
+    get_active_span(|span| {
+        span.update_name(listener.name.to_string());
+    });
+
+    info!(listener = %listener.name, "handling request");
 
     // Parse request body
     let request_path = request
@@ -162,12 +195,8 @@ async fn handle_agent_chat(
         let mut headers = request.headers().clone();
         headers.remove(common::consts::ENVOY_ORIGINAL_PATH_HEADER);
 
+        // Set the request_id in headers if not already present
         if !headers.contains_key(common::consts::REQUEST_ID_HEADER) {
-            let request_id = uuid::Uuid::new_v4().to_string();
-            info!(
-                "Request id not found in headers, generated new request id: {}",
-                request_id
-            );
             headers.insert(
                 common::consts::REQUEST_ID_HEADER,
                 hyper::header::HeaderValue::from_str(&request_id).unwrap(),
@@ -180,8 +209,8 @@ async fn handle_agent_chat(
     let chat_request_bytes = request.collect().await?.to_bytes();
 
     debug!(
-        "Received request body (raw utf8): {}",
-        String::from_utf8_lossy(&chat_request_bytes)
+        body = %String::from_utf8_lossy(&chat_request_bytes),
+        "received request body"
     );
 
     // Determine the API type from the endpoint
@@ -195,7 +224,7 @@ async fn handle_agent_chat(
     let client_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &api_type)) {
         Ok(request) => request,
         Err(err) => {
-            warn!("Failed to parse request as ProviderRequestType: {}", err);
+            warn!("failed to parse request as ProviderRequestType: {}", err);
             let err_msg = format!("Failed to parse request: {}", err);
             return Err(AgentFilterChainError::RequestParsing(
                 serde_json::Error::custom(err_msg),
@@ -205,12 +234,6 @@ async fn handle_agent_chat(
 
     let message: Vec<OpenAIMessage> = client_request.get_messages();
 
-    // Extract trace parent for routing
-    let traceparent = request_headers
-        .iter()
-        .find(|(key, _)| key.as_str() == TRACE_PARENT_HEADER)
-        .map(|(_, value)| value.to_str().unwrap_or_default().to_string());
-
     let request_id = request_headers
         .get(common::consts::REQUEST_ID_HEADER)
         .and_then(|val| val.to_str().ok())
@@ -223,87 +246,58 @@ async fn handle_agent_chat(
         agent_selector.create_agent_map(agents)
     };
 
-    // Parse trace parent to get trace_id and parent_span_id
-    let (trace_id, parent_span_id) = if let Some(ref tp) = traceparent {
-        parse_traceparent(tp)
-    } else {
-        (String::new(), None)
-    };
-
     // Select appropriate agents using arch orchestrator llm model
-    let selection_span_id = generate_random_span_id();
-    let selection_start_time = SystemTime::now();
-    let selection_start_instant = Instant::now();
-
+    let selection_start = Instant::now();
     let selected_agents = agent_selector
-        .select_agents(&message, &listener, traceparent.clone(), request_id.clone())
+        .select_agents(&message, &listener, request_id.clone())
         .await?;
 
-    // Record agent selection span
-    let selection_end_time = SystemTime::now();
-    let selection_elapsed = selection_start_instant.elapsed();
-    let selection_operation_name = OperationNameBuilder::new()
-        .with_method("POST")
-        .with_path("/agents/select")
-        .with_target(&listener.name)
-        .build();
-
-    let mut selection_span_builder = SpanBuilder::new(&selection_operation_name)
-        .with_span_id(selection_span_id)
-        .with_kind(SpanKind::Internal)
-        .with_start_time(selection_start_time)
-        .with_end_time(selection_end_time)
-        .with_attribute(http::METHOD, "POST")
-        .with_attribute(http::TARGET, "/agents/select")
-        .with_attribute("selection.listener", listener.name.clone())
-        .with_attribute("selection.agent_count", selected_agents.len().to_string())
-        .with_attribute(
+    // Record selection attributes on the current orchestrator span
+    let selection_elapsed_ms = selection_start.elapsed().as_secs_f64() * 1000.0;
+    get_active_span(|span| {
+        span.set_attribute(opentelemetry::KeyValue::new(
+            "selection.listener",
+            listener.name.clone(),
+        ));
+        span.set_attribute(opentelemetry::KeyValue::new(
+            "selection.agent_count",
+            selected_agents.len() as i64,
+        ));
+        span.set_attribute(opentelemetry::KeyValue::new(
             "selection.agents",
             selected_agents
                 .iter()
                 .map(|a| a.id.as_str())
                 .collect::<Vec<_>>()
                 .join(","),
-        )
-        .with_attribute(
-            "duration_ms",
-            format!("{:.2}", selection_elapsed.as_secs_f64() * 1000.0),
-        );
+        ));
+        span.set_attribute(opentelemetry::KeyValue::new(
+            "selection.determination_ms",
+            format!("{:.2}", selection_elapsed_ms),
+        ));
+    });
 
-    if !trace_id.is_empty() {
-        selection_span_builder = selection_span_builder.with_trace_id(trace_id.clone());
-    }
-    if let Some(parent_id) = parent_span_id.clone() {
-        selection_span_builder = selection_span_builder.with_parent_span_id(parent_id);
-    }
-
-    let selection_span = selection_span_builder.build();
-    trace_collector.record_span(operation_component::ORCHESTRATOR, selection_span);
-
-    info!("Selected {} agent(s) for execution", selected_agents.len());
+    info!(
+        count = selected_agents.len(),
+        "selected agents for execution"
+    );
 
     // Execute agents sequentially, passing output from one to the next
     let mut current_messages = message.clone();
     let agent_count = selected_agents.len();
 
     for (agent_index, selected_agent) in selected_agents.iter().enumerate() {
+        // Get agent name
+        let agent_name = selected_agent.id.clone();
         let is_last_agent = agent_index == agent_count - 1;
 
         debug!(
-            "Processing agent {}/{}: {}",
-            agent_index + 1,
-            agent_count,
-            selected_agent.id
+            agent_index = agent_index + 1,
+            total = agent_count,
+            agent = %agent_name,
+            "processing agent"
         );
 
-        // Record the start time for agent span
-        let agent_start_time = SystemTime::now();
-        let agent_start_instant = Instant::now();
-        let span_id = generate_random_span_id();
-
-        // Get agent name
-        let agent_name = selected_agent.id.clone();
-
         // Process the filter chain
         let chat_history = pipeline_processor
             .process_filter_chain(
@@ -311,88 +305,71 @@ async fn handle_agent_chat(
                 selected_agent,
                 &agent_map,
                 &request_headers,
-                Some(&trace_collector),
-                trace_id.clone(),
-                span_id.clone(),
             )
             .await?;
 
         // Get agent details and invoke
         let agent = agent_map.get(&agent_name).unwrap();
 
-        debug!("Invoking agent: {}", agent_name);
+        debug!(agent = %agent_name, "invoking agent");
 
-        let llm_response = pipeline_processor
-            .invoke_agent(
-                &chat_history,
-                client_request.clone(),
-                agent,
-                &request_headers,
-                trace_id.clone(),
-                span_id.clone(),
-            )
-            .await?;
+        let agent_span = info_span!(
+            "agent",
+            agent_id = %agent_name,
+            message_count = chat_history.len(),
+        );
 
-        // Record agent span
-        let agent_end_time = SystemTime::now();
-        let agent_elapsed = agent_start_instant.elapsed();
-        let full_path = format!("/agents{}", request_path);
-        let operation_name = OperationNameBuilder::new()
-            .with_method("POST")
-            .with_path(&full_path)
-            .with_target(&agent_name)
-            .build();
+        let llm_response = async {
+            set_service_name(operation_component::AGENT);
+            get_active_span(|span| {
+                span.update_name(format!("{} /v1/chat/completions", agent_name));
+            });
 
-        let mut span_builder = SpanBuilder::new(&operation_name)
-            .with_span_id(span_id)
-            .with_kind(SpanKind::Internal)
-            .with_start_time(agent_start_time)
-            .with_end_time(agent_end_time)
-            .with_attribute(http::METHOD, "POST")
-            .with_attribute(http::TARGET, full_path)
-            .with_attribute("agent.name", agent_name.clone())
-            .with_attribute(
-                "agent.sequence",
-                format!("{}/{}", agent_index + 1, agent_count),
-            )
-            .with_attribute(
-                "duration_ms",
-                format!("{:.2}", agent_elapsed.as_secs_f64() * 1000.0),
-            );
-
-        if !trace_id.is_empty() {
-            span_builder = span_builder.with_trace_id(trace_id.clone());
+            pipeline_processor
+                .invoke_agent(
+                    &chat_history,
+                    client_request.clone(),
+                    agent,
+                    &request_headers,
+                )
+                .await
         }
-        if let Some(parent_id) = parent_span_id.clone() {
-            span_builder = span_builder.with_parent_span_id(parent_id);
-        }
-
-        let span = span_builder.build();
-        trace_collector.record_span(operation_component::AGENT, span);
+        .instrument(agent_span.clone())
+        .await?;
 
         // If this is the last agent, return the streaming response
         if is_last_agent {
             info!(
-                "Completed agent chain, returning response from last agent: {}",
-                agent_name
+                agent = %agent_name,
+                "completed agent chain, returning response"
             );
-            return response_handler
-                .create_streaming_response(llm_response)
-                .await
-                .map_err(AgentFilterChainError::from);
+            // Capture the orchestrator span (parent of the agent span) so it
+            // stays open for the full streaming duration alongside the agent span.
+            let orchestrator_span = tracing::Span::current();
+            return async {
+                response_handler
+                    .create_streaming_response(
+                        llm_response,
+                        tracing::Span::current(), // agent span (inner)
+                        orchestrator_span,        // orchestrator span (outer)
+                    )
+                    .await
+                    .map_err(AgentFilterChainError::from)
+            }
+            .instrument(agent_span)
+            .await;
         }
 
         // For intermediate agents, collect the full response and pass to next agent
-        debug!(
-            "Collecting response from intermediate agent: {}",
-            agent_name
-        );
-        let response_text = response_handler.collect_full_response(llm_response).await?;
+        debug!(agent = %agent_name, "collecting response from intermediate agent");
+        let response_text = async { response_handler.collect_full_response(llm_response).await }
+            .instrument(agent_span)
+            .await?;
 
         info!(
-            "Agent {} completed, passing {} character response to next agent",
-            agent_name,
-            response_text.len()
+            agent = %agent_name,
+            response_len = response_text.len(),
+            "agent completed, passing response to next agent"
         );
 
         // remove last message and add new one at the end
diff --git a/crates/brightstaff/src/handlers/agent_selector.rs b/crates/brightstaff/src/handlers/agent_selector.rs
index 727dc6c0..faa734ee 100644
--- a/crates/brightstaff/src/handlers/agent_selector.rs
+++ b/crates/brightstaff/src/handlers/agent_selector.rs
@@ -75,7 +75,7 @@ impl AgentSelector {
             .cloned()
             .or_else(|| {
                 warn!(
-                    "No default agent found, routing request to first agent: {}",
+                    "no default agent found, routing request to first agent: {}",
                     agents[0].id
                 );
                 Some(agents[0].clone())
@@ -108,7 +108,6 @@ impl AgentSelector {
         &self,
         messages: &[Message],
         listener: &Listener,
-        trace_parent: Option<String>,
         request_id: Option<String>,
     ) -> Result<Vec<AgentFilterChain>, AgentSelectionError> {
         let agents = listener
@@ -118,7 +117,7 @@ impl AgentSelector {
 
         // If only one agent, skip orchestration
         if agents.len() == 1 {
-            debug!("Only one agent available, skipping orchestration");
+            debug!("only one agent available, skipping orchestration");
             return Ok(vec![agents[0].clone()]);
         }
 
@@ -132,15 +131,15 @@ impl AgentSelector {
 
         match self
             .orchestrator_service
-            .determine_orchestration(messages, trace_parent, Some(usage_preferences), request_id)
+            .determine_orchestration(messages, Some(usage_preferences), request_id)
             .await
         {
             Ok(Some(routes)) => {
-                debug!("Determined {} agent(s) via orchestration", routes.len());
+                debug!(count = routes.len(), "determined agents via orchestration");
                 let mut selected_agents = Vec::new();
 
                 for (route_name, agent_name) in routes {
-                    debug!("Processing route: {}, agent: {}", route_name, agent_name);
+                    debug!(route = %route_name, agent = %agent_name, "processing route");
                     let selected_agent = agents
                         .iter()
                         .find(|a| a.id == agent_name)
@@ -155,14 +154,14 @@ impl AgentSelector {
                 }
 
                 if selected_agents.is_empty() {
-                    debug!("No agents determined using orchestration, using default agent");
+                    debug!("no agents determined via orchestration, using default");
                     Ok(vec![self.get_default_agent(agents, &listener.name)?])
                 } else {
                     Ok(selected_agents)
                 }
             }
             Ok(None) => {
-                debug!("No agents determined using orchestration, using default agent");
+                debug!("no agents determined using orchestration, using default agent");
                 Ok(vec![self.get_default_agent(agents, &listener.name)?])
             }
             Err(err) => Err(AgentSelectionError::OrchestrationError(err.to_string())),
diff --git a/crates/brightstaff/src/handlers/function_calling.rs b/crates/brightstaff/src/handlers/function_calling.rs
index 7ba15e2d..0d6c6d3c 100644
--- a/crates/brightstaff/src/handlers/function_calling.rs
+++ b/crates/brightstaff/src/handlers/function_calling.rs
@@ -944,7 +944,7 @@ impl ArchFunctionHandler {
     ) -> Result<ChatCompletionsResponse> {
         use tracing::{error, info};
 
-        info!("[Arch-Function] - ChatCompletion");
+        info!("processing chat completion request");
 
         let messages = self.process_messages(
             &request.messages,
@@ -955,9 +955,9 @@ impl ArchFunctionHandler {
         )?;
 
         info!(
-            "[request to arch-fc]: model: {}, messages count: {}",
-            self.model_name,
-            messages.len()
+            model = %self.model_name,
+            message_count = messages.len(),
+            "sending request to arch-fc"
         );
 
         let use_agent_orchestrator = request
@@ -991,7 +991,7 @@ impl ArchFunctionHandler {
                     }
                 }
             }
-            info!("[Agent Orchestrator]: response received");
+            info!("agent orchestrator response received");
         } else if let Some(tools) = request.tools.as_ref() {
             let mut hallucination_state = HallucinationState::new(tools);
             let mut has_tool_calls = None;
@@ -1040,7 +1040,10 @@ impl ArchFunctionHandler {
             }
 
             if has_tool_calls == Some(true) && has_hallucination {
-                info!("[Hallucination]: {}", hallucination_state.error_message);
+                info!(
+                    "detected hallucination: {}",
+                    hallucination_state.error_message
+                );
 
                 let clarify_messages = self.prefill_message(messages.clone(), &self.clarify_prefix);
                 let clarify_request = self.create_request_with_extra_body(clarify_messages, false);
@@ -1075,8 +1078,8 @@ impl ArchFunctionHandler {
         let response_dict = self.parse_model_response(&model_response);
 
         info!(
-            "[arch-fc]: raw model response: {}",
-            response_dict.raw_response
+            raw_response = %response_dict.raw_response,
+            "arch-fc model response"
         );
 
         // General model response (no intent matched - should route to default target)
@@ -1126,7 +1129,7 @@ impl ArchFunctionHandler {
 
                         if verification.is_valid {
                             info!(
-                                "[Tool calls]: {:?}",
+                                "tool calls extracted: {:?}",
                                 response_dict
                                     .tool_calls
                                     .iter()
@@ -1143,7 +1146,7 @@ impl ArchFunctionHandler {
                                 tool_calls: Some(response_dict.tool_calls.clone()),
                             }
                         } else {
-                            error!("Invalid tool call - {}", verification.error_message);
+                            error!(error = %verification.error_message, "invalid tool call");
                             ResponseMessage {
                                 role: Role::Assistant,
                                 content: Some(String::new()),
@@ -1155,7 +1158,7 @@ impl ArchFunctionHandler {
                             }
                         }
                     } else {
-                        error!("Tool calls present but no tools provided in request");
+                        error!("tool calls present but no tools provided in request");
                         ResponseMessage {
                             role: Role::Assistant,
                             content: Some(String::new()),
@@ -1168,7 +1171,7 @@ impl ArchFunctionHandler {
                     }
                 } else {
                     info!(
-                        "[Tool calls]: {:?}",
+                        "tool calls extracted: {:?}",
                         response_dict
                             .tool_calls
                             .iter()
@@ -1187,8 +1190,8 @@ impl ArchFunctionHandler {
                 }
             } else {
                 error!(
-                    "Invalid tool calls in response: {}",
-                    response_dict.error_message
+                    error = %response_dict.error_message,
+                    "invalid tool calls in response"
                 );
                 ResponseMessage {
                     role: Role::Assistant,
@@ -1201,7 +1204,7 @@ impl ArchFunctionHandler {
                 }
             }
         } else {
-            error!("Invalid model response - {}", model_response);
+            error!(response = %model_response, "invalid model response");
             ResponseMessage {
                 role: Role::Assistant,
                 content: Some(String::new()),
@@ -1244,7 +1247,7 @@ impl ArchFunctionHandler {
             metadata: Some(metadata),
         };
 
-        info!("[response arch-fc]: {:?}", chat_completion_response);
+        info!(response = ?chat_completion_response, "arch-fc response");
 
         Ok(chat_completion_response)
     }
@@ -1331,7 +1334,7 @@ pub async fn function_calling_chat_handler(
     let mut body_json: Value = match serde_json::from_slice(&whole_body) {
         Ok(json) => json,
         Err(e) => {
-            error!("Failed to parse request body as JSON: {}", e);
+            error!(error = %e, "failed to parse request body as json");
             let mut response = Response::new(full(
                 serde_json::json!({
                     "error": format!("Invalid request body: {}", e)
@@ -1355,13 +1358,13 @@ pub async fn function_calling_chat_handler(
     let chat_request: ChatCompletionsRequest = match serde_json::from_value(body_json) {
         Ok(req) => {
             info!(
-                "[request body]: {}",
-                serde_json::to_string(&req).unwrap_or_default()
+                request_body = %serde_json::to_string(&req).unwrap_or_default(),
+                "received request"
             );
             req
         }
         Err(e) => {
-            error!("Failed to parse request body: {}", e);
+            error!(error = %e, "failed to parse request body");
             let mut response = Response::new(full(
                 serde_json::json!({
                     "error": format!("Invalid request body: {}", e)
@@ -1384,7 +1387,10 @@ pub async fn function_calling_chat_handler(
         .and_then(|v| v.as_bool())
         .unwrap_or(false);
 
-    info!("Use agent orchestrator: {}", use_agent_orchestrator);
+    info!(
+        use_agent_orchestrator = use_agent_orchestrator,
+        "handler mode"
+    );
 
     // Create the appropriate handler
     let handler_name = if use_agent_orchestrator {
@@ -1415,7 +1421,7 @@ pub async fn function_calling_chat_handler(
     match final_response {
         Ok(response_data) => {
             let response_json = serde_json::to_string(&response_data).unwrap_or_else(|e| {
-                error!("Failed to serialize response: {}", e);
+                error!(error = %e, "failed to serialize response");
                 serde_json::json!({"error": "Failed to serialize response"}).to_string()
             });
 
@@ -1428,7 +1434,7 @@ pub async fn function_calling_chat_handler(
             Ok(response)
         }
         Err(e) => {
-            error!("[{}] - Error in function calling: {}", handler_name, e);
+            error!(handler = handler_name, error = %e, "error in function calling");
 
             let error_response = serde_json::json!({
                 "error": format!("[{}] - Error in function calling: {}", handler_name, e)
diff --git a/crates/brightstaff/src/handlers/integration_tests.rs b/crates/brightstaff/src/handlers/integration_tests.rs
index 9239f94a..70eaacd7 100644
--- a/crates/brightstaff/src/handlers/integration_tests.rs
+++ b/crates/brightstaff/src/handlers/integration_tests.rs
@@ -112,15 +112,7 @@ mod tests {
 
         let headers = HeaderMap::new();
         let result = pipeline_processor
-            .process_filter_chain(
-                &request.messages,
-                &test_pipeline,
-                &agent_map,
-                &headers,
-                None,
-                String::new(),
-                String::new(),
-            )
+            .process_filter_chain(&request.messages, &test_pipeline, &agent_map, &headers)
             .await;
 
         println!("Pipeline processing result: {:?}", result);
diff --git a/crates/brightstaff/src/handlers/llm.rs b/crates/brightstaff/src/handlers/llm.rs
index e1fe5a93..b9fe0ba3 100644
--- a/crates/brightstaff/src/handlers/llm.rs
+++ b/crates/brightstaff/src/handlers/llm.rs
@@ -4,7 +4,6 @@ use common::consts::{
     ARCH_IS_STREAMING_HEADER, ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER, TRACE_PARENT_HEADER,
 };
 use common::llm_providers::LlmProviders;
-use common::traces::TraceCollector;
 use hermesllm::apis::openai_responses::InputParam;
 use hermesllm::clients::{SupportedAPIsFromClient, SupportedUpstreamAPIs};
 use hermesllm::{ProviderRequest, ProviderRequestType};
@@ -12,10 +11,13 @@ use http_body_util::combinators::BoxBody;
 use http_body_util::{BodyExt, Full};
 use hyper::header::{self};
 use hyper::{Request, Response, StatusCode};
+use opentelemetry::global;
+use opentelemetry::trace::get_active_span;
+use opentelemetry_http::HeaderInjector;
 use std::collections::HashMap;
 use std::sync::Arc;
 use tokio::sync::RwLock;
-use tracing::{debug, info, warn};
+use tracing::{debug, info, info_span, warn, Instrument};
 
 use crate::handlers::router_chat::router_chat_get_upstream_model;
 use crate::handlers::utils::{
@@ -26,7 +28,7 @@ use crate::state::response_state_processor::ResponsesStateProcessor;
 use crate::state::{
     extract_input_items, retrieve_and_combine_input, StateStorage, StateStorageError,
 };
-use crate::tracing::operation_component;
+use crate::tracing::{operation_component, set_service_name};
 
 fn full<T: Into<Bytes>>(chunk: T) -> BoxBody<Bytes, hyper::Error> {
     Full::new(chunk.into())
@@ -40,7 +42,6 @@ pub async fn llm_chat(
     full_qualified_llm_provider_url: String,
     model_aliases: Arc<Option<HashMap<String, ModelAlias>>>,
     llm_providers: Arc<RwLock<LlmProviders>>,
-    trace_collector: Arc<TraceCollector>,
     state_storage: Option<Arc<dyn StateStorage>>,
 ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
     let request_path = request.uri().path().to_string();
@@ -51,16 +52,49 @@ pub async fn llm_chat(
         .map(|s| s.to_string())
     {
         Some(id) => id,
-        None => {
-            let generated_id = uuid::Uuid::new_v4().to_string();
-            warn!(
-                "[PLANO_REQ_ID:{}] | REQUEST_ID header missing, generated new ID",
-                generated_id
-            );
-            generated_id
-        }
+        None => uuid::Uuid::new_v4().to_string(),
     };
 
+    // Create a span with request_id that will be included in all log lines
+    let request_span = info_span!(
+        "llm",
+        component = "llm",
+        request_id = %request_id,
+        http.method = %request.method(),
+        http.path = %request_path,
+    );
+
+    // Execute the rest of the handler inside the span
+    llm_chat_inner(
+        request,
+        router_service,
+        full_qualified_llm_provider_url,
+        model_aliases,
+        llm_providers,
+        state_storage,
+        request_id,
+        request_path,
+        request_headers,
+    )
+    .instrument(request_span)
+    .await
+}
+
+#[allow(clippy::too_many_arguments)]
+async fn llm_chat_inner(
+    request: Request<hyper::body::Incoming>,
+    router_service: Arc<RouterService>,
+    full_qualified_llm_provider_url: String,
+    model_aliases: Arc<Option<HashMap<String, ModelAlias>>>,
+    llm_providers: Arc<RwLock<LlmProviders>>,
+    state_storage: Option<Arc<dyn StateStorage>>,
+    request_id: String,
+    request_path: String,
+    mut request_headers: hyper::HeaderMap,
+) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
+    // Set service name for LLM operations
+    set_service_name(operation_component::LLM);
+
     // Extract or generate traceparent - this establishes the trace context for all spans
     let traceparent: String = match request_headers
         .get(TRACE_PARENT_HEADER)
@@ -73,20 +107,18 @@ pub async fn llm_chat(
             let trace_id = Uuid::new_v4().to_string().replace("-", "");
             let generated_tp = format!("00-{}-0000000000000000-01", trace_id);
             warn!(
-                "[PLANO_REQ_ID:{}] | TRACE_PARENT header missing, generated new traceparent: {}",
-                request_id, generated_tp
+                generated_traceparent = %generated_tp,
+                "TRACE_PARENT header missing, generated new traceparent"
             );
             generated_tp
         }
     };
 
-    let mut request_headers = request_headers;
     let chat_request_bytes = request.collect().await?.to_bytes();
 
     debug!(
-        "[PLANO_REQ_ID:{}] | REQUEST_BODY (UTF8): {}",
-        request_id,
-        String::from_utf8_lossy(&chat_request_bytes)
+        body = %String::from_utf8_lossy(&chat_request_bytes),
+        "request body received"
     );
 
     let mut client_request = match ProviderRequestType::try_from((
@@ -96,13 +128,10 @@ pub async fn llm_chat(
         Ok(request) => request,
         Err(err) => {
             warn!(
-                "[PLANO_REQ_ID:{}] | FAILURE | Failed to parse request as ProviderRequestType: {}",
-                request_id, err
-            );
-            let err_msg = format!(
-                "[PLANO_REQ_ID:{}] | FAILURE | Failed to parse request: {}",
-                request_id, err
+                error = %err,
+                "failed to parse request as ProviderRequestType"
             );
+            let err_msg = format!("Failed to parse request: {}", err);
             let mut bad_request = Response::new(full(err_msg));
             *bad_request.status_mut() = StatusCode::BAD_REQUEST;
             return Ok(bad_request);
@@ -120,18 +149,23 @@ pub async fn llm_chat(
     // Model alias resolution: update model field in client_request immediately
     // This ensures all downstream objects use the resolved model
     let model_from_request = client_request.model().to_string();
-    let temperature = client_request.get_temperature();
+    let _temperature = client_request.get_temperature();
     let is_streaming_request = client_request.is_streaming();
-    let resolved_model = resolve_model_alias(&model_from_request, &model_aliases);
+    let alias_resolved_model = resolve_model_alias(&model_from_request, &model_aliases);
 
     // Validate that the requested model exists in configuration
     // This matches the validation in llm_gateway routing.rs
-    if llm_providers.read().await.get(&resolved_model).is_none() {
+    if llm_providers
+        .read()
+        .await
+        .get(&alias_resolved_model)
+        .is_none()
+    {
         let err_msg = format!(
             "Model '{}' not found in configured providers",
-            resolved_model
+            alias_resolved_model
         );
-        warn!("[PLANO_REQ_ID:{}] | FAILURE | {}", request_id, err_msg);
+        warn!(model = %alias_resolved_model, "model not found in configured providers");
         let mut bad_request = Response::new(full(err_msg));
         *bad_request.status_mut() = StatusCode::BAD_REQUEST;
         return Ok(bad_request);
@@ -139,29 +173,26 @@ pub async fn llm_chat(
 
     // Handle provider/model slug format (e.g., "openai/gpt-4")
     // Extract just the model name for upstream (providers don't understand the slug)
-    let model_name_only = if let Some((_, model)) = resolved_model.split_once('/') {
+    let model_name_only = if let Some((_, model)) = alias_resolved_model.split_once('/') {
         model.to_string()
     } else {
-        resolved_model.clone()
+        alias_resolved_model.clone()
     };
 
     // Extract tool names and user message preview for span attributes
-    let tool_names = client_request.get_tool_names();
-    let user_message_preview = client_request
+    let _tool_names = client_request.get_tool_names();
+    let _user_message_preview = client_request
         .get_recent_user_message()
         .map(|msg| truncate_message(&msg, 50));
 
     // Extract messages for signal analysis (clone before moving client_request)
-    let messages_for_signals = client_request.get_messages();
+    let messages_for_signals = Some(client_request.get_messages());
 
     // Set the model to just the model name (without provider prefix)
     // This ensures upstream receives "gpt-4" not "openai/gpt-4"
     client_request.set_model(model_name_only.clone());
     if client_request.remove_metadata_key("archgw_preference_config") {
-        debug!(
-            "[PLANO_REQ_ID:{}] Removed archgw_preference_config from metadata",
-            request_id
-        );
+        debug!("removed archgw_preference_config from metadata");
     }
 
     // === v1/responses state management: Determine upstream API and combine input if needed ===
@@ -180,9 +211,9 @@ pub async fn llm_chat(
             // Get the upstream path and check if it's ResponsesAPI
             let upstream_path = get_upstream_path(
                 &llm_providers,
-                &resolved_model,
+                &alias_resolved_model,
                 &request_path,
-                &resolved_model,
+                &alias_resolved_model,
                 is_streaming_request,
             )
             .await;
@@ -209,14 +240,17 @@ pub async fn llm_chat(
                             // Update both the request and original_input_items
                             responses_req.input = InputParam::Items(combined_input.clone());
                             original_input_items = combined_input;
-                            info!("[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Updated request with conversation history ({} items)", request_id, original_input_items.len());
+                            info!(
+                                items = original_input_items.len(),
+                                "updated request with conversation history"
+                            );
                         }
                         Err(StateStorageError::NotFound(_)) => {
                             // Return 409 Conflict when previous_response_id not found
-                            warn!("[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Previous response_id not found: {}", request_id, prev_resp_id);
+                            warn!(previous_response_id = %prev_resp_id, "previous response_id not found");
                             let err_msg = format!(
-                                "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Conversation state not found for previous_response_id: {}",
-                                request_id, prev_resp_id
+                                "Conversation state not found for previous_response_id: {}",
+                                prev_resp_id
                             );
                             let mut conflict_response = Response::new(full(err_msg));
                             *conflict_response.status_mut() = StatusCode::CONFLICT;
@@ -225,8 +259,9 @@ pub async fn llm_chat(
                         Err(e) => {
                             // Log warning but continue on other storage errors
                             warn!(
-                                "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to retrieve conversation state for {}: {}",
-                                request_id, prev_resp_id, e
+                                previous_response_id = %prev_resp_id,
+                                error = %e,
+                                "failed to retrieve conversation state"
                             );
                             // Restore original_input_items since we passed ownership
                             original_input_items = extract_input_items(&responses_req.input);
@@ -234,10 +269,7 @@ pub async fn llm_chat(
                     }
                 }
             } else {
-                debug!(
-                    "[PLANO_REQ_ID:{}] | BRIGHT_STAFF | Upstream supports ResponsesAPI natively.",
-                    request_id
-                );
+                debug!("upstream supports ResponsesAPI natively");
             }
         }
     }
@@ -246,14 +278,29 @@ pub async fn llm_chat(
     let client_request_bytes_for_upstream = ProviderRequestType::to_bytes(&client_request).unwrap();
 
     // Determine routing using the dedicated router_chat module
-    let routing_result = match router_chat_get_upstream_model(
-        router_service,
-        client_request, // Pass the original request - router_chat will convert it
-        trace_collector.clone(),
-        &traceparent,
-        &request_path,
-        &request_id,
-    )
+    // This gets its own span for latency and error tracking
+    let routing_span = info_span!(
+        "routing",
+        component = "routing",
+        http.method = "POST",
+        http.target = %request_path,
+        model.requested = %model_from_request,
+        model.alias_resolved = %alias_resolved_model,
+        route.selected_model = tracing::field::Empty,
+        routing.determination_ms = tracing::field::Empty,
+    );
+    let routing_result = match async {
+        set_service_name(operation_component::ROUTING);
+        router_chat_get_upstream_model(
+            router_service,
+            client_request, // Pass the original request - router_chat will convert it
+            &traceparent,
+            &request_path,
+            &request_id,
+        )
+        .await
+    }
+    .instrument(routing_span)
     .await
     {
         Ok(result) => result,
@@ -267,22 +314,36 @@ pub async fn llm_chat(
     // Determine final model to use
     // Router returns "none" as a sentinel value when it doesn't select a specific model
     let router_selected_model = routing_result.model_name;
-    let model_name = if router_selected_model != "none" {
+    let resolved_model = if router_selected_model != "none" {
         // Router selected a specific model via routing preferences
         router_selected_model
     } else {
         // Router returned "none" sentinel, use validated resolved_model from request
-        resolved_model.clone()
+        alias_resolved_model.clone()
     };
 
+    let span_name = if model_from_request == resolved_model {
+        format!("POST {} {}", request_path, resolved_model)
+    } else {
+        format!(
+            "POST {} {} -> {}",
+            request_path, model_from_request, resolved_model
+        )
+    };
+    get_active_span(|span| {
+        span.update_name(span_name.clone());
+    });
+
     debug!(
-        "[PLANO_REQ_ID:{}] | ARCH_ROUTER URL | {}, Provider Hint: {}, Model for upstream: {}",
-        request_id, full_qualified_llm_provider_url, model_name, model_name_only
+        url = %full_qualified_llm_provider_url,
+        provider_hint = %resolved_model,
+        upstream_model = %model_name_only,
+        "Routing to upstream"
     );
 
     request_headers.insert(
         ARCH_PROVIDER_HINT_HEADER,
-        header::HeaderValue::from_str(&model_name).unwrap(),
+        header::HeaderValue::from_str(&resolved_model).unwrap(),
     );
 
     request_headers.insert(
@@ -292,12 +353,18 @@ pub async fn llm_chat(
     // remove content-length header if it exists
     request_headers.remove(header::CONTENT_LENGTH);
 
+    // Inject current LLM span's trace context so upstream spans are children of plano(llm)
+    global::get_text_map_propagator(|propagator| {
+        let cx = tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
+        propagator.inject_context(&cx, &mut HeaderInjector(&mut request_headers));
+    });
+
     // Capture start time right before sending request to upstream
     let request_start_time = std::time::Instant::now();
-    let request_start_system_time = std::time::SystemTime::now();
+    let _request_start_system_time = std::time::SystemTime::now();
 
     let llm_response = match reqwest::Client::new()
-        .post(full_qualified_llm_provider_url)
+        .post(&full_qualified_llm_provider_url)
         .headers(request_headers)
         .body(client_request_bytes_for_upstream)
         .send()
@@ -324,29 +391,12 @@ pub async fn llm_chat(
     // Build LLM span with actual status code using constants
     let byte_stream = llm_response.bytes_stream();
 
-    // Build the LLM span (will be finalized after streaming completes)
-    let llm_span = build_llm_span(
-        &traceparent,
-        &request_path,
-        &resolved_model,
-        &model_name,
-        upstream_status.as_u16(),
-        is_streaming_request,
-        request_start_system_time,
-        tool_names,
-        user_message_preview,
-        temperature,
-        &llm_providers,
-    )
-    .await;
-
     // Create base processor for metrics and tracing
     let base_processor = ObservableStreamProcessor::new(
-        trace_collector,
         operation_component::LLM,
-        llm_span,
+        span_name,
         request_start_time,
-        Some(messages_for_signals),
+        messages_for_signals,
     );
 
     // === v1/responses state management: Wrap with ResponsesStateProcessor ===
@@ -367,8 +417,8 @@ pub async fn llm_chat(
             base_processor,
             state_store,
             original_input_items,
+            alias_resolved_model.clone(),
             resolved_model.clone(),
-            model_name.clone(),
             is_streaming_request,
             false, // Not OpenAI upstream since should_manage_state is true
             content_encoding,
@@ -409,88 +459,6 @@ fn resolve_model_alias(
     model_from_request.to_string()
 }
 
-/// Builds the LLM span with all required and optional attributes.
-#[allow(clippy::too_many_arguments)]
-async fn build_llm_span(
-    traceparent: &str,
-    request_path: &str,
-    resolved_model: &str,
-    model_name: &str,
-    status_code: u16,
-    is_streaming: bool,
-    start_time: std::time::SystemTime,
-    tool_names: Option<Vec<String>>,
-    user_message_preview: Option<String>,
-    temperature: Option<f32>,
-    llm_providers: &Arc<RwLock<LlmProviders>>,
-) -> common::traces::Span {
-    use crate::tracing::{http, llm, OperationNameBuilder};
-    use common::traces::{parse_traceparent, SpanBuilder, SpanKind};
-
-    // Calculate the upstream path based on provider configuration
-    let upstream_path = get_upstream_path(
-        llm_providers,
-        model_name,
-        request_path,
-        resolved_model,
-        is_streaming,
-    )
-    .await;
-
-    // Build operation name showing path transformation if different
-    let operation_name = if request_path != upstream_path {
-        OperationNameBuilder::new()
-            .with_method("POST")
-            .with_path(format!("{} >> {}", request_path, upstream_path))
-            .with_target(resolved_model)
-            .build()
-    } else {
-        OperationNameBuilder::new()
-            .with_method("POST")
-            .with_path(request_path)
-            .with_target(resolved_model)
-            .build()
-    };
-
-    let (trace_id, parent_span_id) = parse_traceparent(traceparent);
-
-    let mut span_builder = SpanBuilder::new(&operation_name)
-        .with_trace_id(&trace_id)
-        .with_kind(SpanKind::Client)
-        .with_start_time(start_time)
-        .with_attribute(http::METHOD, "POST")
-        .with_attribute(http::STATUS_CODE, status_code.to_string())
-        .with_attribute(http::TARGET, request_path.to_string())
-        .with_attribute(http::UPSTREAM_TARGET, upstream_path)
-        .with_attribute(llm::MODEL_NAME, resolved_model.to_string())
-        .with_attribute(llm::IS_STREAMING, is_streaming.to_string());
-
-    // Only set parent span ID if it exists (not a root span)
-    if let Some(parent) = parent_span_id {
-        span_builder = span_builder.with_parent_span_id(&parent);
-    }
-
-    // Add optional attributes
-    if let Some(temp) = temperature {
-        span_builder = span_builder.with_attribute(llm::TEMPERATURE, temp.to_string());
-    }
-
-    if let Some(tools) = tool_names {
-        let formatted_tools = tools
-            .iter()
-            .map(|name| format!("{}(...)", name))
-            .collect::<Vec<_>>()
-            .join("\n");
-        span_builder = span_builder.with_attribute(llm::TOOLS, formatted_tools);
-    }
-
-    if let Some(preview) = user_message_preview {
-        span_builder = span_builder.with_attribute(llm::USER_MESSAGE_PREVIEW, preview);
-    }
-
-    span_builder.build()
-}
-
 /// Calculates the upstream path for the provider based on the model name.
 /// Looks up provider configuration, gets the ProviderId and base_url_path_prefix,
 /// then uses target_endpoint_for_provider to calculate the correct upstream path.
diff --git a/crates/brightstaff/src/handlers/pipeline_processor.rs b/crates/brightstaff/src/handlers/pipeline_processor.rs
index bc36de01..d9e87095 100644
--- a/crates/brightstaff/src/handlers/pipeline_processor.rs
+++ b/crates/brightstaff/src/handlers/pipeline_processor.rs
@@ -4,20 +4,18 @@ use common::configuration::{Agent, AgentFilterChain};
 use common::consts::{
     ARCH_UPSTREAM_HOST_HEADER, BRIGHT_STAFF_SERVICE_NAME, ENVOY_RETRY_HEADER, TRACE_PARENT_HEADER,
 };
-use common::traces::{generate_random_span_id, SpanBuilder, SpanKind};
 use hermesllm::apis::openai::Message;
 use hermesllm::{ProviderRequest, ProviderRequestType};
 use hyper::header::HeaderMap;
-use std::time::{Instant, SystemTime};
-use tracing::{debug, info, warn};
-
-use crate::tracing::operation_component::{self};
-use crate::tracing::{http, OperationNameBuilder};
+use opentelemetry::global;
+use opentelemetry_http::HeaderInjector;
+use tracing::{debug, info, instrument, warn};
 
 use crate::handlers::jsonrpc::{
     JsonRpcId, JsonRpcNotification, JsonRpcRequest, JsonRpcResponse, JSON_RPC_VERSION,
     MCP_INITIALIZE, MCP_INITIALIZE_NOTIFICATION, TOOL_CALL_METHOD,
 };
+use crate::tracing::{operation_component, set_service_name};
 use uuid::Uuid;
 
 /// Errors that can occur during pipeline processing
@@ -81,115 +79,14 @@ impl PipelineProcessor {
         }
     }
 
-    /// Record a span for filter execution
-    #[allow(clippy::too_many_arguments)]
-    fn record_filter_span(
-        &self,
-        collector: &std::sync::Arc<common::traces::TraceCollector>,
-        agent_name: &str,
-        tool_name: &str,
-        start_time: SystemTime,
-        end_time: SystemTime,
-        elapsed: std::time::Duration,
-        trace_id: String,
-        parent_span_id: String,
-        span_id: String,
-    ) -> String {
-        // let (trace_id, parent_span_id) = self.extract_trace_context();
-
-        // Build operation name: POST /agents/* {filter_name}
-        // Using generic path since we don't have access to specific endpoint here
-        let operation_name = OperationNameBuilder::new()
-            .with_method("POST")
-            .with_path("/agents/*")
-            .with_target(agent_name)
-            .build();
-
-        let mut span_builder = SpanBuilder::new(&operation_name)
-            .with_span_id(span_id.clone())
-            .with_kind(SpanKind::Client)
-            .with_start_time(start_time)
-            .with_end_time(end_time)
-            .with_attribute(http::METHOD, "POST")
-            .with_attribute(http::TARGET, "/agents/*")
-            .with_attribute("filter.name", agent_name.to_string())
-            .with_attribute("filter.tool_name", tool_name.to_string())
-            .with_attribute(
-                "duration_ms",
-                format!("{:.2}", elapsed.as_secs_f64() * 1000.0),
-            );
-
-        if !trace_id.is_empty() {
-            span_builder = span_builder.with_trace_id(trace_id);
-        }
-        if !parent_span_id.is_empty() {
-            span_builder = span_builder.with_parent_span_id(parent_span_id);
-        }
-
-        let span = span_builder.build();
-        // Use plano(filter) as service name for filter execution spans
-        collector.record_span(operation_component::AGENT_FILTER, span);
-        span_id.clone()
-    }
-
-    /// Record a span for MCP protocol interactions
-    #[allow(clippy::too_many_arguments)]
-    fn record_agent_filter_span(
-        &self,
-        collector: &std::sync::Arc<common::traces::TraceCollector>,
-        operation: &str,
-        agent_id: &str,
-        start_time: SystemTime,
-        end_time: SystemTime,
-        elapsed: std::time::Duration,
-        additional_attrs: Option<HashMap<&str, String>>,
-        trace_id: String,
-        parent_span_id: String,
-        span_id: Option<String>,
-    ) {
-        // let (trace_id, parent_span_id) = self.extract_trace_context();
-
-        // Build operation name: POST /mcp {agent_id}
-        let operation_name = OperationNameBuilder::new()
-            .with_method("POST")
-            .with_path("/mcp")
-            .with_operation(operation)
-            .with_target(agent_id)
-            .build();
-
-        let mut span_builder = SpanBuilder::new(&operation_name)
-            .with_span_id(span_id.unwrap_or_else(generate_random_span_id))
-            .with_kind(SpanKind::Client)
-            .with_start_time(start_time)
-            .with_end_time(end_time)
-            .with_attribute(http::METHOD, "POST")
-            .with_attribute(http::TARGET, format!("/mcp ({})", operation))
-            .with_attribute("mcp.operation", operation.to_string())
-            .with_attribute("mcp.agent_id", agent_id.to_string())
-            .with_attribute(
-                "duration_ms",
-                format!("{:.2}", elapsed.as_secs_f64() * 1000.0),
-            );
-
-        if let Some(attrs) = additional_attrs {
-            for (key, value) in attrs {
-                span_builder = span_builder.with_attribute(key, value);
-            }
-        }
-
-        if !trace_id.is_empty() {
-            span_builder = span_builder.with_trace_id(trace_id);
-        }
-        if !parent_span_id.is_empty() {
-            span_builder = span_builder.with_parent_span_id(parent_span_id);
-        }
-
-        let span = span_builder.build();
-        // MCP spans also use plano(filter) service name as they are part of filter operations
-        collector.record_span(operation_component::AGENT_FILTER, span);
-    }
-
-    /// Process the filter chain of agents (all except the terminal agent)
+    // /// Process the filter chain of agents (all except the terminal agent)
+    // #[instrument(
+    //     skip(self, chat_history, agent_filter_chain, agent_map, request_headers),
+    //     fields(
+    //         filter_count = agent_filter_chain.filter_chain.as_ref().map(|fc| fc.len()).unwrap_or(0),
+    //         message_count = chat_history.len()
+    //     )
+    // )]
     #[allow(clippy::too_many_arguments)]
     pub async fn process_filter_chain(
         &mut self,
@@ -197,9 +94,6 @@ impl PipelineProcessor {
         agent_filter_chain: &AgentFilterChain,
         agent_map: &HashMap<String, Agent>,
         request_headers: &HeaderMap,
-        trace_collector: Option<&std::sync::Arc<common::traces::TraceCollector>>,
-        trace_id: String,
-        parent_span_id: String,
     ) -> Result<Vec<Message>, PipelineError> {
         let mut chat_history_updated = chat_history.to_vec();
 
@@ -210,7 +104,7 @@ impl PipelineProcessor {
         };
 
         for agent_name in filter_chain {
-            debug!("Processing filter agent: {}", agent_name);
+            debug!(agent = %agent_name, "processing filter agent");
 
             let agent = agent_map
                 .get(agent_name)
@@ -219,68 +113,29 @@ impl PipelineProcessor {
             let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
 
             info!(
-                "executing filter: {}/{}, url: {}, type: {}, conversation length: {}",
-                agent_name,
-                tool_name,
-                agent.url,
-                agent.agent_type.as_deref().unwrap_or("mcp"),
-                chat_history.len()
+                agent = %agent_name,
+                tool = %tool_name,
+                url = %agent.url,
+                agent_type = %agent.agent_type.as_deref().unwrap_or("mcp"),
+                conversation_len = chat_history.len(),
+                "executing filter"
             );
 
-            let start_time = SystemTime::now();
-            let start_instant = Instant::now();
-
-            // Generate filter span ID before execution so MCP spans can use it as parent
-            let filter_span_id = generate_random_span_id();
-
             if agent.agent_type.as_deref().unwrap_or("mcp") == "mcp" {
                 chat_history_updated = self
-                    .execute_mcp_filter(
-                        &chat_history_updated,
-                        agent,
-                        request_headers,
-                        trace_collector,
-                        trace_id.clone(),
-                        filter_span_id.clone(),
-                    )
+                    .execute_mcp_filter(&chat_history_updated, agent, request_headers)
                     .await?;
             } else {
                 chat_history_updated = self
-                    .execute_http_filter(
-                        &chat_history_updated,
-                        agent,
-                        request_headers,
-                        trace_collector,
-                        trace_id.clone(),
-                        filter_span_id.clone(),
-                    )
+                    .execute_http_filter(&chat_history_updated, agent, request_headers)
                     .await?;
             }
 
-            let end_time = SystemTime::now();
-            let elapsed = start_instant.elapsed();
-
             info!(
-                "Filter '{}' completed in {:.2}ms, updated conversation length: {}",
-                agent_name,
-                elapsed.as_secs_f64() * 1000.0,
-                chat_history_updated.len()
+                agent = %agent_name,
+                updated_len = chat_history_updated.len(),
+                "filter completed"
             );
-
-            // Record span for this filter execution
-            if let Some(collector) = trace_collector {
-                self.record_filter_span(
-                    collector,
-                    agent_name,
-                    tool_name,
-                    start_time,
-                    end_time,
-                    elapsed,
-                    trace_id.clone(),
-                    parent_span_id.clone(),
-                    filter_span_id,
-                );
-            }
         }
 
         Ok(chat_history_updated)
@@ -292,18 +147,17 @@ impl PipelineProcessor {
         request_headers: &HeaderMap,
         agent_id: &str,
         session_id: Option<&str>,
-        trace_id: String,
-        parent_span_id: String,
     ) -> Result<HeaderMap, PipelineError> {
-        let trace_parent = format!("00-{}-{}-01", trace_id, parent_span_id);
         let mut headers = request_headers.clone();
         headers.remove(hyper::header::CONTENT_LENGTH);
 
+        // Inject OpenTelemetry trace context automatically
         headers.remove(TRACE_PARENT_HEADER);
-        headers.insert(
-            TRACE_PARENT_HEADER,
-            hyper::header::HeaderValue::from_str(&trace_parent).unwrap(),
-        );
+        global::get_text_map_propagator(|propagator| {
+            let cx =
+                tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
+            propagator.inject_context(&cx, &mut HeaderInjector(&mut headers));
+        });
 
         headers.insert(
             ARCH_UPSTREAM_HOST_HEADER,
@@ -348,9 +202,9 @@ impl PipelineProcessor {
         // Validate SSE format: first line should be "event: message"
         if lines.is_empty() || lines[0] != "event: message" {
             warn!(
-                "Invalid SSE response format from agent {}: expected 'event: message' as first line, got: {:?}",
-                agent_id,
-                lines.first()
+                agent = %agent_id,
+                first_line = ?lines.first(),
+                "invalid SSE response format"
             );
             return Err(PipelineError::NoContentInResponse(format!(
                 "Invalid SSE response format from agent {}: expected 'event: message' as first line",
@@ -367,9 +221,9 @@ impl PipelineProcessor {
 
         if data_lines.len() != 1 {
             warn!(
-                "Expected exactly one 'data:' line from agent {}, found {}",
-                agent_id,
-                data_lines.len()
+                agent = %agent_id,
+                found = data_lines.len(),
+                "expected exactly one 'data:' line"
             );
             return Err(PipelineError::NoContentInResponse(format!(
                 "Expected exactly one 'data:' line from agent {}, found {}",
@@ -429,27 +283,34 @@ impl PipelineProcessor {
     }
 
     /// Send request to a specific agent and return the response content
+    #[instrument(
+        skip(self, messages, agent, request_headers),
+        fields(
+            agent_id = %agent.id,
+            filter_name = %agent.id,
+            message_count = messages.len()
+        )
+    )]
     async fn execute_mcp_filter(
         &mut self,
         messages: &[Message],
         agent: &Agent,
         request_headers: &HeaderMap,
-        trace_collector: Option<&std::sync::Arc<common::traces::TraceCollector>>,
-        trace_id: String,
-        filter_span_id: String,
     ) -> Result<Vec<Message>, PipelineError> {
+        // Set service name for this filter span
+        set_service_name(operation_component::AGENT_FILTER);
+
+        // Update current span name to include filter name
+        use opentelemetry::trace::get_active_span;
+        get_active_span(|span| {
+            span.update_name(format!("execute_mcp_filter ({})", agent.id));
+        });
+
         // Get or create MCP session
         let mcp_session_id = if let Some(session_id) = self.agent_id_session_map.get(&agent.id) {
             session_id.clone()
         } else {
-            let session_id = self
-                .get_new_session_id(
-                    &agent.id,
-                    trace_id.clone(),
-                    filter_span_id.clone(),
-                    request_headers,
-                )
-                .await;
+            let session_id = self.get_new_session_id(&agent.id, request_headers).await;
             self.agent_id_session_map
                 .insert(agent.id.clone(), session_id.clone());
             session_id
@@ -464,21 +325,9 @@ impl PipelineProcessor {
         let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
         let json_rpc_request = self.build_tool_call_request(tool_name, messages)?;
 
-        // Generate span ID for this MCP tool call (child of filter span)
-        let mcp_span_id = generate_random_span_id();
-
         // Build headers
-        let agent_headers = self.build_mcp_headers(
-            request_headers,
-            &agent.id,
-            Some(&mcp_session_id),
-            trace_id.clone(),
-            mcp_span_id.clone(),
-        )?;
-
-        // Send request with tracing
-        let start_time = SystemTime::now();
-        let start_instant = Instant::now();
+        let agent_headers =
+            self.build_mcp_headers(request_headers, &agent.id, Some(&mcp_session_id))?;
 
         let response = self
             .send_mcp_request(&json_rpc_request, &agent_headers, &agent.id)
@@ -486,31 +335,6 @@ impl PipelineProcessor {
         let http_status = response.status();
         let response_bytes = response.bytes().await?;
 
-        let end_time = SystemTime::now();
-        let elapsed = start_instant.elapsed();
-
-        // Record MCP tool call span
-        if let Some(collector) = trace_collector {
-            let mut attrs = HashMap::new();
-            attrs.insert("mcp.method", "tools/call".to_string());
-            attrs.insert("mcp.tool_name", tool_name.to_string());
-            attrs.insert("mcp.session_id", mcp_session_id.clone());
-            attrs.insert("http.status_code", http_status.as_u16().to_string());
-
-            self.record_agent_filter_span(
-                collector,
-                "tool_call",
-                &agent.id,
-                start_time,
-                end_time,
-                elapsed,
-                Some(attrs),
-                trace_id.clone(),
-                filter_span_id.clone(),
-                Some(mcp_span_id),
-            );
-        }
-
         // Handle HTTP errors
         if !http_status.is_success() {
             let error_body = String::from_utf8_lossy(&response_bytes).to_string();
@@ -611,8 +435,6 @@ impl PipelineProcessor {
         &self,
         agent_id: &str,
         session_id: &str,
-        trace_id: String,
-        parent_span_id: String,
         request_headers: &HeaderMap,
     ) -> Result<(), PipelineError> {
         let initialized_notification = JsonRpcNotification {
@@ -622,15 +444,9 @@ impl PipelineProcessor {
         };
 
         let notification_body = serde_json::to_string(&initialized_notification)?;
-        debug!("Sending initialized notification for agent {}", agent_id);
+        debug!("sending initialized notification for agent {}", agent_id);
 
-        let headers = self.build_mcp_headers(
-            request_headers,
-            agent_id,
-            Some(session_id),
-            trace_id.clone(),
-            parent_span_id.clone(),
-        )?;
+        let headers = self.build_mcp_headers(request_headers, agent_id, Some(session_id))?;
 
         let response = self
             .client
@@ -641,31 +457,19 @@ impl PipelineProcessor {
             .await?;
 
         info!(
-            "Initialized notification response status: {}",
+            "initialized notification response status: {}",
             response.status()
         );
 
         Ok(())
     }
 
-    async fn get_new_session_id(
-        &self,
-        agent_id: &str,
-        trace_id: String,
-        parent_span_id: String,
-        request_headers: &HeaderMap,
-    ) -> String {
-        info!("Initializing MCP session for agent {}", agent_id);
+    async fn get_new_session_id(&self, agent_id: &str, request_headers: &HeaderMap) -> String {
+        info!("initializing MCP session for agent {}", agent_id);
 
         let initialize_request = self.build_initialize_request();
         let headers = self
-            .build_mcp_headers(
-                request_headers,
-                agent_id,
-                None,
-                trace_id.clone(),
-                parent_span_id.clone(),
-            )
+            .build_mcp_headers(request_headers, agent_id, None)
             .expect("Failed to build headers for initialization");
 
         let response = self
@@ -673,7 +477,7 @@ impl PipelineProcessor {
             .await
             .expect("Failed to initialize MCP session");
 
-        info!("Initialize response status: {}", response.status());
+        info!("initialize response status: {}", response.status());
 
         let session_id = response
             .headers()
@@ -683,49 +487,54 @@ impl PipelineProcessor {
             .to_string();
 
         info!(
-            "Created new MCP session for agent {}: {}",
+            "created new MCP session for agent {}: {}",
             agent_id, session_id
         );
 
         // Send initialized notification
-        self.send_initialized_notification(
-            agent_id,
-            &session_id,
-            trace_id.clone(),
-            parent_span_id.clone(),
-            &headers,
-        )
-        .await
-        .expect("Failed to send initialized notification");
+        self.send_initialized_notification(agent_id, &session_id, &headers)
+            .await
+            .expect("Failed to send initialized notification");
 
         session_id
     }
 
     /// Execute a HTTP-based filter agent
+    #[instrument(
+        skip(self, messages, agent, request_headers),
+        fields(
+            agent_id = %agent.id,
+            agent_url = %agent.url,
+            filter_name = %agent.id,
+            message_count = messages.len()
+        )
+    )]
     async fn execute_http_filter(
         &mut self,
         messages: &[Message],
         agent: &Agent,
         request_headers: &HeaderMap,
-        trace_collector: Option<&std::sync::Arc<common::traces::TraceCollector>>,
-        trace_id: String,
-        filter_span_id: String,
     ) -> Result<Vec<Message>, PipelineError> {
-        let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
+        // Set service name for this filter span
+        set_service_name(operation_component::AGENT_FILTER);
 
-        // Generate span ID for this HTTP call (child of filter span)
-        let http_span_id = generate_random_span_id();
+        // Update current span name to include filter name
+        use opentelemetry::trace::get_active_span;
+        get_active_span(|span| {
+            span.update_name(format!("execute_http_filter ({})", agent.id));
+        });
 
         // Build headers
-        let trace_parent = format!("00-{}-{}-01", trace_id, http_span_id);
         let mut agent_headers = request_headers.clone();
         agent_headers.remove(hyper::header::CONTENT_LENGTH);
 
+        // Inject OpenTelemetry trace context automatically
         agent_headers.remove(TRACE_PARENT_HEADER);
-        agent_headers.insert(
-            TRACE_PARENT_HEADER,
-            hyper::header::HeaderValue::from_str(&trace_parent).unwrap(),
-        );
+        global::get_text_map_propagator(|propagator| {
+            let cx =
+                tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
+            propagator.inject_context(&cx, &mut HeaderInjector(&mut agent_headers));
+        });
 
         agent_headers.insert(
             ARCH_UPSTREAM_HOST_HEADER,
@@ -748,10 +557,6 @@ impl PipelineProcessor {
             hyper::header::HeaderValue::from_static("application/json"),
         );
 
-        // Send request with tracing
-        let start_time = SystemTime::now();
-        let start_instant = Instant::now();
-
         debug!(
             "Sending HTTP request to agent {} at URL: {}",
             agent.id, agent.url
@@ -769,30 +574,6 @@ impl PipelineProcessor {
         let http_status = response.status();
         let response_bytes = response.bytes().await?;
 
-        let end_time = SystemTime::now();
-        let elapsed = start_instant.elapsed();
-
-        // Record HTTP call span
-        if let Some(collector) = trace_collector {
-            let mut attrs = HashMap::new();
-            attrs.insert("http.tool_name", tool_name.to_string());
-            attrs.insert("http.url", agent.url.clone());
-            attrs.insert("http.status_code", http_status.as_u16().to_string());
-
-            self.record_agent_filter_span(
-                collector,
-                "http_call",
-                &agent.id,
-                start_time,
-                end_time,
-                elapsed,
-                Some(attrs),
-                trace_id.clone(),
-                filter_span_id.clone(),
-                Some(http_span_id),
-            );
-        }
-
         // Handle HTTP errors
         if !http_status.is_success() {
             let error_body = String::from_utf8_lossy(&response_bytes).to_string();
@@ -825,34 +606,34 @@ impl PipelineProcessor {
     }
 
     /// Send request to terminal agent and return the raw response for streaming
+    /// Note: The caller is responsible for creating the plano(agent) span that wraps
+    /// both this call and the subsequent response consumption.
     pub async fn invoke_agent(
         &self,
         messages: &[Message],
         mut original_request: ProviderRequestType,
         terminal_agent: &Agent,
         request_headers: &HeaderMap,
-        trace_id: String,
-        agent_span_id: String,
     ) -> Result<reqwest::Response, PipelineError> {
         // let mut request = original_request.clone();
         original_request.set_messages(messages);
 
+        let request_url = "/v1/chat/completions";
+
         let request_body = ProviderRequestType::to_bytes(&original_request).unwrap();
         // let request_body = serde_json::to_string(&request)?;
-        debug!("Sending request to terminal agent {}", terminal_agent.id);
+        debug!("sending request to terminal agent {}", terminal_agent.id);
 
         let mut agent_headers = request_headers.clone();
         agent_headers.remove(hyper::header::CONTENT_LENGTH);
 
-        // Set traceparent header to make the egress span a child of the agent span
-        if !trace_id.is_empty() && !agent_span_id.is_empty() {
-            let trace_parent = format!("00-{}-{}-01", trace_id, agent_span_id);
-            agent_headers.remove(TRACE_PARENT_HEADER);
-            agent_headers.insert(
-                TRACE_PARENT_HEADER,
-                hyper::header::HeaderValue::from_str(&trace_parent).unwrap(),
-            );
-        }
+        // Inject OpenTelemetry trace context automatically
+        agent_headers.remove(TRACE_PARENT_HEADER);
+        global::get_text_map_propagator(|propagator| {
+            let cx =
+                tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
+            propagator.inject_context(&cx, &mut HeaderInjector(&mut agent_headers));
+        });
 
         agent_headers.insert(
             ARCH_UPSTREAM_HOST_HEADER,
@@ -867,7 +648,7 @@ impl PipelineProcessor {
 
         let response = self
             .client
-            .post(format!("{}/v1/chat/completions", self.url))
+            .post(format!("{}{}", self.url, request_url))
             .headers(agent_headers)
             .body(request_body)
             .send()
@@ -914,15 +695,7 @@ mod tests {
         let pipeline = create_test_pipeline(vec!["nonexistent-agent", "terminal-agent"]);
 
         let result = processor
-            .process_filter_chain(
-                &messages,
-                &pipeline,
-                &agent_map,
-                &request_headers,
-                None,
-                String::new(),
-                String::new(),
-            )
+            .process_filter_chain(&messages, &pipeline, &agent_map, &request_headers)
             .await;
 
         assert!(result.is_err());
@@ -956,14 +729,7 @@ mod tests {
         let request_headers = HeaderMap::new();
 
         let result = processor
-            .execute_mcp_filter(
-                &messages,
-                &agent,
-                &request_headers,
-                None,
-                "trace-123".to_string(),
-                "span-123".to_string(),
-            )
+            .execute_mcp_filter(&messages, &agent, &request_headers)
             .await;
 
         match result {
@@ -1002,14 +768,7 @@ mod tests {
         let request_headers = HeaderMap::new();
 
         let result = processor
-            .execute_mcp_filter(
-                &messages,
-                &agent,
-                &request_headers,
-                None,
-                "trace-456".to_string(),
-                "span-456".to_string(),
-            )
+            .execute_mcp_filter(&messages, &agent, &request_headers)
             .await;
 
         match result {
@@ -1061,14 +820,7 @@ mod tests {
         let request_headers = HeaderMap::new();
 
         let result = processor
-            .execute_mcp_filter(
-                &messages,
-                &agent,
-                &request_headers,
-                None,
-                "trace-789".to_string(),
-                "span-789".to_string(),
-            )
+            .execute_mcp_filter(&messages, &agent, &request_headers)
             .await;
 
         match result {
diff --git a/crates/brightstaff/src/handlers/response_handler.rs b/crates/brightstaff/src/handlers/response_handler.rs
index d386df1e..e2561a8f 100644
--- a/crates/brightstaff/src/handlers/response_handler.rs
+++ b/crates/brightstaff/src/handlers/response_handler.rs
@@ -9,7 +9,7 @@ use hyper::{Response, StatusCode};
 use tokio::sync::mpsc;
 use tokio_stream::wrappers::ReceiverStream;
 use tokio_stream::StreamExt;
-use tracing::{info, warn};
+use tracing::{info, warn, Instrument};
 
 /// Errors that can occur during response handling
 #[derive(Debug, thiserror::Error)]
@@ -69,10 +69,14 @@ impl ResponseHandler {
         response
     }
 
-    /// Create a streaming response from a reqwest response
+    /// Create a streaming response from a reqwest response.
+    /// The spawned streaming task is instrumented with both `agent_span` and `orchestrator_span`
+    /// so their durations reflect the actual time spent streaming to the client.
     pub async fn create_streaming_response(
         &self,
         llm_response: reqwest::Response,
+        agent_span: tracing::Span,
+        orchestrator_span: tracing::Span,
     ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ResponseError> {
         // Copy headers from the original response
         let response_headers = llm_response.headers();
@@ -89,25 +93,30 @@ impl ResponseHandler {
         // Create channel for async streaming
         let (tx, rx) = mpsc::channel::<Bytes>(16);
 
-        // Spawn task to stream data
-        tokio::spawn(async move {
-            let mut byte_stream = llm_response.bytes_stream();
+        // Spawn streaming task instrumented with both spans (nested) so both
+        // remain entered for the full streaming duration.
+        tokio::spawn(
+            async move {
+                let mut byte_stream = llm_response.bytes_stream();
 
-            while let Some(item) = byte_stream.next().await {
-                let chunk = match item {
-                    Ok(chunk) => chunk,
-                    Err(err) => {
-                        warn!("Error receiving chunk: {:?}", err);
+                while let Some(item) = byte_stream.next().await {
+                    let chunk = match item {
+                        Ok(chunk) => chunk,
+                        Err(err) => {
+                            warn!(error = ?err, "error receiving chunk");
+                            break;
+                        }
+                    };
+
+                    if tx.send(chunk).await.is_err() {
+                        warn!("receiver dropped");
                         break;
                     }
-                };
-
-                if tx.send(chunk).await.is_err() {
-                    warn!("Receiver dropped");
-                    break;
                 }
             }
-        });
+            .instrument(agent_span)
+            .instrument(orchestrator_span),
+        );
 
         let stream = ReceiverStream::new(rx).map(|chunk| Ok::<_, hyper::Error>(Frame::data(chunk)));
         let stream_body = BoxBody::new(StreamBody::new(stream));
@@ -164,11 +173,11 @@ impl ResponseHandler {
                         if let Some(content) = provider_response.content_delta() {
                             accumulated_text.push_str(content);
                         } else {
-                            info!("No content delta in provider response");
+                            info!("no content delta in provider response");
                         }
                     }
                     Err(e) => {
-                        warn!("Failed to parse provider response: {:?}", e);
+                        warn!(error = ?e, "failed to parse provider response");
                     }
                 }
             }
@@ -248,7 +257,13 @@ mod tests {
         let llm_response = client.get(&(server.url() + "/test")).send().await.unwrap();
 
         let handler = ResponseHandler::new();
-        let result = handler.create_streaming_response(llm_response).await;
+        let result = handler
+            .create_streaming_response(
+                llm_response,
+                tracing::Span::current(),
+                tracing::Span::current(),
+            )
+            .await;
 
         mock.assert_async().await;
         assert!(result.is_ok());
diff --git a/crates/brightstaff/src/handlers/router_chat.rs b/crates/brightstaff/src/handlers/router_chat.rs
index c3a517e0..01c67a94 100644
--- a/crates/brightstaff/src/handlers/router_chat.rs
+++ b/crates/brightstaff/src/handlers/router_chat.rs
@@ -1,14 +1,12 @@
 use common::configuration::ModelUsagePreference;
-use common::traces::{parse_traceparent, SpanBuilder, SpanKind, TraceCollector};
 use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
 use hermesllm::{ProviderRequest, ProviderRequestType};
 use hyper::StatusCode;
-use std::collections::HashMap;
 use std::sync::Arc;
 use tracing::{debug, info, warn};
 
 use crate::router::llm_router::RouterService;
-use crate::tracing::{http, operation_component, routing, OperationNameBuilder};
+use crate::tracing::routing;
 
 pub struct RoutingResult {
     pub model_name: String,
@@ -36,7 +34,6 @@ impl RoutingError {
 pub async fn router_chat_get_upstream_model(
     router_service: Arc<RouterService>,
     client_request: ProviderRequestType,
-    trace_collector: Arc<TraceCollector>,
     traceparent: &str,
     request_path: &str,
     request_id: &str,
@@ -56,14 +53,14 @@ pub async fn router_chat_get_upstream_model(
             | ProviderRequestType::BedrockConverseStream(_)
             | ProviderRequestType::ResponsesAPIRequest(_),
         ) => {
-            warn!("Unexpected: got non-ChatCompletions request after converting to OpenAI format");
+            warn!("unexpected: got non-ChatCompletions request after converting to OpenAI format");
             return Err(RoutingError::internal_error(
                 "Request conversion failed".to_string(),
             ));
         }
         Err(err) => {
             warn!(
-                "Failed to convert request to ChatCompletionsRequest: {}",
+                "failed to convert request to ChatCompletionsRequest: {}",
                 err
             );
             return Err(RoutingError::internal_error(format!(
@@ -74,9 +71,8 @@ pub async fn router_chat_get_upstream_model(
     };
 
     debug!(
-        "[PLANO_REQ_ID: {:?}]: ROUTER_REQ: {}",
-        request_id,
-        &serde_json::to_string(&chat_request).unwrap()
+        request = %serde_json::to_string(&chat_request).unwrap(),
+        "router request"
     );
 
     // Extract usage preferences from metadata
@@ -112,16 +108,14 @@ pub async fn router_chat_get_upstream_model(
     };
 
     info!(
-        "[PLANO_REQ_ID: {:?}] | ROUTER_REQ | Usage preferences from request: {}, request_path: {}, latest message: {}",
-        request_id,
-        usage_preferences.is_some(),
-        request_path,
-        latest_message_for_log
+        has_usage_preferences = usage_preferences.is_some(),
+        path = %request_path,
+        latest_message = %latest_message_for_log,
+        "processing router request"
     );
 
     // Capture start time for routing span
     let routing_start_time = std::time::Instant::now();
-    let routing_start_system_time = std::time::SystemTime::now();
 
     // Attempt to determine route using the router service
     let routing_result = router_service
@@ -133,41 +127,21 @@ pub async fn router_chat_get_upstream_model(
         )
         .await;
 
+    let determination_ms = routing_start_time.elapsed().as_millis() as i64;
+    let current_span = tracing::Span::current();
+    current_span.record(routing::ROUTE_DETERMINATION_MS, determination_ms);
+
     match routing_result {
         Ok(route) => match route {
             Some((_, model_name)) => {
-                // Record successful routing span
-                let mut attrs: HashMap<String, String> = HashMap::new();
-                attrs.insert("route.selected_model".to_string(), model_name.clone());
-                record_routing_span(
-                    trace_collector,
-                    traceparent,
-                    routing_start_time,
-                    routing_start_system_time,
-                    attrs,
-                )
-                .await;
-
+                current_span.record("route.selected_model", model_name.as_str());
                 Ok(RoutingResult { model_name })
             }
             None => {
                 // No route determined, return sentinel value "none"
                 // This signals to llm.rs to use the original validated request model
-                info!(
-                    "[PLANO_REQ_ID: {}] | ROUTER_REQ | No route determined, returning sentinel 'none'",
-                    request_id
-                );
-
-                let mut attrs = HashMap::new();
-                attrs.insert("route.selected_model".to_string(), "none".to_string());
-                record_routing_span(
-                    trace_collector,
-                    traceparent,
-                    routing_start_time,
-                    routing_start_system_time,
-                    attrs,
-                )
-                .await;
+                current_span.record("route.selected_model", "none");
+                info!("no route determined, using default model");
 
                 Ok(RoutingResult {
                     model_name: "none".to_string(),
@@ -175,19 +149,7 @@ pub async fn router_chat_get_upstream_model(
             }
         },
         Err(err) => {
-            // Record failed routing span
-            let mut attrs = HashMap::new();
-            attrs.insert("route.selected_model".to_string(), "unknown".to_string());
-            attrs.insert("error.message".to_string(), err.to_string());
-            record_routing_span(
-                trace_collector,
-                traceparent,
-                routing_start_time,
-                routing_start_system_time,
-                attrs,
-            )
-            .await;
-
+            current_span.record("route.selected_model", "unknown");
             Err(RoutingError::internal_error(format!(
                 "Failed to determine route: {}",
                 err
@@ -195,53 +157,3 @@ pub async fn router_chat_get_upstream_model(
         }
     }
 }
-
-/// Helper function to record a routing span with the given attributes.
-/// Reduces code duplication across different routing outcomes.
-async fn record_routing_span(
-    trace_collector: Arc<TraceCollector>,
-    traceparent: &str,
-    start_time: std::time::Instant,
-    start_system_time: std::time::SystemTime,
-    attrs: HashMap<String, String>,
-) {
-    // The routing always uses OpenAI Chat Completions format internally,
-    // so we log that as the actual API being used for routing
-    let routing_api_path = "/v1/chat/completions";
-
-    let routing_operation_name = OperationNameBuilder::new()
-        .with_method("POST")
-        .with_path(routing_api_path)
-        .with_target("Arch-Router-1.5B")
-        .build();
-
-    let (trace_id, parent_span_id) = parse_traceparent(traceparent);
-
-    // Build the routing span directly using constants
-    let mut span_builder = SpanBuilder::new(&routing_operation_name)
-        .with_trace_id(&trace_id)
-        .with_kind(SpanKind::Client)
-        .with_start_time(start_system_time)
-        .with_end_time(std::time::SystemTime::now())
-        .with_attribute(http::METHOD, "POST")
-        .with_attribute(http::TARGET, routing_api_path.to_string())
-        .with_attribute(
-            routing::ROUTE_DETERMINATION_MS,
-            start_time.elapsed().as_millis().to_string(),
-        );
-
-    // Only set parent span ID if it exists (not a root span)
-    if let Some(parent) = parent_span_id {
-        span_builder = span_builder.with_parent_span_id(&parent);
-    }
-
-    // Add all custom attributes
-    for (key, value) in attrs {
-        span_builder = span_builder.with_attribute(key, value);
-    }
-
-    let span = span_builder.build();
-
-    // Record the span directly to the collector
-    trace_collector.record_span(operation_component::ROUTING, span);
-}
diff --git a/crates/brightstaff/src/handlers/utils.rs b/crates/brightstaff/src/handlers/utils.rs
index f2a318a4..bff87ad4 100644
--- a/crates/brightstaff/src/handlers/utils.rs
+++ b/crates/brightstaff/src/handlers/utils.rs
@@ -1,18 +1,18 @@
 use bytes::Bytes;
-use common::traces::{Attribute, AttributeValue, Event, Span, TraceCollector};
 use http_body_util::combinators::BoxBody;
 use http_body_util::StreamBody;
 use hyper::body::Frame;
-use std::sync::Arc;
-use std::time::{Instant, SystemTime};
+use opentelemetry::trace::TraceContextExt;
+use opentelemetry::KeyValue;
+use std::time::Instant;
 use tokio::sync::mpsc;
 use tokio_stream::wrappers::ReceiverStream;
 use tokio_stream::StreamExt;
-use tracing::warn;
+use tracing::{info, warn, Instrument};
+use tracing_opentelemetry::OpenTelemetrySpanExt;
 
-// Import tracing constants and signals
 use crate::signals::{InteractionQuality, SignalAnalyzer, TextBasedSignalAnalyzer, FLAG_MARKER};
-use crate::tracing::{error, llm, signals as signal_constants};
+use crate::tracing::{llm, set_service_name, signals as signal_constants};
 use hermesllm::apis::openai::Message;
 
 /// Trait for processing streaming chunks
@@ -31,11 +31,10 @@ pub trait StreamProcessor: Send + 'static {
     fn on_error(&mut self, _error: &str) {}
 }
 
-/// A processor that tracks streaming metrics and finalizes the span
+/// A processor that tracks streaming metrics
 pub struct ObservableStreamProcessor {
-    collector: Arc<TraceCollector>,
     service_name: String,
-    span: Span,
+    operation_name: String,
     total_bytes: usize,
     chunk_count: usize,
     start_time: Instant,
@@ -47,22 +46,28 @@ impl ObservableStreamProcessor {
     /// Create a new passthrough processor
     ///
     /// # Arguments
-    /// * `collector` - The trace collector to record the span to
-    /// * `service_name` - The service name for this span (e.g., "archgw(llm)")
-    /// * `span` - The span to finalize after streaming completes
+    /// * `service_name` - The service name for this span (e.g., "plano(llm)")
+    ///   This will be set as the `service.name.override` attribute on the current span,
+    ///   allowing the ServiceNameOverrideExporter to route spans to different services.
+    /// * `operation_name` - The current span operation name (e.g., "POST /v1/chat/completions gpt-4")
+    ///   Used to append the flag marker when concerning signals are detected.
     /// * `start_time` - When the request started (for duration calculation)
     /// * `messages` - Optional conversation messages for signal analysis
     pub fn new(
-        collector: Arc<TraceCollector>,
         service_name: impl Into<String>,
-        span: Span,
+        operation_name: impl Into<String>,
         start_time: Instant,
         messages: Option<Vec<Message>>,
     ) -> Self {
+        let service_name = service_name.into();
+
+        // Set the service name override on the current span for OpenTelemetry export
+        // This allows the ServiceNameOverrideExporter to route this span to the correct service
+        set_service_name(&service_name);
+
         Self {
-            collector,
-            service_name: service_name.into(),
-            span,
+            service_name,
+            operation_name: operation_name.into(),
             total_bytes: 0,
             chunk_count: 0,
             start_time,
@@ -87,89 +92,81 @@ impl StreamProcessor for ObservableStreamProcessor {
     }
 
     fn on_complete(&mut self) {
-        // Update span with streaming metrics and end time
-        let end_time_nanos = SystemTime::now()
-            .duration_since(SystemTime::UNIX_EPOCH)
-            .unwrap_or_default()
-            .as_nanos();
-
-        self.span.end_time_unix_nano = format!("{}", end_time_nanos);
-
-        // Add streaming metrics as attributes using constants
-        self.span.attributes.push(Attribute {
-            key: llm::RESPONSE_BYTES.to_string(),
-            value: AttributeValue {
-                string_value: Some(self.total_bytes.to_string()),
-            },
-        });
-
-        self.span.attributes.push(Attribute {
-            key: llm::DURATION_MS.to_string(),
-            value: AttributeValue {
-                string_value: Some(self.start_time.elapsed().as_millis().to_string()),
-            },
-        });
-
-        // Add time to first token if available (streaming only)
+        // Record time-to-first-token as an OTel span attribute + event (streaming only)
         if let Some(ttft) = self.time_to_first_token {
-            self.span.attributes.push(Attribute {
-                key: llm::TIME_TO_FIRST_TOKEN_MS.to_string(),
-                value: AttributeValue {
-                    string_value: Some(ttft.to_string()),
-                },
-            });
-
-            // Add time to first token as a span event
-            // Calculate the timestamp by adding ttft duration to span start time
-            if let Ok(start_time_nanos) = self.span.start_time_unix_nano.parse::<u128>() {
-                // Convert ttft from milliseconds to nanoseconds and add to start time
-                let event_timestamp = start_time_nanos + (ttft * 1_000_000);
-                let mut event =
-                    Event::new(llm::TIME_TO_FIRST_TOKEN_MS.to_string(), event_timestamp);
-                event.add_attribute(llm::TIME_TO_FIRST_TOKEN_MS.to_string(), ttft.to_string());
-
-                // Initialize events vector if needed
-                if self.span.events.is_none() {
-                    self.span.events = Some(Vec::new());
-                }
-
-                if let Some(ref mut events) = self.span.events {
-                    events.push(event);
-                }
-            }
+            let span = tracing::Span::current();
+            let otel_context = span.context();
+            let otel_span = otel_context.span();
+            otel_span.set_attribute(KeyValue::new(llm::TIME_TO_FIRST_TOKEN_MS, ttft as i64));
+            otel_span.add_event(
+                llm::TIME_TO_FIRST_TOKEN_MS,
+                vec![KeyValue::new(llm::TIME_TO_FIRST_TOKEN_MS, ttft as i64)],
+            );
         }
 
-        // Analyze signals if messages are available and add to span attributes
+        // Analyze signals if messages are available and record as span attributes
         if let Some(ref messages) = self.messages {
             let analyzer: Box<dyn SignalAnalyzer> = Box::new(TextBasedSignalAnalyzer::new());
             let report = analyzer.analyze(messages);
 
+            // Get the current OTel span to set signal attributes
+            let span = tracing::Span::current();
+            let otel_context = span.context();
+            let otel_span = otel_context.span();
+
             // Add overall quality
-            self.span.attributes.push(Attribute {
-                key: signal_constants::QUALITY.to_string(),
-                value: AttributeValue {
-                    string_value: Some(format!("{:?}", report.overall_quality)),
-                },
-            });
+            otel_span.set_attribute(KeyValue::new(
+                signal_constants::QUALITY,
+                format!("{:?}", report.overall_quality),
+            ));
 
             // Add repair/follow-up metrics if concerning
             if report.follow_up.is_concerning || report.follow_up.repair_count > 0 {
-                self.span.attributes.push(Attribute {
-                    key: signal_constants::REPAIR_COUNT.to_string(),
-                    value: AttributeValue {
-                        string_value: Some(report.follow_up.repair_count.to_string()),
-                    },
-                });
-
-                self.span.attributes.push(Attribute {
-                    key: signal_constants::REPAIR_RATIO.to_string(),
-                    value: AttributeValue {
-                        string_value: Some(format!("{:.3}", report.follow_up.repair_ratio)),
-                    },
-                });
+                otel_span.set_attribute(KeyValue::new(
+                    signal_constants::REPAIR_COUNT,
+                    report.follow_up.repair_count as i64,
+                ));
+                otel_span.set_attribute(KeyValue::new(
+                    signal_constants::REPAIR_RATIO,
+                    format!("{:.3}", report.follow_up.repair_ratio),
+                ));
             }
 
-            // Add flag marker to operation name if any concerning signal is detected
+            // Add frustration metrics
+            if report.frustration.has_frustration {
+                otel_span.set_attribute(KeyValue::new(
+                    signal_constants::FRUSTRATION_COUNT,
+                    report.frustration.frustration_count as i64,
+                ));
+                otel_span.set_attribute(KeyValue::new(
+                    signal_constants::FRUSTRATION_SEVERITY,
+                    report.frustration.severity as i64,
+                ));
+            }
+
+            // Add repetition metrics
+            if report.repetition.has_looping {
+                otel_span.set_attribute(KeyValue::new(
+                    signal_constants::REPETITION_COUNT,
+                    report.repetition.repetition_count as i64,
+                ));
+            }
+
+            // Add escalation metrics
+            if report.escalation.escalation_requested {
+                otel_span
+                    .set_attribute(KeyValue::new(signal_constants::ESCALATION_REQUESTED, true));
+            }
+
+            // Add positive feedback metrics
+            if report.positive_feedback.has_positive_feedback {
+                otel_span.set_attribute(KeyValue::new(
+                    signal_constants::POSITIVE_FEEDBACK_COUNT,
+                    report.positive_feedback.positive_count as i64,
+                ));
+            }
+
+            // Flag the span name if any concerning signal is detected
             let should_flag = report.frustration.has_frustration
                 || report.repetition.has_looping
                 || report.escalation.escalation_requested
@@ -179,94 +176,27 @@ impl StreamProcessor for ObservableStreamProcessor {
                 );
 
             if should_flag {
-                // Prepend flag marker to the operation name
-                self.span.name = format!("{} {}", self.span.name, FLAG_MARKER);
-            }
-
-            // Add key signal metrics
-            if report.frustration.has_frustration {
-                self.span.attributes.push(Attribute {
-                    key: signal_constants::FRUSTRATION_COUNT.to_string(),
-                    value: AttributeValue {
-                        string_value: Some(report.frustration.frustration_count.to_string()),
-                    },
-                });
-                self.span.attributes.push(Attribute {
-                    key: signal_constants::FRUSTRATION_SEVERITY.to_string(),
-                    value: AttributeValue {
-                        string_value: Some(report.frustration.severity.to_string()),
-                    },
-                });
-            }
-
-            if report.repetition.has_looping {
-                self.span.attributes.push(Attribute {
-                    key: signal_constants::REPETITION_COUNT.to_string(),
-                    value: AttributeValue {
-                        string_value: Some(report.repetition.repetition_count.to_string()),
-                    },
-                });
-            }
-
-            if report.escalation.escalation_requested {
-                self.span.attributes.push(Attribute {
-                    key: signal_constants::ESCALATION_REQUESTED.to_string(),
-                    value: AttributeValue {
-                        string_value: Some("true".to_string()),
-                    },
-                });
-            }
-
-            if report.positive_feedback.has_positive_feedback {
-                self.span.attributes.push(Attribute {
-                    key: signal_constants::POSITIVE_FEEDBACK_COUNT.to_string(),
-                    value: AttributeValue {
-                        string_value: Some(report.positive_feedback.positive_count.to_string()),
-                    },
-                });
+                otel_span.update_name(format!("{} {}", self.operation_name, FLAG_MARKER));
             }
         }
 
-        // Record the finalized span
-        self.collector
-            .record_span(&self.service_name, self.span.clone());
+        info!(
+            service = %self.service_name,
+            total_bytes = self.total_bytes,
+            chunk_count = self.chunk_count,
+            duration_ms = self.start_time.elapsed().as_millis(),
+            time_to_first_token_ms = ?self.time_to_first_token,
+            "streaming completed"
+        );
     }
 
     fn on_error(&mut self, error_msg: &str) {
-        warn!("Stream error in PassthroughProcessor: {}", error_msg);
-
-        // Update span with error info and end time
-        let end_time_nanos = SystemTime::now()
-            .duration_since(SystemTime::UNIX_EPOCH)
-            .unwrap_or_default()
-            .as_nanos();
-
-        self.span.end_time_unix_nano = format!("{}", end_time_nanos);
-
-        self.span.attributes.push(Attribute {
-            key: error::ERROR.to_string(),
-            value: AttributeValue {
-                string_value: Some("true".to_string()),
-            },
-        });
-
-        self.span.attributes.push(Attribute {
-            key: error::MESSAGE.to_string(),
-            value: AttributeValue {
-                string_value: Some(error_msg.to_string()),
-            },
-        });
-
-        self.span.attributes.push(Attribute {
-            key: llm::DURATION_MS.to_string(),
-            value: AttributeValue {
-                string_value: Some(self.start_time.elapsed().as_millis().to_string()),
-            },
-        });
-
-        // Record the error span
-        self.collector
-            .record_span(&self.service_name, self.span.clone());
+        warn!(
+            service = %self.service_name,
+            error = error_msg,
+            duration_ms = self.start_time.elapsed().as_millis(),
+            "stream error"
+        );
     }
 }
 
@@ -287,49 +217,55 @@ where
 {
     let (tx, rx) = mpsc::channel::<Bytes>(buffer_size);
 
+    // Capture the current span so the spawned task inherits the request context
+    let current_span = tracing::Span::current();
+
     // Spawn a task to process and forward chunks
-    let processor_handle = tokio::spawn(async move {
-        let mut is_first_chunk = true;
+    let processor_handle = tokio::spawn(
+        async move {
+            let mut is_first_chunk = true;
 
-        while let Some(item) = byte_stream.next().await {
-            let chunk = match item {
-                Ok(chunk) => chunk,
-                Err(err) => {
-                    let err_msg = format!("Error receiving chunk: {:?}", err);
-                    warn!("{}", err_msg);
-                    processor.on_error(&err_msg);
-                    break;
+            while let Some(item) = byte_stream.next().await {
+                let chunk = match item {
+                    Ok(chunk) => chunk,
+                    Err(err) => {
+                        let err_msg = format!("Error receiving chunk: {:?}", err);
+                        warn!(error = %err_msg, "stream error");
+                        processor.on_error(&err_msg);
+                        break;
+                    }
+                };
+
+                // Call on_first_bytes for the first chunk
+                if is_first_chunk {
+                    processor.on_first_bytes();
+                    is_first_chunk = false;
                 }
-            };
 
-            // Call on_first_bytes for the first chunk
-            if is_first_chunk {
-                processor.on_first_bytes();
-                is_first_chunk = false;
-            }
-
-            // Process the chunk
-            match processor.process_chunk(chunk) {
-                Ok(Some(processed_chunk)) => {
-                    if tx.send(processed_chunk).await.is_err() {
-                        warn!("Receiver dropped");
+                // Process the chunk
+                match processor.process_chunk(chunk) {
+                    Ok(Some(processed_chunk)) => {
+                        if tx.send(processed_chunk).await.is_err() {
+                            warn!("receiver dropped");
+                            break;
+                        }
+                    }
+                    Ok(None) => {
+                        // Skip this chunk
+                        continue;
+                    }
+                    Err(err) => {
+                        warn!("processor error: {}", err);
+                        processor.on_error(&err);
                         break;
                     }
                 }
-                Ok(None) => {
-                    // Skip this chunk
-                    continue;
-                }
-                Err(err) => {
-                    warn!("Processor error: {}", err);
-                    processor.on_error(&err);
-                    break;
-                }
             }
-        }
 
-        processor.on_complete();
-    });
+            processor.on_complete();
+        }
+        .instrument(current_span),
+    );
 
     // Convert channel receiver to HTTP stream
     let stream = ReceiverStream::new(rx).map(|chunk| Ok::<_, hyper::Error>(Frame::data(chunk)));
diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs
index b8fa8832..3469d8bd 100644
--- a/crates/brightstaff/src/main.rs
+++ b/crates/brightstaff/src/main.rs
@@ -14,7 +14,6 @@ use common::consts::{
     CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH, PLANO_ORCHESTRATOR_MODEL_NAME,
 };
 use common::llm_providers::LlmProviders;
-use common::traces::TraceCollector;
 use http_body_util::{combinators::BoxBody, BodyExt, Empty};
 use hyper::body::Incoming;
 use hyper::server::conn::http1;
@@ -51,13 +50,12 @@ fn empty() -> BoxBody<Bytes, hyper::Error> {
 
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
-    let _tracer_provider = init_tracer();
     let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| BIND_ADDRESS.to_string());
 
-    // loading arch_config.yaml file
+    // loading arch_config.yaml file (before tracing init so we can read tracing config)
     let arch_config_path = env::var("ARCH_CONFIG_PATH_RENDERED")
         .unwrap_or_else(|_| "./arch_config_rendered.yaml".to_string());
-    info!("Loading arch_config.yaml from {}", arch_config_path);
+    eprintln!("loading arch_config.yaml from {}", arch_config_path);
 
     let config_contents =
         fs::read_to_string(&arch_config_path).expect("Failed to read arch_config.yaml");
@@ -65,6 +63,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
     let config: Configuration =
         serde_yaml::from_str(&config_contents).expect("Failed to parse arch_config.yaml");
 
+    // Initialize tracing using config.yaml tracing section
+    let _tracer_provider = init_tracer(config.tracing.as_ref());
+    info!(path = %arch_config_path, "loaded arch_config.yaml");
+
     let arch_config = Arc::new(config);
 
     // combine agents and filters into a single list of agents
@@ -116,17 +118,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
     // Initialize trace collector and start background flusher
     // Tracing is enabled if the tracing config is present in arch_config.yaml
     // Pass Some(true/false) to override, or None to use env var OTEL_TRACING_ENABLED
-    let tracing_enabled = if arch_config.tracing.is_some() {
-        info!("Tracing configuration found in arch_config.yaml");
-        Some(true)
-    } else {
-        info!(
-            "No tracing configuration in arch_config.yaml, will check OTEL_TRACING_ENABLED env var"
-        );
-        None
-    };
-    let trace_collector = Arc::new(TraceCollector::new(tracing_enabled));
-    let _flusher_handle = trace_collector.clone().start_background_flusher();
+    // OpenTelemetry automatic instrumentation is configured in utils/tracing.rs
 
     // Initialize conversation state storage for v1/responses
     // Configurable via arch_config.yaml state_storage section
@@ -136,7 +128,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
         if let Some(storage_config) = &arch_config.state_storage {
             let storage: Arc<dyn StateStorage> = match storage_config.storage_type {
                 common::configuration::StateStorageType::Memory => {
-                    info!("Initialized conversation state storage: Memory");
+                    info!(
+                        storage_type = "memory",
+                        "initialized conversation state storage"
+                    );
                     Arc::new(MemoryConversationalStorage::new())
                 }
                 common::configuration::StateStorageType::Postgres => {
@@ -145,8 +140,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
                         .as_ref()
                         .expect("connection_string is required for postgres state_storage");
 
-                    debug!("Postgres connection string (full): {}", connection_string);
-                    info!("Initializing conversation state storage: Postgres");
+                    debug!(connection_string = %connection_string, "postgres connection");
+                    info!(
+                        storage_type = "postgres",
+                        "initializing conversation state storage"
+                    );
                     Arc::new(
                         PostgreSQLConversationStorage::new(connection_string.clone())
                             .await
@@ -156,7 +154,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
             };
             Some(storage)
         } else {
-            info!("No state_storage configured - conversation state management disabled");
+            info!("no state_storage configured, conversation state management disabled");
             None
         };
 
@@ -175,7 +173,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
         let llm_providers = llm_providers.clone();
         let agents_list = combined_agents_filters_list.clone();
         let listeners = listeners.clone();
-        let trace_collector = trace_collector.clone();
         let state_storage = state_storage.clone();
         let service = service_fn(move |req| {
             let router_service = Arc::clone(&router_service);
@@ -186,7 +183,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
             let model_aliases = Arc::clone(&model_aliases);
             let agents_list = agents_list.clone();
             let listeners = listeners.clone();
-            let trace_collector = trace_collector.clone();
             let state_storage = state_storage.clone();
 
             async move {
@@ -206,7 +202,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
                             fully_qualified_url,
                             agents_list,
                             listeners,
-                            trace_collector,
                         )
                         .with_context(parent_cx)
                         .await;
@@ -224,7 +219,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
                             fully_qualified_url,
                             model_aliases,
                             llm_providers,
-                            trace_collector,
                             state_storage,
                         )
                         .with_context(parent_cx)
@@ -265,7 +259,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
                         Ok(response)
                     }
                     _ => {
-                        debug!("No route for {} {}", req.method(), req.uri().path());
+                        debug!(method = %req.method(), path = %req.uri().path(), "no route found");
                         let mut not_found = Response::new(empty());
                         *not_found.status_mut() = StatusCode::NOT_FOUND;
                         Ok(not_found)
@@ -275,13 +269,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
         });
 
         tokio::task::spawn(async move {
-            debug!("Accepted connection from {:?}", peer_addr);
+            debug!(peer = ?peer_addr, "accepted connection");
             if let Err(err) = http1::Builder::new()
                 // .serve_connection(io, service_fn(chat_completion))
                 .serve_connection(io, service)
                 .await
             {
-                warn!("Error serving connection: {:?}", err);
+                warn!(error = ?err, "error serving connection");
             }
         });
     }
diff --git a/crates/brightstaff/src/router/llm_router.rs b/crates/brightstaff/src/router/llm_router.rs
index 743630e6..ec3fe3ab 100644
--- a/crates/brightstaff/src/router/llm_router.rs
+++ b/crates/brightstaff/src/router/llm_router.rs
@@ -96,14 +96,14 @@ impl RouterService {
             .generate_request(messages, &usage_preferences);
 
         debug!(
-            "sending request to arch-router model: {}, endpoint: {}",
-            self.router_model.get_model_name(),
-            self.router_url
+            model = %self.router_model.get_model_name(),
+            endpoint = %self.router_url,
+            "sending request to arch-router"
         );
 
         debug!(
-            "arch request body: {}",
-            &serde_json::to_string(&router_request).unwrap(),
+            body = %serde_json::to_string(&router_request).unwrap(),
+            "arch router request"
         );
 
         let mut llm_route_request_headers = header::HeaderMap::new();
@@ -148,9 +148,9 @@ impl RouterService {
             Ok(response) => response,
             Err(err) => {
                 warn!(
-                    "Failed to parse JSON: {}. Body: {}",
-                    err,
-                    &serde_json::to_string(&body).unwrap()
+                    error = %err,
+                    body = %serde_json::to_string(&body).unwrap(),
+                    "failed to parse json response"
                 );
                 return Err(RoutingError::JsonError(
                     err,
@@ -160,7 +160,7 @@ impl RouterService {
         };
 
         if chat_completion_response.choices.is_empty() {
-            warn!("No choices in router response: {}", body);
+            warn!(body = %body, "no choices in router response");
             return Ok(None);
         }
 
@@ -169,10 +169,10 @@ impl RouterService {
                 .router_model
                 .parse_response(content, &usage_preferences)?;
             info!(
-                "arch-router determined route: {}, selected_model: {:?}, response time: {}ms",
-                content.replace("\n", "\\n"),
-                parsed_response,
-                router_response_time.as_millis()
+                content = %content.replace("\n", "\\n"),
+                selected_model = ?parsed_response,
+                response_time_ms = router_response_time.as_millis(),
+                "arch-router determined route"
             );
 
             if let Some(ref parsed_response) = parsed_response {
diff --git a/crates/brightstaff/src/router/orchestrator_model_v1.rs b/crates/brightstaff/src/router/orchestrator_model_v1.rs
index 8d64f8e7..c6d3d56d 100644
--- a/crates/brightstaff/src/router/orchestrator_model_v1.rs
+++ b/crates/brightstaff/src/router/orchestrator_model_v1.rs
@@ -197,12 +197,12 @@ impl OrchestratorModel for OrchestratorModelV1 {
             token_count += message_token_count;
             if token_count > self.max_token_length {
                 debug!(
-                      "OrchestratorModelV1: token count {} exceeds max token length {}, truncating conversation, selected message count {}, total message count: {}",
-                      token_count,
-                      self.max_token_length
-                      , selected_messsage_count,
-                      messages_vec.len()
-                  );
+                    token_count = token_count,
+                    max_tokens = self.max_token_length,
+                    selected = selected_messsage_count,
+                    total = messages_vec.len(),
+                    "token count exceeds max, truncating conversation"
+                );
                 if message.role == Role::User {
                     // If message that exceeds max token length is from user, we need to keep it
                     selected_messages_list_reversed.push(message);
@@ -214,9 +214,7 @@ impl OrchestratorModel for OrchestratorModelV1 {
         }
 
         if selected_messages_list_reversed.is_empty() {
-            debug!(
-                "OrchestratorModelV1: no messages selected, using the last message in the conversation"
-            );
+            debug!("no messages selected, using last message");
             if let Some(last_message) = messages_vec.last() {
                 selected_messages_list_reversed.push(last_message);
             }
@@ -228,12 +226,12 @@ impl OrchestratorModel for OrchestratorModelV1 {
         // - last() is the first message in the original conversation
         if let Some(first_message) = selected_messages_list_reversed.first() {
             if first_message.role != Role::User {
-                warn!("OrchestratorModelV1: last message in the conversation is not from user, this may lead to incorrect orchestration");
+                warn!("last message is not from user, may lead to incorrect orchestration");
             }
         }
         if let Some(last_message) = selected_messages_list_reversed.last() {
             if last_message.role != Role::User {
-                warn!("OrchestratorModelV1: first message in the selected conversation is not from user, this may lead to incorrect orchestration");
+                warn!("first message is not from user, may lead to incorrect orchestration");
             }
         }
 
@@ -323,8 +321,9 @@ impl OrchestratorModel for OrchestratorModelV1 {
                     result.push((selected_route, model_name));
                 } else {
                     warn!(
-                        "No matching model found for route: {}, usage preferences: {:?}",
-                        selected_route, usage_preferences
+                        route = %selected_route,
+                        preferences = ?usage_preferences,
+                        "no matching model found for route"
                     );
                 }
             }
@@ -339,8 +338,9 @@ impl OrchestratorModel for OrchestratorModelV1 {
                     result.push((selected_route, model));
                 } else {
                     warn!(
-                        "No model found for route: {}, orchestrator model preferences: {:?}",
-                        selected_route, self.agent_orchestration_to_model_map
+                        route = %selected_route,
+                        preferences = ?self.agent_orchestration_to_model_map,
+                        "no model found for route"
                     );
                 }
             }
diff --git a/crates/brightstaff/src/router/plano_orchestrator.rs b/crates/brightstaff/src/router/plano_orchestrator.rs
index ee91d52e..cf2688b9 100644
--- a/crates/brightstaff/src/router/plano_orchestrator.rs
+++ b/crates/brightstaff/src/router/plano_orchestrator.rs
@@ -2,13 +2,12 @@ use std::{collections::HashMap, sync::Arc};
 
 use common::{
     configuration::{AgentUsagePreference, OrchestrationPreference},
-    consts::{
-        ARCH_PROVIDER_HINT_HEADER, PLANO_ORCHESTRATOR_MODEL_NAME, REQUEST_ID_HEADER,
-        TRACE_PARENT_HEADER,
-    },
+    consts::{ARCH_PROVIDER_HINT_HEADER, PLANO_ORCHESTRATOR_MODEL_NAME, REQUEST_ID_HEADER},
 };
 use hermesllm::apis::openai::{ChatCompletionsResponse, Message};
 use hyper::header;
+use opentelemetry::global;
+use opentelemetry_http::HeaderInjector;
 use thiserror::Error;
 use tracing::{debug, info, warn};
 
@@ -57,7 +56,6 @@ impl OrchestratorService {
     pub async fn determine_orchestration(
         &self,
         messages: &[Message],
-        trace_parent: Option<String>,
         usage_preferences: Option<Vec<AgentUsagePreference>>,
         request_id: Option<String>,
     ) -> Result<Option<Vec<(String, String)>>> {
@@ -75,14 +73,14 @@ impl OrchestratorService {
             .generate_request(messages, &usage_preferences);
 
         debug!(
-            "sending request to arch-orchestrator model: {}, endpoint: {}",
-            self.orchestrator_model.get_model_name(),
-            self.orchestrator_url
+            model = %self.orchestrator_model.get_model_name(),
+            endpoint = %self.orchestrator_url,
+            "sending request to arch-orchestrator"
         );
 
         debug!(
-            "arch orchestrator request body: {}",
-            &serde_json::to_string(&orchestrator_request).unwrap(),
+            body = %serde_json::to_string(&orchestrator_request).unwrap(),
+            "arch orchestrator request"
         );
 
         let mut orchestration_request_headers = header::HeaderMap::new();
@@ -96,12 +94,12 @@ impl OrchestratorService {
             header::HeaderValue::from_str(PLANO_ORCHESTRATOR_MODEL_NAME).unwrap(),
         );
 
-        if let Some(trace_parent) = trace_parent {
-            orchestration_request_headers.insert(
-                header::HeaderName::from_static(TRACE_PARENT_HEADER),
-                header::HeaderValue::from_str(&trace_parent).unwrap(),
-            );
-        }
+        // Inject OpenTelemetry trace context from current span
+        global::get_text_map_propagator(|propagator| {
+            let cx =
+                tracing_opentelemetry::OpenTelemetrySpanExt::context(&tracing::Span::current());
+            propagator.inject_context(&cx, &mut HeaderInjector(&mut orchestration_request_headers));
+        });
 
         if let Some(request_id) = request_id {
             orchestration_request_headers.insert(
@@ -131,9 +129,9 @@ impl OrchestratorService {
             Ok(response) => response,
             Err(err) => {
                 warn!(
-                    "Failed to parse JSON: {}. Body: {}",
-                    err,
-                    &serde_json::to_string(&body).unwrap()
+                    error = %err,
+                    body = %serde_json::to_string(&body).unwrap(),
+                    "failed to parse json response"
                 );
                 return Err(OrchestrationError::JsonError(
                     err,
@@ -143,7 +141,7 @@ impl OrchestratorService {
         };
 
         if chat_completion_response.choices.is_empty() {
-            warn!("No choices in orchestrator response: {}", body);
+            warn!(body = %body, "no choices in orchestrator response");
             return Ok(None);
         }
 
@@ -152,10 +150,10 @@ impl OrchestratorService {
                 .orchestrator_model
                 .parse_response(content, &usage_preferences)?;
             info!(
-                "arch-orchestrator determined routes: {}, selected_routes: {:?}, response time: {}ms",
-                content.replace("\n", "\\n"),
-                parsed_response,
-                orchestrator_response_time.as_millis()
+                content = %content.replace("\n", "\\n"),
+                selected_routes = ?parsed_response,
+                response_time_ms = orchestrator_response_time.as_millis(),
+                "arch-orchestrator determined routes"
             );
 
             if let Some(ref parsed_response) = parsed_response {
diff --git a/crates/brightstaff/src/router/router_model_v1.rs b/crates/brightstaff/src/router/router_model_v1.rs
index 796dfaac..430b4f8e 100644
--- a/crates/brightstaff/src/router/router_model_v1.rs
+++ b/crates/brightstaff/src/router/router_model_v1.rs
@@ -94,12 +94,12 @@ impl RouterModel for RouterModelV1 {
             token_count += message_token_count;
             if token_count > self.max_token_length {
                 debug!(
-                      "RouterModelV1: token count {} exceeds max token length {}, truncating conversation, selected message count {}, total message count: {}",
-                      token_count,
-                      self.max_token_length
-                      , selected_messsage_count,
-                      messages_vec.len()
-                  );
+                    token_count = token_count,
+                    max_tokens = self.max_token_length,
+                    selected = selected_messsage_count,
+                    total = messages_vec.len(),
+                    "token count exceeds max, truncating conversation"
+                );
                 if message.role == Role::User {
                     // If message that exceeds max token length is from user, we need to keep it
                     selected_messages_list_reversed.push(message);
@@ -111,9 +111,7 @@ impl RouterModel for RouterModelV1 {
         }
 
         if selected_messages_list_reversed.is_empty() {
-            debug!(
-                "RouterModelV1: no messages selected, using the last message in the conversation"
-            );
+            debug!("no messages selected, using last message");
             if let Some(last_message) = messages_vec.last() {
                 selected_messages_list_reversed.push(last_message);
             }
@@ -122,12 +120,12 @@ impl RouterModel for RouterModelV1 {
         // ensure that first and last selected message is from user
         if let Some(first_message) = selected_messages_list_reversed.first() {
             if first_message.role != Role::User {
-                warn!("RouterModelV1: last message in the conversation is not from user, this may lead to incorrect routing");
+                warn!("last message is not from user, may lead to incorrect routing");
             }
         }
         if let Some(last_message) = selected_messages_list_reversed.last() {
             if last_message.role != Role::User {
-                warn!("RouterModelV1: first message in the conversation is not from user, this may lead to incorrect routing");
+                warn!("first message is not from user, may lead to incorrect routing");
             }
         }
 
@@ -206,8 +204,9 @@ impl RouterModel for RouterModelV1 {
                 return Ok(Some((selected_route, model_name)));
             } else {
                 warn!(
-                    "No matching model found for route: {}, usage preferences: {:?}",
-                    selected_route, usage_preferences
+                    route = %selected_route,
+                    preferences = ?usage_preferences,
+                    "no matching model found for route"
                 );
                 return Ok(None);
             }
@@ -219,8 +218,9 @@ impl RouterModel for RouterModelV1 {
         }
 
         warn!(
-            "No model found for route: {}, router model preferences: {:?}",
-            selected_route, self.llm_route_to_model_map
+            route = %selected_route,
+            preferences = ?self.llm_route_to_model_map,
+            "no model found for route"
         );
 
         Ok(None)
diff --git a/crates/brightstaff/src/state/response_state_processor.rs b/crates/brightstaff/src/state/response_state_processor.rs
index 3d1e8673..0920324c 100644
--- a/crates/brightstaff/src/state/response_state_processor.rs
+++ b/crates/brightstaff/src/state/response_state_processor.rs
@@ -92,18 +92,16 @@ impl<P: StreamProcessor> ResponsesStateProcessor<P> {
                 match decoder.read_to_end(&mut decompressed) {
                     Ok(_) => {
                         debug!(
-                            "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Successfully decompressed {} bytes to {} bytes",
-                            self.request_id,
-                            self.chunk_buffer.len(),
-                            decompressed.len()
+                            original_bytes = self.chunk_buffer.len(),
+                            decompressed_bytes = decompressed.len(),
+                            "Successfully decompressed response"
                         );
                         decompressed
                     }
                     Err(e) => {
                         warn!(
-                            "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to decompress gzip buffer: {}",
-                            self.request_id,
-                            e
+                            error = %e,
+                            "Failed to decompress gzip buffer"
                         );
                         self.chunk_buffer.clone()
                     }
@@ -111,9 +109,8 @@ impl<P: StreamProcessor> ResponsesStateProcessor<P> {
             }
             Some(encoding) => {
                 warn!(
-                    "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Unsupported Content-Encoding: {}. Only gzip is currently supported.",
-                    self.request_id,
-                    encoding
+                    encoding = %encoding,
+                    "Unsupported Content-Encoding, only gzip is supported"
                 );
                 self.chunk_buffer.clone()
             }
@@ -143,10 +140,9 @@ impl<P: StreamProcessor> ResponsesStateProcessor<P> {
                         serde_json::from_str::<ResponsesAPIStreamEvent>(data_str)
                     {
                         info!(
-                            "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Captured streaming response.completed: response_id={}, output_items={}",
-                            self.request_id,
-                            response.id,
-                            response.output.len()
+                            response_id = %response.id,
+                            output_items = response.output.len(),
+                            "Captured streaming response"
                         );
                         self.response_id = Some(response.id.clone());
                         self.output_items = Some(response.output.clone());
@@ -175,24 +171,20 @@ impl<P: StreamProcessor> ResponsesStateProcessor<P> {
         ) {
             Ok(response) => {
                 info!(
-                    "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Captured non-streaming response: response_id={}, output_items={}",
-                    self.request_id,
-                    response.id,
-                    response.output.len()
+                    response_id = %response.id,
+                    output_items = response.output.len(),
+                    "Captured non-streaming response"
                 );
                 self.response_id = Some(response.id.clone());
                 self.output_items = Some(response.output.clone());
             }
             Err(e) => {
-                // Log parse error with chunk preview for debugging
                 let chunk_preview = String::from_utf8_lossy(&decompressed);
                 let preview_len = chunk_preview.len().min(200);
                 warn!(
-                    "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to parse non-streaming ResponsesAPIResponse: {}. Decompressed preview (first {} bytes): {}",
-                    self.request_id,
-                    e,
-                    preview_len,
-                    &chunk_preview[..preview_len]
+                    error = %e,
+                    preview = %&chunk_preview[..preview_len],
+                    "Failed to parse non-streaming ResponsesAPIResponse"
                 );
             }
         }
@@ -221,10 +213,7 @@ impl<P: StreamProcessor> StreamProcessor for ResponsesStateProcessor<P> {
 
         // Skip storage for OpenAI upstream
         if self.is_openai_upstream {
-            debug!(
-                "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Skipping state storage for OpenAI upstream provider",
-                self.request_id
-            );
+            debug!("Skipping state storage for OpenAI upstream");
             return;
         }
 
@@ -234,8 +223,9 @@ impl<P: StreamProcessor> StreamProcessor for ResponsesStateProcessor<P> {
             let output_as_inputs = outputs_to_inputs(output_items);
 
             debug!(
-                "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Converting outputs to inputs: output_items_count={}, converted_input_items_count={}",
-                self.request_id, output_items.len(), output_as_inputs.len()
+                output_items = output_items.len(),
+                converted_items = output_as_inputs.len(),
+                "Converting outputs to inputs"
             );
 
             // Combine original input + output as new input history
@@ -243,11 +233,9 @@ impl<P: StreamProcessor> StreamProcessor for ResponsesStateProcessor<P> {
             combined_input.extend(output_as_inputs);
 
             debug!(
-                "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Storing state: original_input_count={}, combined_input_count={}, combined_json={}",
-                self.request_id,
-                self.original_input.len(),
-                combined_input.len(),
-                serde_json::to_string(&combined_input).unwrap_or_else(|_| "serialization_error".to_string())
+                original_input = self.original_input.len(),
+                combined_input = combined_input.len(),
+                "Storing conversation state"
             );
 
             let state = OpenAIConversationState {
@@ -270,28 +258,27 @@ impl<P: StreamProcessor> StreamProcessor for ResponsesStateProcessor<P> {
                 match storage.put(state).await {
                     Ok(()) => {
                         info!(
-                            "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Successfully stored conversation state for response_id: {}, items_count={}",
-                            request_id,
-                            response_id_clone,
-                            items_count
+                            request_id = %request_id,
+                            response_id = %response_id_clone,
+                            items = items_count,
+                            "Stored conversation state"
                         );
                     }
                     Err(e) => {
                         warn!(
-                            "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | Failed to store conversation state for response_id {}: {}",
-                            request_id,
-                            response_id_clone,
-                            e
+                            request_id = %request_id,
+                            response_id = %response_id_clone,
+                            error = %e,
+                            "Failed to store conversation state"
                         );
                     }
                 }
             });
         } else {
             warn!(
-                "[PLANO_REQ_ID:{}] | STATE_PROCESSOR | No response_id captured from upstream response - cannot store conversation state. response_id present: {}, output present: {}",
-                self.request_id,
-                self.response_id.is_some(),
-                self.output_items.is_some()
+                has_response_id = self.response_id.is_some(),
+                has_output = self.output_items.is_some(),
+                "No response_id captured, cannot store conversation state"
             );
         }
     }
diff --git a/crates/brightstaff/src/tracing/mod.rs b/crates/brightstaff/src/tracing/mod.rs
index 09ec6f2a..7332170c 100644
--- a/crates/brightstaff/src/tracing/mod.rs
+++ b/crates/brightstaff/src/tracing/mod.rs
@@ -1,5 +1,35 @@
 mod constants;
+mod service_name_exporter;
 
 pub use constants::{
     error, http, llm, operation_component, routing, signals, OperationNameBuilder,
 };
+pub use service_name_exporter::{ServiceNameOverrideExporter, SERVICE_NAME_OVERRIDE_KEY};
+
+use opentelemetry::trace::get_active_span;
+use opentelemetry::KeyValue;
+
+/// Sets the service name override on the current active OpenTelemetry span.
+///
+/// This function adds the `service.name.override` attribute to the active
+/// OpenTelemetry span, which allows observability backends to filter and group
+/// spans by their logical service (e.g., `plano(llm)`, `plano(filter)`).
+///
+/// # Arguments
+/// * `service_name` - The service name to use (e.g., `operation_component::LLM`)
+///
+/// # Example
+/// ```rust,ignore
+/// use brightstaff::tracing::{set_service_name, operation_component};
+///
+/// // Inside a traced function:
+/// set_service_name(operation_component::LLM);
+/// ```
+pub fn set_service_name(service_name: &str) {
+    get_active_span(|span| {
+        span.set_attribute(KeyValue::new(
+            SERVICE_NAME_OVERRIDE_KEY,
+            service_name.to_string(),
+        ));
+    });
+}
diff --git a/crates/brightstaff/src/tracing/service_name_exporter.rs b/crates/brightstaff/src/tracing/service_name_exporter.rs
new file mode 100644
index 00000000..ca0bde15
--- /dev/null
+++ b/crates/brightstaff/src/tracing/service_name_exporter.rs
@@ -0,0 +1,187 @@
+//! Service Name Override Exporter
+//!
+//! This module provides a custom SpanExporter that allows per-span service.name overrides.
+//! In OpenTelemetry, `service.name` is part of the Resource, which is tied to the TracerProvider.
+//! However, if you need different service names for different spans (e.g., `plano(orchestrator)`,
+//! `plano(filter)`, `plano(llm)`) within the same provider, this exporter handles that by:
+//!
+//! 1. Looking for a special span attribute `service.name.override`
+//! 2. Grouping spans by their effective service name
+//! 3. Exporting each group via a dedicated OTLP exporter whose Resource has the correct
+//!    `service.name`
+//!
+//! All per-service exporters are created eagerly at construction time so that no tonic
+//! channel creation happens later inside `futures_executor::block_on` (which the
+//! `BatchSpanProcessor` uses and which lacks a tokio runtime).
+//!
+//! # Usage
+//!
+//! ```rust
+//! use brightstaff::tracing::{set_service_name, operation_component};
+//!
+//! // In your instrumented code, set the service name override:
+//! set_service_name(operation_component::LLM);
+//! ```
+
+use opentelemetry::Key;
+use opentelemetry_otlp::WithExportConfig;
+use opentelemetry_sdk::error::OTelSdkResult;
+use opentelemetry_sdk::trace::{SpanData, SpanExporter};
+use opentelemetry_sdk::Resource;
+use std::collections::HashMap;
+use std::time::Duration;
+use tokio::sync::Mutex;
+
+use super::operation_component;
+
+/// The attribute key used to override the service name for a specific span.
+/// Set this as a span attribute to route the span to a different service.
+pub const SERVICE_NAME_OVERRIDE_KEY: &str = "service.name.override";
+
+/// Default service name used when no override is set on a span.
+const DEFAULT_SERVICE_NAME: &str = "plano";
+
+/// All known service names that will have dedicated exporters.
+const ALL_SERVICE_NAMES: &[&str] = &[
+    DEFAULT_SERVICE_NAME,
+    operation_component::INBOUND,
+    operation_component::ROUTING,
+    operation_component::ORCHESTRATOR,
+    operation_component::AGENT_FILTER,
+    operation_component::AGENT,
+    operation_component::LLM,
+];
+
+/// Span attribute keys to remove before export.
+const FILTERED_ATTR_KEYS: &[&str] = &[
+    "busy_ns",
+    "idle_ns",
+    "thread.id",
+    "thread.name",
+    "code.file.path",
+    "code.line.number",
+    "code.module.name",
+    "target",
+];
+
+/// A SpanExporter that supports per-span `service.name` overrides.
+///
+/// Internally it holds one OTLP exporter per known service name.  Each exporter
+/// has its own `Resource` with the correct `service.name`, so backends like
+/// Jaeger see the spans under the right service.
+pub struct ServiceNameOverrideExporter {
+    /// Map from service name → pre-created OTLP exporter (behind tokio Mutex
+    /// because `SpanExporter::export` takes `&self` and the future must be Send).
+    exporters: HashMap<String, Mutex<opentelemetry_otlp::SpanExporter>>,
+}
+
+// Manual Debug because `opentelemetry_otlp::SpanExporter` doesn't implement Debug
+impl std::fmt::Debug for ServiceNameOverrideExporter {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("ServiceNameOverrideExporter")
+            .field("services", &self.exporters.keys().collect::<Vec<_>>())
+            .finish()
+    }
+}
+
+impl ServiceNameOverrideExporter {
+    /// Create a new `ServiceNameOverrideExporter`.
+    ///
+    /// This eagerly creates one OTLP gRPC exporter per known service name so
+    /// that the tonic channel is established while a tokio runtime is available.
+    ///
+    /// # Arguments
+    /// * `endpoint` – The OTLP collector endpoint URL (e.g. `http://localhost:4317`)
+    pub fn new(endpoint: &str) -> Self {
+        let mut exporters = HashMap::new();
+
+        for &service_name in ALL_SERVICE_NAMES {
+            let resource = Resource::builder_empty()
+                .with_service_name(service_name)
+                .build();
+
+            let mut exporter = opentelemetry_otlp::SpanExporter::builder()
+                .with_tonic()
+                .with_endpoint(endpoint)
+                .build()
+                .expect("Failed to create OTLP span exporter");
+
+            exporter.set_resource(&resource);
+            exporters.insert(service_name.to_string(), Mutex::new(exporter));
+        }
+
+        Self { exporters }
+    }
+}
+
+impl SpanExporter for ServiceNameOverrideExporter {
+    fn export(
+        &self,
+        batch: Vec<SpanData>,
+    ) -> impl std::future::Future<Output = OTelSdkResult> + Send {
+        let override_key = Key::new(SERVICE_NAME_OVERRIDE_KEY);
+
+        // Group spans by their effective service name
+        let mut spans_by_service: HashMap<String, Vec<SpanData>> = HashMap::new();
+
+        let should_filter = !tracing::enabled!(tracing::Level::DEBUG);
+
+        for span in batch {
+            let mut span = span;
+
+            if should_filter {
+                span.attributes
+                    .retain(|kv| !FILTERED_ATTR_KEYS.contains(&kv.key.as_str()));
+            }
+
+            let service_name = span
+                .attributes
+                .iter()
+                .find(|kv| kv.key == override_key)
+                .map(|kv| kv.value.to_string())
+                .unwrap_or_else(|| DEFAULT_SERVICE_NAME.to_string());
+
+            spans_by_service.entry(service_name).or_default().push(span);
+        }
+
+        // Collect grouped spans into a Vec so the async block owns the data.
+        let results: Vec<(String, Vec<SpanData>)> = spans_by_service.into_iter().collect();
+        async move {
+            for (service_name, spans) in results {
+                // Look up the pre-created exporter; fall back to default if
+                // the service name isn't one of the known ones.
+                let key = if self.exporters.contains_key(&service_name) {
+                    service_name.clone()
+                } else {
+                    DEFAULT_SERVICE_NAME.to_string()
+                };
+
+                if let Some(exporter_mutex) = self.exporters.get(&key) {
+                    let exporter = exporter_mutex.lock().await;
+                    if let Err(e) = exporter.export(spans).await {
+                        tracing::warn!(
+                            service = %service_name,
+                            error = ?e,
+                            "Failed to export spans"
+                        );
+                    }
+                }
+            }
+            Ok(())
+        }
+    }
+
+    fn shutdown_with_timeout(&mut self, timeout: Duration) -> OTelSdkResult {
+        for (_, exporter_mutex) in self.exporters.iter() {
+            if let Ok(mut exporter) = exporter_mutex.try_lock() {
+                let _ = exporter.shutdown_with_timeout(timeout);
+            }
+        }
+        Ok(())
+    }
+
+    fn set_resource(&mut self, _resource: &Resource) {
+        // Each inner exporter already has its own resource set at creation time.
+        // Nothing to propagate.
+    }
+}
diff --git a/crates/brightstaff/src/utils/tracing.rs b/crates/brightstaff/src/utils/tracing.rs
index 6da4b631..21882303 100644
--- a/crates/brightstaff/src/utils/tracing.rs
+++ b/crates/brightstaff/src/utils/tracing.rs
@@ -3,12 +3,17 @@ use std::sync::OnceLock;
 
 use opentelemetry::global;
 use opentelemetry_sdk::{propagation::TraceContextPropagator, trace::SdkTracerProvider};
-use opentelemetry_stdout::SpanExporter;
 use time::macros::format_description;
 use tracing::{Event, Subscriber};
 use tracing_subscriber::fmt::{format, time::FormatTime, FmtContext, FormatEvent, FormatFields};
+use tracing_subscriber::layer::SubscriberExt;
+use tracing_subscriber::registry::LookupSpan;
+use tracing_subscriber::util::SubscriberInitExt;
 use tracing_subscriber::EnvFilter;
 
+use crate::tracing::ServiceNameOverrideExporter;
+use common::configuration::Tracing;
+
 struct BracketedTime;
 
 impl FormatTime for BracketedTime {
@@ -29,7 +34,7 @@ struct BracketedFormatter;
 
 impl<S, N> FormatEvent<S, N> for BracketedFormatter
 where
-    S: Subscriber + for<'a> tracing_subscriber::registry::LookupSpan<'a>,
+    S: Subscriber + for<'a> LookupSpan<'a>,
     N: for<'a> FormatFields<'a> + 'static,
 {
     fn format_event(
@@ -43,36 +48,116 @@ where
 
         write!(
             writer,
-            "[{}] ",
+            "[{}]",
             event.metadata().level().to_string().to_lowercase()
         )?;
 
+        // Extract request_id from span context if present
+        if let Some(scope) = ctx.event_scope() {
+            for span in scope.from_root() {
+                let extensions = span.extensions();
+                if let Some(fields) = extensions.get::<FormattedFields<N>>() {
+                    let fields_str = fields.fields.as_str();
+                    // Look for request_id in the formatted fields
+                    if let Some(start) = fields_str.find("request_id=") {
+                        let rest = &fields_str[start + 11..]; // Skip "request_id="
+                        let end = rest.find(|c: char| c.is_whitespace()).unwrap_or(rest.len());
+                        let rid = &rest[..end];
+                        write!(writer, " request_id={}", rid)?;
+                        break;
+                    }
+                }
+            }
+        }
+
+        write!(writer, " ")?;
         ctx.field_format().format_fields(writer.by_ref(), event)?;
 
         writeln!(writer)
     }
 }
 
+use tracing_subscriber::fmt::FormattedFields;
+
 static INIT_LOGGER: OnceLock<SdkTracerProvider> = OnceLock::new();
 
-pub fn init_tracer() -> &'static SdkTracerProvider {
+pub fn init_tracer(tracing_config: Option<&Tracing>) -> &'static SdkTracerProvider {
     INIT_LOGGER.get_or_init(|| {
         global::set_text_map_propagator(TraceContextPropagator::new());
-        // Install stdout exporter pipeline to be able to retrieve the collected spans.
-        // For the demonstration, use `Sampler::AlwaysOn` sampler to sample all traces.
-        let provider = SdkTracerProvider::builder()
-            .with_simple_exporter(SpanExporter::default())
-            .build();
 
-        global::set_tracer_provider(provider.clone());
+        // Get OTEL endpoint and sampling from config.yaml tracing section
+        let otel_endpoint = tracing_config.and_then(|t| t.opentracing_grpc_endpoint.clone());
 
-        tracing_subscriber::fmt()
-            .with_env_filter(
-                EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")),
-            )
-            .event_format(BracketedFormatter)
-            .init();
+        let random_sampling = tracing_config.and_then(|t| t.random_sampling).unwrap_or(0);
 
-        provider
+        let tracing_enabled = random_sampling > 0 && otel_endpoint.is_some();
+        eprintln!(
+            "initializing tracing: tracing_enabled={}, otel_endpoint={:?}, random_sampling={}",
+            tracing_enabled, otel_endpoint, random_sampling
+        );
+
+        // Create OTLP exporter to send spans to collector
+        if tracing_enabled {
+            // Set service name via environment if not already set
+            if std::env::var("OTEL_SERVICE_NAME").is_err() {
+                std::env::set_var("OTEL_SERVICE_NAME", "plano");
+            }
+
+            // Create ServiceNameOverrideExporter to support per-span service names
+            // This allows spans to have different service names (e.g., plano(orchestrator),
+            // plano(filter), plano(llm)) by setting the "service.name.override" attribute
+            let exporter = ServiceNameOverrideExporter::new(otel_endpoint.as_ref().unwrap());
+
+            let provider = SdkTracerProvider::builder()
+                .with_batch_exporter(exporter)
+                .build();
+
+            global::set_tracer_provider(provider.clone());
+
+            // Create OpenTelemetry tracing layer using TracerProvider trait
+            use opentelemetry::trace::TracerProvider as _;
+            let telemetry_layer =
+                tracing_opentelemetry::layer().with_tracer(provider.tracer("brightstaff"));
+
+            // Combine the OpenTelemetry layer with fmt layer using the registry
+            let env_filter =
+                EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
+
+            // Create fmt layer with span field formatting enabled (no ANSI to keep fields parseable)
+            let fmt_layer = tracing_subscriber::fmt::layer()
+                .event_format(BracketedFormatter)
+                .fmt_fields(format::DefaultFields::new())
+                .with_ansi(false);
+
+            let subscriber = tracing_subscriber::registry()
+                .with(telemetry_layer)
+                .with(env_filter)
+                .with(fmt_layer);
+
+            tracing::subscriber::set_global_default(subscriber)
+                .expect("Failed to set tracing subscriber");
+
+            provider
+        } else {
+            // Tracing disabled - use no-op provider
+            let provider = SdkTracerProvider::builder().build();
+            global::set_tracer_provider(provider.clone());
+
+            let env_filter =
+                EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
+
+            // Create fmt layer with span field formatting enabled (no ANSI to keep fields parseable)
+            let fmt_layer = tracing_subscriber::fmt::layer()
+                .event_format(BracketedFormatter)
+                .fmt_fields(format::DefaultFields::new())
+                .with_ansi(false);
+
+            tracing_subscriber::registry()
+                .with(env_filter)
+                .with(fmt_layer)
+                .init();
+
+            provider
+        }
     })
 }
diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml
index bcff9b32..cb471bd6 100644
--- a/crates/common/Cargo.toml
+++ b/crates/common/Cargo.toml
@@ -21,14 +21,8 @@ url = "2.5.4"
 hermesllm = { version = "0.1.0", path = "../hermesllm" }
 serde_with = "3.13.0"
 
-# Optional dependencies for trace collection (not available in WASM)
-tokio = { version = "1.44", features = ["sync", "time"], optional = true }
-reqwest = { version = "0.12", features = ["json"], optional = true }
-tracing = { version = "0.1", optional = true }
-
 [features]
 default = []
-trace-collection = ["tokio", "reqwest", "tracing"]
 
 [dev-dependencies]
 pretty_assertions = "1.4.1"
diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs
index c600ed5d..ccca89c3 100644
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@@ -90,6 +90,8 @@ pub struct Overrides {
 pub struct Tracing {
     pub sampling_rate: Option<f64>,
     pub trace_arch_internal: Option<bool>,
+    pub random_sampling: Option<u32>,
+    pub opentracing_grpc_endpoint: Option<String>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Default)]
diff --git a/crates/common/src/consts.rs b/crates/common/src/consts.rs
index 23260747..cafc8e80 100644
--- a/crates/common/src/consts.rs
+++ b/crates/common/src/consts.rs
@@ -30,7 +30,6 @@ pub const ARCH_MODEL_PREFIX: &str = "Arch";
 pub const HALLUCINATION_TEMPLATE: &str =
     "It seems I'm missing some information. Could you provide the following details ";
 pub const OTEL_COLLECTOR_HTTP: &str = "opentelemetry_collector_http";
-pub const OTEL_POST_PATH: &str = "/v1/traces";
 pub const LLM_ROUTE_HEADER: &str = "x-arch-llm-route";
 pub const ENVOY_RETRY_HEADER: &str = "x-envoy-max-retries";
 pub const BRIGHT_STAFF_SERVICE_NAME: &str = "brightstaff";
diff --git a/crates/common/src/traces/collector.rs b/crates/common/src/traces/collector.rs
deleted file mode 100644
index e26f544e..00000000
--- a/crates/common/src/traces/collector.rs
+++ /dev/null
@@ -1,292 +0,0 @@
-use super::resource_span_builder::ResourceSpanBuilder;
-use super::shapes::Span;
-use std::collections::{HashMap, VecDeque};
-use std::sync::Arc;
-use tokio::sync::Mutex;
-use tokio::time::{interval, Duration};
-use tracing::{debug, error, warn};
-
-/// Parse W3C traceparent header into trace_id and parent_span_id
-/// Format: "00-{trace_id}-{parent_span_id}-01"
-///
-/// Returns (trace_id, Option<parent_span_id>)
-/// - parent_span_id is None if it's all zeros (0000000000000000), indicating a root span
-pub fn parse_traceparent(traceparent: &str) -> (String, Option<String>) {
-    let parts: Vec<&str> = traceparent.split('-').collect();
-    if parts.len() == 4 {
-        let trace_id = parts[1].to_string();
-        let parent_span_id = parts[2].to_string();
-
-        // If parent_span_id is all zeros, this is a root span with no parent
-        let parent = if parent_span_id == "0000000000000000" {
-            None
-        } else {
-            Some(parent_span_id)
-        };
-
-        (trace_id, parent)
-    } else {
-        warn!("Invalid traceparent format: {}", traceparent);
-        // Return empty trace ID and None for parent if parsing fails
-        (String::new(), None)
-    }
-}
-
-/// Collects and batches spans, flushing them to an OTEL collector
-///
-/// Supports multiple services, with each service (e.g., "archgw(routing)", "archgw(llm)")
-/// maintaining its own span queue. Flushes all services together periodically.
-///
-/// Tracing can be enabled/disabled in two ways:
-/// 1. Via arch_config.yaml: presence of `tracing` configuration section
-/// 2. Via environment variable: `OTEL_TRACING_ENABLED=true/false`
-///
-/// When disabled, span recording and flushing are no-ops.
-pub struct TraceCollector {
-    /// Spans grouped by service name
-    /// Key: service name (e.g., "archgw(routing)", "archgw(llm)")
-    /// Value: queue of spans for that service
-    spans_by_service: Arc<Mutex<HashMap<String, VecDeque<Span>>>>,
-    flush_interval: Duration,
-    otel_url: String,
-    /// Whether tracing is enabled
-    enabled: bool,
-}
-
-impl TraceCollector {
-    /// Create a new trace collector
-    ///
-    /// # Arguments
-    /// * `enabled` - Whether tracing is enabled
-    ///   - `Some(true)` - Force enable tracing
-    ///   - `Some(false)` - Force disable tracing
-    ///   - `None` - Check `OTEL_TRACING_ENABLED` env var (defaults to true if not set)
-    ///
-    /// Other parameters are read from environment variables:
-    /// - `TRACE_FLUSH_INTERVAL_MS` - Flush interval in milliseconds (default: 1000)
-    /// - `OTEL_COLLECTOR_URL` - OTEL collector endpoint (default: http://localhost:9903/v1/traces)
-    pub fn new(enabled: Option<bool>) -> Self {
-        let flush_interval_ms = std::env::var("TRACE_FLUSH_INTERVAL_MS")
-            .ok()
-            .and_then(|s| s.parse().ok())
-            .unwrap_or(1000);
-
-        let otel_url = std::env::var("OTEL_COLLECTOR_URL")
-            .unwrap_or_else(|_| "http://localhost:9903/v1/traces".to_string());
-
-        // Determine if tracing is enabled:
-        // 1. Use explicit parameter if provided
-        // 2. Otherwise check OTEL_TRACING_ENABLED env var
-        // 3. Default to false if neither is set (tracing opt-in, not opt-out)
-        let enabled = enabled.unwrap_or_else(|| {
-            std::env::var("OTEL_TRACING_ENABLED")
-                .ok()
-                .and_then(|s| s.parse().ok())
-                .unwrap_or(false)
-        });
-
-        debug!(
-            "TraceCollector initialized: flush_interval={}ms, url={}, enabled={}",
-            flush_interval_ms, otel_url, enabled
-        );
-
-        Self {
-            spans_by_service: Arc::new(Mutex::new(HashMap::new())),
-            flush_interval: Duration::from_millis(flush_interval_ms),
-            otel_url,
-            enabled,
-        }
-    }
-
-    /// Record a span for a specific service
-    ///
-    /// # Arguments
-    /// * `service_name` - Name of the service (e.g., "archgw(routing)", "archgw(llm)")
-    /// * `span` - The span to record
-    pub fn record_span(&self, service_name: impl Into<String>, span: Span) {
-        // Skip recording if tracing is disabled
-        if !self.enabled {
-            return;
-        }
-
-        let service_name = service_name.into();
-
-        // Use try_lock to avoid blocking in async contexts
-        // If the lock is held, we skip recording (telemetry shouldn't block the app)
-        if let Ok(mut spans_by_service) = self.spans_by_service.try_lock() {
-            // Get or create the queue for this service
-            let spans = spans_by_service
-                .entry(service_name)
-                .or_insert_with(VecDeque::new);
-
-            spans.push_back(span);
-        } else {
-            // Lock contention - skip recording this span
-            debug!("Skipped span recording due to lock contention");
-        }
-        // Flushing is handled by the periodic background flusher (see `start_background_flusher`).
-    }
-
-    /// Flush all buffered spans to the OTEL collector
-    /// Builds ResourceSpans for each service with spans
-    pub async fn flush(&self) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
-        // Skip flushing if tracing is disabled
-        if !self.enabled {
-            return Ok(());
-        }
-
-        let mut spans_by_service = self.spans_by_service.lock().await;
-
-        if spans_by_service.is_empty() {
-            return Ok(());
-        }
-
-        // Snapshot and drain all services' spans
-        let service_batches: Vec<(String, Vec<Span>)> = spans_by_service
-            .iter_mut()
-            .filter_map(|(service_name, spans)| {
-                if spans.is_empty() {
-                    None
-                } else {
-                    Some((service_name.clone(), spans.drain(..).collect()))
-                }
-            })
-            .collect();
-
-        drop(spans_by_service); // Release lock before HTTP call
-
-        if service_batches.is_empty() {
-            return Ok(());
-        }
-
-        let total_spans: usize = service_batches.iter().map(|(_, spans)| spans.len()).sum();
-        debug!(
-            "Flushing {} spans across {} services to OTEL collector",
-            total_spans,
-            service_batches.len()
-        );
-
-        // Build canonical OTEL payload structure - one ResourceSpan per service
-        let resource_spans = self.build_resource_spans(service_batches);
-
-        match self.send_to_otel(resource_spans).await {
-            Ok(_) => {
-                debug!("Successfully flushed {} spans", total_spans);
-                Ok(())
-            }
-            Err(e) => {
-                warn!("Failed to send spans to OTEL collector: {:?}", e);
-                Err(e)
-            }
-        }
-    }
-
-    /// Build OTEL-compliant resource spans from collected spans, one ResourceSpan per service
-    fn build_resource_spans(
-        &self,
-        service_batches: Vec<(String, Vec<Span>)>,
-    ) -> Vec<super::shapes::ResourceSpan> {
-        service_batches
-            .into_iter()
-            .map(|(service_name, spans)| {
-                ResourceSpanBuilder::new(&service_name)
-                    .add_spans(spans)
-                    .build()
-            })
-            .collect()
-    }
-
-    /// Send resource spans to OTEL collector
-    /// Serializes as {"resourceSpans": [...]} per OTEL spec
-    async fn send_to_otel(
-        &self,
-        resource_spans: Vec<super::shapes::ResourceSpan>,
-    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
-        let client = reqwest::Client::new();
-
-        // Create OTEL payload with proper structure
-        let payload = serde_json::json!({
-            "resourceSpans": resource_spans
-        });
-
-        let response = client
-            .post(&self.otel_url)
-            .header("Content-Type", "application/json")
-            .json(&payload)
-            .timeout(Duration::from_secs(5))
-            .send()
-            .await?;
-
-        if !response.status().is_success() {
-            warn!(
-                "OTEL collector returned non-success status: {}",
-                response.status()
-            );
-            return Err(format!("OTEL collector error: {}", response.status()).into());
-        }
-
-        Ok(())
-    }
-
-    /// Start a background task that periodically flushes traces
-    /// Returns a join handle that can be used to stop the flusher
-    pub fn start_background_flusher(self: Arc<Self>) -> tokio::task::JoinHandle<()> {
-        let flush_interval = self.flush_interval;
-
-        tokio::spawn(async move {
-            let mut ticker = interval(flush_interval);
-
-            loop {
-                ticker.tick().await;
-
-                if let Err(e) = self.flush().await {
-                    error!("Background trace flush failed: {:?}", e);
-                }
-            }
-        })
-    }
-
-    /// Get current number of buffered spans across all services (for testing/monitoring)
-    pub async fn buffered_count(&self) -> usize {
-        self.spans_by_service
-            .lock()
-            .await
-            .values()
-            .map(|spans| spans.len())
-            .sum()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::traces::SpanBuilder;
-
-    #[tokio::test]
-    async fn test_collector_basic() {
-        let collector = TraceCollector::new(Some(true));
-
-        let span = SpanBuilder::new("test_operation")
-            .with_trace_id("abc123")
-            .build();
-
-        collector.record_span("test-service", span);
-
-        assert_eq!(collector.buffered_count().await, 1);
-    }
-
-    #[tokio::test]
-    async fn test_collector_auto_flush() {
-        // Since batch-triggered flush behavior was removed, record two spans and verify both are buffered
-        let collector = Arc::new(TraceCollector::new(Some(true)));
-
-        let span1 = SpanBuilder::new("test1").build();
-        let span2 = SpanBuilder::new("test2").build();
-
-        collector.record_span("test-service", span1);
-        collector.record_span("test-service", span2);
-
-        // With no batch-triggered flush, both spans should remain buffered
-        assert_eq!(collector.buffered_count().await, 2);
-    }
-}
diff --git a/crates/common/src/traces/mod.rs b/crates/common/src/traces/mod.rs
index 6181f194..e9177073 100644
--- a/crates/common/src/traces/mod.rs
+++ b/crates/common/src/traces/mod.rs
@@ -5,12 +5,6 @@ mod constants;
 mod resource_span_builder;
 mod span_builder;
 
-#[cfg(feature = "trace-collection")]
-mod collector;
-
-#[cfg(all(test, feature = "trace-collection"))]
-mod tests;
-
 // Re-export original types
 pub use shapes::{
     Attribute, AttributeValue, Event, Resource, ResourceSpan, Scope, ScopeSpan, Span, Traceparent,
@@ -21,6 +15,3 @@ pub use shapes::{
 pub use constants::*;
 pub use resource_span_builder::ResourceSpanBuilder;
 pub use span_builder::{generate_random_span_id, SpanBuilder, SpanKind};
-
-#[cfg(feature = "trace-collection")]
-pub use collector::{parse_traceparent, TraceCollector};
diff --git a/crates/common/src/traces/tests/mock_otel_collector.rs b/crates/common/src/traces/tests/mock_otel_collector.rs
deleted file mode 100644
index 8c8e770d..00000000
--- a/crates/common/src/traces/tests/mock_otel_collector.rs
+++ /dev/null
@@ -1,96 +0,0 @@
-//! Mock OTEL Collector for testing trace output
-//!
-//! This module provides a simple HTTP server that mimics an OTEL collector.
-//! It exposes three endpoints:
-//! - POST /v1/traces: Capture incoming OTLP JSON payloads
-//! - GET /v1/traces: Return all captured payloads as JSON array
-//! - DELETE /v1/traces: Clear all captured payloads
-//!
-//! Each test creates its own MockOtelCollector instance.
-
-use axum::{
-    extract::State,
-    http::StatusCode,
-    routing::{delete, get, post},
-    Json, Router,
-};
-use serde_json::Value;
-use std::sync::Arc;
-use tokio::sync::RwLock;
-
-type SharedTraces = Arc<RwLock<Vec<Value>>>;
-
-/// POST /v1/traces - capture incoming OTLP payload
-async fn post_traces(State(traces): State<SharedTraces>, Json(payload): Json<Value>) -> StatusCode {
-    traces.write().await.push(payload);
-    StatusCode::OK
-}
-
-/// GET /v1/traces - return all captured payloads
-async fn get_traces(State(traces): State<SharedTraces>) -> Json<Vec<Value>> {
-    Json(traces.read().await.clone())
-}
-
-/// DELETE /v1/traces - clear all captured payloads
-async fn delete_traces(State(traces): State<SharedTraces>) -> StatusCode {
-    traces.write().await.clear();
-    StatusCode::NO_CONTENT
-}
-
-/// Mock OTEL collector server
-pub struct MockOtelCollector {
-    address: String,
-    client: reqwest::Client,
-    #[allow(dead_code)]
-    server_handle: tokio::task::JoinHandle<()>,
-}
-
-impl MockOtelCollector {
-    /// Create and start a new mock collector on a random port
-    pub async fn start() -> Self {
-        let traces = Arc::new(RwLock::new(Vec::new()));
-
-        let app = Router::new()
-            .route("/v1/traces", post(post_traces))
-            .route("/v1/traces", get(get_traces))
-            .route("/v1/traces", delete(delete_traces))
-            .with_state(traces.clone());
-
-        let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
-            .await
-            .expect("Failed to bind to random port");
-
-        let addr = listener.local_addr().expect("Failed to get local address");
-        let address = format!("http://127.0.0.1:{}", addr.port());
-
-        let server_handle = tokio::spawn(async move {
-            axum::serve(listener, app).await.expect("Server failed");
-        });
-
-        // Give server a moment to start
-        tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;
-
-        Self {
-            address,
-            client: reqwest::Client::new(),
-            server_handle,
-        }
-    }
-
-    /// Get the address of the collector
-    pub fn address(&self) -> &str {
-        &self.address
-    }
-
-    /// GET /v1/traces - fetch all captured payloads
-    pub async fn get_traces(&self) -> Vec<Value> {
-        self.client
-            .get(format!("{}/v1/traces", self.address))
-            .send()
-            .await
-            .expect("Failed to GET traces")
-            .json()
-            .await
-            .expect("Failed to parse traces JSON")
-    }
-}
diff --git a/crates/common/src/traces/tests/mod.rs b/crates/common/src/traces/tests/mod.rs
deleted file mode 100644
index 7bba42f8..00000000
--- a/crates/common/src/traces/tests/mod.rs
+++ /dev/null
@@ -1,4 +0,0 @@
-mod mock_otel_collector;
-mod trace_integration_test;
-
-pub use mock_otel_collector::MockOtelCollector;
diff --git a/crates/common/src/traces/tests/trace_integration_test.rs b/crates/common/src/traces/tests/trace_integration_test.rs
deleted file mode 100644
index 5f41a2c3..00000000
--- a/crates/common/src/traces/tests/trace_integration_test.rs
+++ /dev/null
@@ -1,342 +0,0 @@
-//! Integration tests for OpenTelemetry tracing in router.rs
-//!
-//! These tests validate that the spans created for LLM requests contain
-//! all expected attributes and events by checking the raw JSON payloads
-//! sent to the mock OTEL collector.
-//!
-//! ## Test Design
-//! Each test creates its own MockOtelCollector and TraceCollector:
-//! 1. Start MockOtelCollector on random port
-//! 2. Create TraceCollector with 500ms flush interval
-//! 3. Record spans using TraceCollector
-//! 4. Flush and wait (500ms + 200ms buffer = 700ms total) for spans to arrive
-//! 5. Get raw JSON payloads (GET /v1/traces) and validate structure
-//! 6. Test cleanup happens automatically when collectors are dropped
-//!
-//! ## Serial Execution
-//! Tests use the `#[serial]` attribute to run sequentially because they
-//! use global environment variables (OTEL_COLLECTOR_URL, OTEL_TRACING_ENABLED,
-//! TRACE_FLUSH_INTERVAL_MS). This ensures test isolation without requiring
-//! the `--test-threads=1` command line flag.
-
-const FLUSH_INTERVAL_MS: u64 = 50;
-const FLUSH_BUFFER_MS: u64 = 50;
-const TOTAL_WAIT_MS: u64 = FLUSH_INTERVAL_MS + FLUSH_BUFFER_MS;
-
-use crate::traces::{SpanBuilder, SpanKind, TraceCollector};
-use serde_json::Value;
-use serial_test::serial;
-use std::sync::Arc;
-
-use super::MockOtelCollector;
-
-/// Helper to extract all spans from OTLP JSON payloads
-fn extract_spans(payloads: &[Value]) -> Vec<&Value> {
-    let mut spans = Vec::new();
-    for payload in payloads {
-        if let Some(resource_spans) = payload.get("resourceSpans").and_then(|v| v.as_array()) {
-            for resource_span in resource_spans {
-                if let Some(scope_spans) =
-                    resource_span.get("scopeSpans").and_then(|v| v.as_array())
-                {
-                    for scope_span in scope_spans {
-                        if let Some(span_list) = scope_span.get("spans").and_then(|v| v.as_array())
-                        {
-                            spans.extend(span_list.iter());
-                        }
-                    }
-                }
-            }
-        }
-    }
-    spans
-}
-
-/// Helper to get string attribute value from a span
-fn get_string_attr<'a>(span: &'a Value, key: &str) -> Option<&'a str> {
-    span.get("attributes")
-        .and_then(|attrs| attrs.as_array())
-        .and_then(|attrs| {
-            attrs
-                .iter()
-                .find(|attr| attr.get("key").and_then(|k| k.as_str()) == Some(key))
-        })
-        .and_then(|attr| attr.get("value"))
-        .and_then(|v| v.get("stringValue"))
-        .and_then(|v| v.as_str())
-}
-
-#[tokio::test]
-#[serial]
-async fn test_llm_span_contains_basic_attributes() {
-    // Start mock OTEL collector
-    let mock_collector = MockOtelCollector::start().await;
-
-    // Create TraceCollector pointing to mock with 500ms flush intervalc
-    std::env::set_var(
-        "OTEL_COLLECTOR_URL",
-        format!("{}/v1/traces", mock_collector.address()),
-    );
-    std::env::set_var("OTEL_TRACING_ENABLED", "true");
-    std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500");
-    let trace_collector = Arc::new(TraceCollector::new(Some(true)));
-
-    // Create a test span simulating router.rs behavior
-    let span = SpanBuilder::new("POST /v1/chat/completions >> /v1/chat/completions")
-        .with_kind(SpanKind::Client)
-        .with_trace_id("test-trace-123")
-        .with_attribute("http.method", "POST")
-        .with_attribute("http.target", "/v1/chat/completions")
-        .with_attribute("http.upstream_target", "/v1/chat/completions")
-        .with_attribute("llm.model", "gpt-4o")
-        .with_attribute("llm.provider", "openai")
-        .with_attribute("llm.is_streaming", "true")
-        .with_attribute("llm.temperature", "0.7")
-        .build();
-
-    trace_collector.record_span("archgw(llm)", span);
-
-    // Flush and wait for spans to arrive (500ms flush interval + 200ms buffer)
-    trace_collector.flush().await.expect("Failed to flush");
-    tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await;
-
-    let payloads = mock_collector.get_traces().await;
-    let spans = extract_spans(&payloads);
-
-    assert_eq!(spans.len(), 1, "Expected exactly one span");
-
-    let span = spans[0];
-    // Validate HTTP attributes
-    assert_eq!(get_string_attr(span, "http.method"), Some("POST"));
-    assert_eq!(
-        get_string_attr(span, "http.target"),
-        Some("/v1/chat/completions")
-    );
-
-    // Validate LLM attributes
-    assert_eq!(get_string_attr(span, "llm.model"), Some("gpt-4o"));
-    assert_eq!(get_string_attr(span, "llm.provider"), Some("openai"));
-    assert_eq!(get_string_attr(span, "llm.is_streaming"), Some("true"));
-    assert_eq!(get_string_attr(span, "llm.temperature"), Some("0.7"));
-}
-
-#[tokio::test]
-#[serial]
-async fn test_llm_span_contains_tool_information() {
-    let mock_collector = MockOtelCollector::start().await;
-    std::env::set_var(
-        "OTEL_COLLECTOR_URL",
-        format!("{}/v1/traces", mock_collector.address()),
-    );
-    std::env::set_var("OTEL_TRACING_ENABLED", "true");
-    std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500");
-    let trace_collector = Arc::new(TraceCollector::new(Some(true)));
-
-    let tools_formatted = "get_weather(...)\nsearch_web(...)\ncalculate(...)";
-
-    let span = SpanBuilder::new("POST /v1/chat/completions")
-        .with_trace_id("test-trace-tools")
-        .with_attribute("llm.request.tools", tools_formatted)
-        .with_attribute("llm.model", "gpt-4o")
-        .build();
-
-    trace_collector.record_span("archgw(llm)", span);
-    trace_collector.flush().await.expect("Failed to flush");
-    tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await;
-
-    let payloads = mock_collector.get_traces().await;
-    let spans = extract_spans(&payloads);
-
-    assert!(!spans.is_empty(), "No spans captured");
-
-    let span = spans[0];
-    let tools = get_string_attr(span, "llm.request.tools");
-
-    assert!(tools.is_some(), "Tools attribute missing");
-    assert!(tools.unwrap().contains("get_weather(...)"));
-    assert!(tools.unwrap().contains("search_web(...)"));
-    assert!(tools.unwrap().contains("calculate(...)"));
-    assert!(
-        tools.unwrap().contains('\n'),
-        "Tools should be newline-separated"
-    );
-}
-
-#[tokio::test]
-#[serial]
-async fn test_llm_span_contains_user_message_preview() {
-    let mock_collector = MockOtelCollector::start().await;
-    std::env::set_var(
-        "OTEL_COLLECTOR_URL",
-        format!("{}/v1/traces", mock_collector.address()),
-    );
-    std::env::set_var("OTEL_TRACING_ENABLED", "true");
-    std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500");
-    let trace_collector = Arc::new(TraceCollector::new(Some(true)));
-
-    let long_message =
-        "This is a very long user message that should be truncated to 50 characters in the span";
-    let preview = if long_message.len() > 50 {
-        format!("{}...", &long_message[..50])
-    } else {
-        long_message.to_string()
-    };
-
-    let span = SpanBuilder::new("POST /v1/messages")
-        .with_trace_id("test-trace-preview")
-        .with_attribute("llm.request.user_message_preview", &preview)
-        .build();
-
-    trace_collector.record_span("archgw(llm)", span);
-    trace_collector.flush().await.expect("Failed to flush");
-    tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await;
-
-    let payloads = mock_collector.get_traces().await;
-    let spans = extract_spans(&payloads);
-    let span = spans[0];
-
-    let message_preview = get_string_attr(span, "llm.request.user_message_preview");
-
-    assert!(message_preview.is_some());
-    assert!(message_preview.unwrap().len() <= 53); // 50 chars + "..."
-    assert!(message_preview.unwrap().contains("..."));
-}
-
-#[tokio::test]
-#[serial]
-async fn test_llm_span_contains_time_to_first_token() {
-    let mock_collector = MockOtelCollector::start().await;
-    std::env::set_var(
-        "OTEL_COLLECTOR_URL",
-        format!("{}/v1/traces", mock_collector.address()),
-    );
-    std::env::set_var("OTEL_TRACING_ENABLED", "true");
-    std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500");
-    let trace_collector = Arc::new(TraceCollector::new(Some(true)));
-
-    let ttft_ms = "245"; // milliseconds as string
-
-    let span = SpanBuilder::new("POST /v1/chat/completions")
-        .with_trace_id("test-trace-ttft")
-        .with_attribute("llm.is_streaming", "true")
-        .with_attribute("llm.time_to_first_token_ms", ttft_ms)
-        .build();
-
-    trace_collector.record_span("archgw(llm)", span);
-    trace_collector.flush().await.expect("Failed to flush");
-    tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await;
-
-    let payloads = mock_collector.get_traces().await;
-    let spans = extract_spans(&payloads);
-    let span = spans[0];
-
-    // Check TTFT attribute
-    let ttft_attr = get_string_attr(span, "llm.time_to_first_token_ms");
-    assert_eq!(ttft_attr, Some("245"));
-}
-
-#[tokio::test]
-#[serial]
-async fn test_llm_span_contains_upstream_path() {
-    let mock_collector = MockOtelCollector::start().await;
-    std::env::set_var(
-        "OTEL_COLLECTOR_URL",
-        format!("{}/v1/traces", mock_collector.address()),
-    );
-    std::env::set_var("OTEL_TRACING_ENABLED", "true");
-    std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500");
-    let trace_collector = Arc::new(TraceCollector::new(Some(true)));
-
-    // Test Zhipu provider with path transformation
-    let span = SpanBuilder::new("POST /v1/chat/completions >> /api/paas/v4/chat/completions")
-        .with_trace_id("test-trace-upstream")
-        .with_attribute("http.upstream_target", "/api/paas/v4/chat/completions")
-        .with_attribute("llm.provider", "zhipu")
-        .with_attribute("llm.model", "glm-4")
-        .build();
-
-    trace_collector.record_span("archgw(llm)", span);
-    trace_collector.flush().await.expect("Failed to flush");
-    tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await;
-
-    let payloads = mock_collector.get_traces().await;
-    let spans = extract_spans(&payloads);
-    let span = spans[0];
-
-    // Operation name should show the transformation
-    let name = span.get("name").and_then(|v| v.as_str());
-    assert!(name.is_some());
-    assert!(
-        name.unwrap().contains(">>"),
-        "Operation name should show path transformation"
-    );
-
-    // Check upstream target attribute
-    let upstream = get_string_attr(span, "http.upstream_target");
-    assert_eq!(upstream, Some("/api/paas/v4/chat/completions"));
-}
-
-#[tokio::test]
-#[serial]
-async fn test_llm_span_multiple_services() {
-    let mock_collector = MockOtelCollector::start().await;
-    std::env::set_var(
-        "OTEL_COLLECTOR_URL",
-        format!("{}/v1/traces", mock_collector.address()),
-    );
-    std::env::set_var("OTEL_TRACING_ENABLED", "true");
-    std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500");
-    let trace_collector = Arc::new(TraceCollector::new(Some(true)));
-
-    // Create spans for different services
-    let llm_span = SpanBuilder::new("LLM Request")
-        .with_trace_id("test-multi")
-        .with_attribute("service", "llm")
-        .build();
-
-    let routing_span = SpanBuilder::new("Routing Decision")
-        .with_trace_id("test-multi")
-        .with_attribute("service", "routing")
-        .build();
-
-    trace_collector.record_span("archgw(llm)", llm_span);
-    trace_collector.record_span("archgw(routing)", routing_span);
-    trace_collector.flush().await.expect("Failed to flush");
-    tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await;
-
-    let payloads = mock_collector.get_traces().await;
-    let all_spans = extract_spans(&payloads);
-
-    assert_eq!(all_spans.len(), 2, "Should have captured both spans");
-}
-
-#[tokio::test]
-#[serial]
-async fn test_tracing_disabled_produces_no_spans() {
-    let mock_collector = MockOtelCollector::start().await;
-
-    // Create TraceCollector with tracing DISABLED
-    std::env::set_var(
-        "OTEL_COLLECTOR_URL",
-        format!("{}/v1/traces", mock_collector.address()),
-    );
-    std::env::set_var("OTEL_TRACING_ENABLED", "false");
-    std::env::set_var("TRACE_FLUSH_INTERVAL_MS", "500");
-    let trace_collector = Arc::new(TraceCollector::new(Some(false)));
-
-    let span = SpanBuilder::new("Test Span")
-        .with_trace_id("test-disabled")
-        .build();
-
-    trace_collector.record_span("archgw(llm)", span);
-    trace_collector.flush().await.ok(); // Should be no-op when disabled
-    tokio::time::sleep(tokio::time::Duration::from_millis(TOTAL_WAIT_MS)).await;
-
-    let payloads = mock_collector.get_traces().await;
-    let all_spans = extract_spans(&payloads);
-    assert_eq!(
-        all_spans.len(),
-        0,
-        "No spans should be captured when tracing is disabled"
-    );
-}
diff --git a/crates/llm_gateway/src/filter_context.rs b/crates/llm_gateway/src/filter_context.rs
index 3a1f7b84..4bcd9955 100644
--- a/crates/llm_gateway/src/filter_context.rs
+++ b/crates/llm_gateway/src/filter_context.rs
@@ -74,7 +74,7 @@ impl RootContext for FilterContext {
 
     fn create_http_context(&self, context_id: u32) -> Option<Box<dyn HttpContext>> {
         trace!(
-            "||| create_http_context called with context_id: {:?} |||",
+            "create_http_context called with context_id: {:?}",
             context_id
         );
 
@@ -107,10 +107,7 @@ impl Context for FilterContext {
         _body_size: usize,
         _num_trailers: usize,
     ) {
-        trace!(
-            "||| on_http_call_response called with token_id: {:?} |||",
-            token_id
-        );
+        trace!("on_http_call_response called with token_id: {:?}", token_id);
 
         let _callout_data = self
             .callouts
@@ -119,7 +116,7 @@ impl Context for FilterContext {
             .expect("invalid token_id");
 
         if let Some(status) = self.get_http_call_response_header(":status") {
-            trace!("trace response status: {:?}", status);
+            trace!("response status: {:?}", status);
         };
     }
 }
diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index 0543d3d1..d3116233 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -97,7 +97,7 @@ impl StreamContext {
             .as_ref()
             .filter(|id| !id.is_empty())
             .cloned()
-            .unwrap_or_else(|| "NO_REQUEST_ID".to_string())
+            .unwrap_or_else(|| "no_request_id".to_string())
     }
     fn llm_provider(&self) -> &LlmProvider {
         self.llm_provider
@@ -145,14 +145,14 @@ impl StreamContext {
                 match self.llm_providers.default() {
                     Some(default_provider) => {
                         info!(
-                            "[PLANO_REQ_ID:{}] Provider selection failed, using default provider",
+                            "request_id={}: provider selection failed, using default provider",
                             self.request_identifier()
                         );
                         default_provider
                     }
                     None => {
                         error!(
-                            "[PLANO_REQ_ID:{}] PROVIDER_SELECTION_FAILED: Error='{}' and no default provider configured",
+                            "request_id={}: provider selection failed, error='{}' and no default provider configured",
                             self.request_identifier(),
                             err
                         );
@@ -165,7 +165,7 @@ impl StreamContext {
         self.llm_provider = Some(provider);
 
         info!(
-            "[PLANO_REQ_ID:{}] PROVIDER_SELECTION: Hint='{}' -> Selected='{}'",
+            "request_id={}: provider selected, hint='{}' selected='{}'",
             self.request_identifier(),
             self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
                 .unwrap_or("none".to_string()),
@@ -180,12 +180,12 @@ impl StreamContext {
             // Check if client provided an Authorization header
             if self.get_http_request_header("Authorization").is_none() {
                 warn!(
-                    "[PLANO_REQ_ID:{}] AUTH_PASSTHROUGH: passthrough_auth enabled but no Authorization header present in client request",
+                    "request_id={}: passthrough_auth enabled but no authorization header present in client request",
                     self.request_identifier()
                 );
             } else {
                 debug!(
-                    "[PLANO_REQ_ID:{}] AUTH_PASSTHROUGH: preserving client Authorization header for provider '{}'",
+                    "request_id={}: preserving client authorization header for provider '{}'",
                     self.request_identifier(),
                     self.llm_provider().name
                 );
@@ -249,7 +249,11 @@ impl StreamContext {
     }
 
     fn send_server_error(&self, error: ServerError, override_status_code: Option<StatusCode>) {
-        warn!("server error occurred: {}", error);
+        warn!(
+            "request_id={}: server error occurred: {}",
+            self.request_identifier(),
+            error
+        );
         self.send_http_response(
             override_status_code
                 .unwrap_or(StatusCode::INTERNAL_SERVER_ERROR)
@@ -269,7 +273,7 @@ impl StreamContext {
         let token_count = tokenizer::token_count(model, json_string).unwrap_or(0);
 
         debug!(
-            "[PLANO_REQ_ID:{}] TOKEN_COUNT: model='{}' input_tokens={}",
+            "request_id={}: token count, model='{}' input_tokens={}",
             self.request_identifier(),
             model,
             token_count
@@ -283,7 +287,7 @@ impl StreamContext {
         // Check if rate limiting needs to be applied.
         if let Some(selector) = self.ratelimit_selector.take() {
             info!(
-                "[PLANO_REQ_ID:{}] RATELIMIT_CHECK: model='{}' selector='{}:{}'",
+                "request_id={}: ratelimit check, model='{}' selector='{}:{}'",
                 self.request_identifier(),
                 model,
                 selector.key,
@@ -296,7 +300,7 @@ impl StreamContext {
             )?;
         } else {
             debug!(
-                "[PLANO_REQ_ID:{}] RATELIMIT_SKIP: model='{}' (no selector)",
+                "request_id={}: ratelimit skip, model='{}' (no selector)",
                 self.request_identifier(),
                 model
             );
@@ -315,7 +319,7 @@ impl StreamContext {
                 Ok(duration) => {
                     let duration_ms = duration.as_millis();
                     info!(
-                        "[PLANO_REQ_ID:{}] TIME_TO_FIRST_TOKEN: {}ms",
+                        "request_id={}: time to first token {}ms",
                         self.request_identifier(),
                         duration_ms
                     );
@@ -324,7 +328,7 @@ impl StreamContext {
                 }
                 Err(e) => {
                     warn!(
-                        "[PLANO_REQ_ID:{}] TIME_MEASUREMENT_ERROR: {:?}",
+                        "request_id={}: time measurement error: {:?}",
                         self.request_identifier(),
                         e
                     );
@@ -340,7 +344,7 @@ impl StreamContext {
                 // Convert the duration to milliseconds
                 let duration_ms = duration.as_millis();
                 info!(
-                    "[PLANO_REQ_ID:{}] REQUEST_COMPLETE: latency={}ms tokens={}",
+                    "request_id={}: request complete, latency={}ms tokens={}",
                     self.request_identifier(),
                     duration_ms,
                     self.response_tokens
@@ -356,7 +360,7 @@ impl StreamContext {
                     self.metrics.time_per_output_token.record(tpot);
 
                     info!(
-                        "[PLANO_REQ_ID:{}] TOKEN_THROUGHPUT: time_per_token={}ms tokens_per_second={}",
+                        "request_id={}: token throughput, time_per_token={}ms tokens_per_second={}",
                         self.request_identifier(),
                         tpot,
                         1000 / tpot
@@ -366,7 +370,11 @@ impl StreamContext {
                 }
             }
             Err(e) => {
-                warn!("SystemTime error: {:?}", e);
+                warn!(
+                    "request_id={}: system time error: {:?}",
+                    self.request_identifier(),
+                    e
+                );
             }
         }
         // Record the output sequence length
@@ -379,7 +387,7 @@ impl StreamContext {
         if self.streaming_response {
             let chunk_size = body_size;
             debug!(
-                "[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_CHUNK: streaming=true chunk_size={}",
+                "request_id={}: upstream response chunk, streaming=true chunk_size={}",
                 self.request_identifier(),
                 chunk_size
             );
@@ -387,7 +395,7 @@ impl StreamContext {
                 Some(chunk) => chunk,
                 None => {
                     warn!(
-                        "[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_ERROR: empty chunk, size={}",
+                        "request_id={}: upstream response error, empty chunk size={}",
                         self.request_identifier(),
                         chunk_size
                     );
@@ -397,7 +405,7 @@ impl StreamContext {
 
             if streaming_chunk.len() != chunk_size {
                 warn!(
-                    "[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_MISMATCH: expected={} actual={}",
+                    "request_id={}: upstream response size mismatch, expected={} actual={}",
                     self.request_identifier(),
                     chunk_size,
                     streaming_chunk.len()
@@ -409,14 +417,17 @@ impl StreamContext {
                 return Err(Action::Continue);
             }
             debug!(
-                "[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_COMPLETE: streaming=false body_size={}",
+                "request_id={}: upstream response complete, streaming=false body_size={}",
                 self.request_identifier(),
                 body_size
             );
             match self.get_http_response_body(0, body_size) {
                 Some(body) => Ok(body),
                 None => {
-                    warn!("non streaming response body empty");
+                    warn!(
+                        "request_id={}: non streaming response body empty",
+                        self.request_identifier()
+                    );
                     Err(Action::Continue)
                 }
             }
@@ -429,7 +440,7 @@ impl StreamContext {
         provider_id: ProviderId,
     ) -> Result<Vec<u8>, Action> {
         debug!(
-            "[PLANO_REQ_ID:{}] STREAMING_PROCESS: client={:?} provider_id={:?} chunk_size={}",
+            "request_id={}: streaming process, client={:?} provider_id={:?} chunk_size={}",
             self.request_identifier(),
             self.client_api,
             provider_id,
@@ -460,7 +471,11 @@ impl StreamContext {
                     {
                         Ok(buffer) => Some(buffer),
                         Err(e) => {
-                            warn!("Failed to create SSE buffer: {}", e);
+                            warn!(
+                                "request_id={}: failed to create sse buffer: {}",
+                                self.request_identifier(),
+                                e
+                            );
                             return Err(Action::Continue);
                         }
                     };
@@ -477,7 +492,7 @@ impl StreamContext {
                             Ok(events) => {
                                 if has_buffered {
                                     debug!(
-                                        "[PLANO_REQ_ID:{}] SSE_INCOMPLETE_BUFFERED: {} bytes buffered for next chunk",
+                                        "request_id={}: sse incomplete buffered, {} bytes buffered for next chunk",
                                         self.request_identifier(),
                                         buffered_size
                                     );
@@ -486,7 +501,7 @@ impl StreamContext {
                             }
                             Err(e) => {
                                 warn!(
-                                    "[PLANO_REQ_ID:{}] SSE_CHUNK_PROCESS_ERROR: {}",
+                                    "request_id={}: sse chunk process error: {}",
                                     self.request_identifier(),
                                     e
                                 );
@@ -495,7 +510,10 @@ impl StreamContext {
                         }
                     }
                     None => {
-                        warn!("SSE chunk processor unexpectedly missing");
+                        warn!(
+                            "request_id={}: sse chunk processor unexpectedly missing",
+                            self.request_identifier()
+                        );
                         return Err(Action::Continue);
                     }
                 };
@@ -510,7 +528,7 @@ impl StreamContext {
 
                                 if provider_response.is_final() {
                                     debug!(
-                                        "[PLANO_REQ_ID:{}] STREAMING_FINAL_CHUNK: total_tokens={}",
+                                        "request_id={}: streaming final chunk, total_tokens={}",
                                         self.request_identifier(),
                                         self.response_tokens
                                     );
@@ -520,7 +538,7 @@ impl StreamContext {
                                     let estimated_tokens = content.len() / 4;
                                     self.response_tokens += estimated_tokens.max(1);
                                     debug!(
-                                        "[PLANO_REQ_ID:{}] STREAMING_TOKEN_UPDATE: delta_chars={} estimated_tokens={} total_tokens={}",
+                                        "request_id={}: streaming token update, delta_chars={} estimated_tokens={} total_tokens={}",
                                         self.request_identifier(),
                                         content.len(),
                                         estimated_tokens.max(1),
@@ -530,7 +548,7 @@ impl StreamContext {
                             }
                             Err(e) => {
                                 warn!(
-                                    "[PLANO_REQ_ID:{}] STREAMING_CHUNK_ERROR: {}",
+                                    "request_id={}: streaming chunk error: {}",
                                     self.request_identifier(),
                                     e
                                 );
@@ -552,7 +570,7 @@ impl StreamContext {
                         if !bytes.is_empty() {
                             let content = String::from_utf8_lossy(&bytes);
                             debug!(
-                                "[PLANO_REQ_ID:{}] UPSTREAM_TRANSFORMED_CLIENT_RESPONSE: size={} content={}",
+                                "request_id={}: upstream transformed client response, size={} content={}",
                                 self.request_identifier(),
                                 bytes.len(),
                                 content
@@ -561,13 +579,19 @@ impl StreamContext {
                         Ok(bytes)
                     }
                     None => {
-                        warn!("SSE buffer unexpectedly missing after initialization");
+                        warn!(
+                            "request_id={}: sse buffer unexpectedly missing after initialization",
+                            self.request_identifier()
+                        );
                         Err(Action::Continue)
                     }
                 }
             }
             None => {
-                warn!("Missing client_api for non-streaming response");
+                warn!(
+                    "request_id={}: missing client_api for non-streaming response",
+                    self.request_identifier()
+                );
                 Err(Action::Continue)
             }
         }
@@ -590,7 +614,7 @@ impl StreamContext {
                 Ok(buffer) => Some(buffer),
                 Err(e) => {
                     warn!(
-                        "[PLANO_REQ_ID:{}] BEDROCK_BUFFER_INIT_ERROR: {}",
+                        "request_id={}: bedrock buffer init error: {}",
                         self.request_identifier(),
                         e
                     );
@@ -620,7 +644,7 @@ impl StreamContext {
                                 let estimated_tokens = content.len() / 4;
                                 self.response_tokens += estimated_tokens.max(1);
                                 debug!(
-                                    "[PLANO_REQ_ID:{}] BEDROCK_TOKEN_UPDATE: delta_chars={} estimated_tokens={} total_tokens={}",
+                                    "request_id={}: bedrock token update, delta_chars={} estimated_tokens={} total_tokens={}",
                                     self.request_identifier(),
                                     content.len(),
                                     estimated_tokens.max(1),
@@ -638,7 +662,7 @@ impl StreamContext {
                         }
                         Err(e) => {
                             warn!(
-                                "[PLANO_REQ_ID:{}] BEDROCK_FRAME_CONVERSION_ERROR: {}",
+                                "request_id={}: bedrock frame conversion error: {}",
                                 self.request_identifier(),
                                 e
                             );
@@ -648,7 +672,7 @@ impl StreamContext {
                 Some(DecodedFrame::Incomplete) => {
                     // Incomplete frame - buffer retains partial data, wait for more bytes
                     debug!(
-                        "[PLANO_REQ_ID:{}] BEDROCK_INCOMPLETE_FRAME: waiting for more data",
+                        "request_id={}: bedrock incomplete frame, waiting for more data",
                         self.request_identifier()
                     );
                     break;
@@ -656,7 +680,7 @@ impl StreamContext {
                 None => {
                     // Decode error
                     warn!(
-                        "[PLANO_REQ_ID:{}] BEDROCK_DECODE_ERROR",
+                        "request_id={}: bedrock decode error",
                         self.request_identifier()
                     );
                     return Err(Action::Continue);
@@ -671,7 +695,7 @@ impl StreamContext {
                 if !bytes.is_empty() {
                     let content = String::from_utf8_lossy(&bytes);
                     debug!(
-                        "[PLANO_REQ_ID:{}] UPSTREAM_TRANSFORMED_CLIENT_RESPONSE: size={} content={}",
+                        "request_id={}: upstream transformed client response, size={} content={}",
                         self.request_identifier(),
                         bytes.len(),
                         content
@@ -681,7 +705,7 @@ impl StreamContext {
             }
             None => {
                 warn!(
-                    "[PLANO_REQ_ID:{}] BEDROCK_BUFFER_MISSING",
+                    "request_id={}: bedrock buffer missing",
                     self.request_identifier()
                 );
                 Err(Action::Continue)
@@ -695,7 +719,7 @@ impl StreamContext {
         provider_id: ProviderId,
     ) -> Result<Vec<u8>, Action> {
         debug!(
-            "[PLANO_REQ_ID:{}] NON_STREAMING_PROCESS: provider_id={:?} body_size={}",
+            "request_id={}: non-streaming process, provider_id={:?} body_size={}",
             self.request_identifier(),
             provider_id,
             body.len()
@@ -707,7 +731,7 @@ impl StreamContext {
                     Ok(response) => response,
                     Err(e) => {
                         warn!(
-                            "[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_PARSE_ERROR: {} | body: {}",
+                            "request_id={}: upstream response parse error: {} | body: {}",
                             self.request_identifier(),
                             e,
                             String::from_utf8_lossy(body)
@@ -722,7 +746,7 @@ impl StreamContext {
             }
             None => {
                 warn!(
-                    "[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_ERROR: missing client_api",
+                    "request_id={}: upstream response error, missing client_api",
                     self.request_identifier()
                 );
                 return Err(Action::Continue);
@@ -734,7 +758,7 @@ impl StreamContext {
             response.extract_usage_counts()
         {
             debug!(
-                "[PLANO_REQ_ID:{}] RESPONSE_USAGE: prompt_tokens={} completion_tokens={} total_tokens={}",
+                "request_id={}: response usage, prompt_tokens={} completion_tokens={} total_tokens={}",
                 self.request_identifier(),
                 prompt_tokens,
                 completion_tokens,
@@ -743,7 +767,7 @@ impl StreamContext {
             self.response_tokens = completion_tokens;
         } else {
             warn!(
-                "[PLANO_REQ_ID:{}] RESPONSE_USAGE: no usage information found",
+                "request_id={}: response usage, no usage information found",
                 self.request_identifier()
             );
         }
@@ -751,14 +775,18 @@ impl StreamContext {
         match serde_json::to_vec(&response) {
             Ok(bytes) => {
                 debug!(
-                    "[PLANO_REQ_ID:{}] CLIENT_RESPONSE_PAYLOAD: {}",
+                    "request_id={}: client response payload: {}",
                     self.request_identifier(),
                     String::from_utf8_lossy(&bytes)
                 );
                 Ok(bytes)
             }
             Err(e) => {
-                warn!("Failed to serialize normalized response: {}", e);
+                warn!(
+                    "request_id={}: failed to serialize normalized response: {}",
+                    self.request_identifier(),
+                    e
+                );
                 self.send_server_error(
                     ServerError::LogicError(format!("Response serialization error: {}", e)),
                     Some(StatusCode::INTERNAL_SERVER_ERROR),
@@ -819,7 +847,7 @@ impl HttpContext for StreamContext {
                 Some(provider_id.compatible_api_for_client(api, self.streaming_response));
 
             debug!(
-                "[PLANO_REQ_ID:{}] ROUTING_INFO: provider='{}' client_api={:?} resolved_api={:?} request_path='{}'",
+                "request_id={}: routing info, provider='{}' client_api={:?} resolved_api={:?} request_path='{}'",
                 self.request_identifier(),
                 provider.to_provider_id(),
                 api,
@@ -863,7 +891,7 @@ impl HttpContext for StreamContext {
 
     fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
         debug!(
-            "[PLANO_REQ_ID:{}] REQUEST_BODY_CHUNK: bytes={} end_stream={}",
+            "request_id={}: request body chunk, bytes={} end_stream={}",
             self.request_identifier(),
             body_size,
             end_of_stream
@@ -902,14 +930,14 @@ impl HttpContext for StreamContext {
         let mut deserialized_client_request: ProviderRequestType = match self.client_api.as_ref() {
             Some(the_client_api) => {
                 info!(
-                    "[PLANO_REQ_ID:{}] CLIENT_REQUEST_RECEIVED: api={:?} body_size={}",
+                    "request_id={}: client request received, api={:?} body_size={}",
                     self.request_identifier(),
                     the_client_api,
                     body_bytes.len()
                 );
 
                 debug!(
-                    "[PLANO_REQ_ID:{}] CLIENT_REQUEST_PAYLOAD: {}",
+                    "request_id={}: client request payload: {}",
                     self.request_identifier(),
                     String::from_utf8_lossy(&body_bytes)
                 );
@@ -918,7 +946,7 @@ impl HttpContext for StreamContext {
                     Ok(deserialized) => deserialized,
                     Err(e) => {
                         warn!(
-                            "[PLANO_REQ_ID:{}] CLIENT_REQUEST_PARSE_ERROR: {} | body: {}",
+                            "request_id={}: client request parse error: {} | body: {}",
                             self.request_identifier(),
                             e,
                             String::from_utf8_lossy(&body_bytes)
@@ -953,7 +981,7 @@ impl HttpContext for StreamContext {
             Some(model_name) => model_name,
             None => {
                 warn!(
-                    "[PLANO_REQ_ID:{}] MODEL_RESOLUTION_ERROR: no model specified | req_model='{}' provider='{}' config_model={:?}",
+                    "request_id={}: model resolution error, no model specified | req_model='{}' provider='{}' config_model={:?}",
                     self.request_identifier(),
                     model_requested,
                     self.llm_provider().name,
@@ -981,7 +1009,7 @@ impl HttpContext for StreamContext {
         self.user_message = deserialized_client_request.get_recent_user_message();
 
         info!(
-            "[PLANO_REQ_ID:{}] MODEL_RESOLUTION: req_model='{}' -> resolved_model='{}' provider='{}' streaming={}",
+            "request_id={}: model resolved, req_model='{}' -> resolved_model='{}' provider='{}' streaming={}",
             self.request_identifier(),
             model_requested,
             resolved_model,
@@ -1008,56 +1036,68 @@ impl HttpContext for StreamContext {
         }
 
         // Convert chat completion request to llm provider specific request using provider interface
-        let serialized_body_bytes_upstream =
-            match self.resolved_api.as_ref() {
-                Some(upstream) => {
-                    info!(
-                    "[PLANO_REQ_ID:{}] UPSTREAM_TRANSFORM: client_api={:?} -> upstream_api={:?}",
-                    self.request_identifier(), self.client_api, upstream
+        let serialized_body_bytes_upstream = match self.resolved_api.as_ref() {
+            Some(upstream) => {
+                info!(
+                    "request_id={}: upstream transform, client_api={:?} -> upstream_api={:?}",
+                    self.request_identifier(),
+                    self.client_api,
+                    upstream
                 );
 
-                    match ProviderRequestType::try_from((deserialized_client_request, upstream)) {
-                        Ok(request) => {
-                            debug!(
-                                "[PLANO_REQ_ID:{}] UPSTREAM_REQUEST_PAYLOAD: {}",
-                                self.request_identifier(),
-                                String::from_utf8_lossy(&request.to_bytes().unwrap_or_default())
-                            );
+                match ProviderRequestType::try_from((deserialized_client_request, upstream)) {
+                    Ok(request) => {
+                        debug!(
+                            "request_id={}: upstream request payload: {}",
+                            self.request_identifier(),
+                            String::from_utf8_lossy(&request.to_bytes().unwrap_or_default())
+                        );
 
-                            match request.to_bytes() {
-                                Ok(bytes) => bytes,
-                                Err(e) => {
-                                    warn!("Failed to serialize request body: {}", e);
-                                    self.send_server_error(
-                                        ServerError::LogicError(format!(
-                                            "Request serialization error: {}",
-                                            e
-                                        )),
-                                        Some(StatusCode::BAD_REQUEST),
-                                    );
-                                    return Action::Pause;
-                                }
+                        match request.to_bytes() {
+                            Ok(bytes) => bytes,
+                            Err(e) => {
+                                warn!(
+                                    "request_id={}: failed to serialize request body: {}",
+                                    self.request_identifier(),
+                                    e
+                                );
+                                self.send_server_error(
+                                    ServerError::LogicError(format!(
+                                        "Request serialization error: {}",
+                                        e
+                                    )),
+                                    Some(StatusCode::BAD_REQUEST),
+                                );
+                                return Action::Pause;
                             }
                         }
-                        Err(e) => {
-                            warn!("Failed to create provider request: {}", e);
-                            self.send_server_error(
-                                ServerError::LogicError(format!("Provider request error: {}", e)),
-                                Some(StatusCode::BAD_REQUEST),
-                            );
-                            return Action::Pause;
-                        }
+                    }
+                    Err(e) => {
+                        warn!(
+                            "request_id={}: failed to create provider request: {}",
+                            self.request_identifier(),
+                            e
+                        );
+                        self.send_server_error(
+                            ServerError::LogicError(format!("Provider request error: {}", e)),
+                            Some(StatusCode::BAD_REQUEST),
+                        );
+                        return Action::Pause;
                     }
                 }
-                None => {
-                    warn!("No upstream API resolved");
-                    self.send_server_error(
-                        ServerError::LogicError("No upstream API resolved".into()),
-                        Some(StatusCode::BAD_REQUEST),
-                    );
-                    return Action::Pause;
-                }
-            };
+            }
+            None => {
+                warn!(
+                    "request_id={}: no upstream api resolved",
+                    self.request_identifier()
+                );
+                self.send_server_error(
+                    ServerError::LogicError("No upstream API resolved".into()),
+                    Some(StatusCode::BAD_REQUEST),
+                );
+                return Action::Pause;
+            }
+        };
 
         self.set_http_request_body(0, body_size, &serialized_body_bytes_upstream);
         Action::Continue
@@ -1070,7 +1110,7 @@ impl HttpContext for StreamContext {
                 self.upstream_status_code = StatusCode::from_u16(status_code).ok();
 
                 debug!(
-                    "[PLANO_REQ_ID:{}] UPSTREAM_RESPONSE_STATUS: {}",
+                    "request_id={}: upstream response status: {}",
                     self.request_identifier(),
                     status_code
                 );
@@ -1090,14 +1130,17 @@ impl HttpContext for StreamContext {
 
     fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
         if self.request_body_sent_time.is_none() {
-            debug!("on_http_response_body: request body not sent, not doing any processing in llm filter");
+            debug!(
+                "request_id={}: request body not sent, skipping processing in llm filter",
+                self.request_identifier()
+            );
             return Action::Continue;
         }
 
         let current_time = get_current_time().unwrap();
         if end_of_stream && body_size == 0 {
             debug!(
-                "[PLANO_REQ_ID:{}] RESPONSE_BODY_COMPLETE: total_bytes={}",
+                "request_id={}: response body complete, total_bytes={}",
                 self.request_identifier(),
                 body_size
             );
@@ -1109,7 +1152,7 @@ impl HttpContext for StreamContext {
         if let Some(status_code) = &self.upstream_status_code {
             if status_code.is_client_error() || status_code.is_server_error() {
                 info!(
-                    "[PLANO_REQ_ID:{}] UPSTREAM_ERROR_RESPONSE: status={} body_size={}",
+                    "request_id={}: upstream error response, status={} body_size={}",
                     self.request_identifier(),
                     status_code.as_u16(),
                     body_size
@@ -1119,7 +1162,7 @@ impl HttpContext for StreamContext {
                 if body_size > 0 {
                     if let Ok(body) = self.read_raw_response_body(body_size) {
                         debug!(
-                            "[PLANO_REQ_ID:{}] UPSTREAM_ERROR_BODY: {}",
+                            "request_id={}: upstream error body: {}",
                             self.request_identifier(),
                             String::from_utf8_lossy(&body)
                         );
@@ -1141,7 +1184,7 @@ impl HttpContext for StreamContext {
                     None => "None".to_string(),
                 };
                 info!(
-                    "[PLANO_REQ_ID:{}], UNSUPPORTED API: {}",
+                    "request_id={}: unsupported api: {}",
                     self.request_identifier(),
                     api_info
                 );
@@ -1155,7 +1198,7 @@ impl HttpContext for StreamContext {
         };
 
         debug!(
-            "[PLANO_REQ_ID:{}] UPSTREAM_RAW_RESPONSE: body_size={} content={}",
+            "request_id={}: upstream raw response, body_size={} content={}",
             self.request_identifier(),
             body.len(),
             String::from_utf8_lossy(&body)
diff --git a/demos/use_cases/http_filter/README.md b/demos/use_cases/http_filter/README.md
index 89a5bd1a..fa683d9f 100644
--- a/demos/use_cases/http_filter/README.md
+++ b/demos/use_cases/http_filter/README.md
@@ -47,15 +47,15 @@ This brings up:
 - RAG Agent REST server on port 10505
 - Plano listener on port 8001 (and gateway on 12000)
 - Jaeger UI for viewing traces at http://localhost:16686
-- Open WebUI at http://localhost:8080 for interactive queries
+- AnythingLLM at http://localhost:3001 for interactive queries
 
 > Set `OPENAI_API_KEY` in your environment before running; `LLM_GATEWAY_ENDPOINT` defaults to `http://host.docker.internal:12000/v1`.
 
 ### 2. Test the system
 
-**Option A: Using Open WebUI (recommended)**
+**Option A: Using AnythingLLM (recommended)**
 
-Navigate to http://localhost:8080 and send queries through the chat interface.
+Navigate to http://localhost:3001 and send queries through the chat interface.
 
 **Option B: Using curl**
 ```bash
diff --git a/demos/use_cases/http_filter/docker-compose.yaml b/demos/use_cases/http_filter/docker-compose.yaml
index 469bf0d5..a5832b22 100644
--- a/demos/use_cases/http_filter/docker-compose.yaml
+++ b/demos/use_cases/http_filter/docker-compose.yaml
@@ -31,12 +31,17 @@ services:
       - "16686:16686"
       - "4317:4317"
       - "4318:4318"
-  open-web-ui:
-    image: dyrnq/open-webui:main
+  anythingllm:
+    image: mintplexlabs/anythingllm
     restart: always
     ports:
-      - "8080:8080"
+      - "3001:3001"
+    cap_add:
+      - SYS_ADMIN
     environment:
-      - DEFAULT_MODEL=gpt-4o-mini
-      - ENABLE_OPENAI_API=true
-      - OPENAI_API_BASE_URL=http://host.docker.internal:8001/v1
+      - STORAGE_DIR=/app/server/storage
+      - LLM_PROVIDER=generic-openai
+      - GENERIC_OPEN_AI_BASE_PATH=http://plano:8001/v1
+      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
+      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
+      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
diff --git a/demos/use_cases/llm_routing/docker-compose.yaml b/demos/use_cases/llm_routing/docker-compose.yaml
index 3b294076..8e4a88b3 100644
--- a/demos/use_cases/llm_routing/docker-compose.yaml
+++ b/demos/use_cases/llm_routing/docker-compose.yaml
@@ -1,15 +1,34 @@
 services:
 
+  plano:
+    build:
+      context: ../../../
+      dockerfile: Dockerfile
+    ports:
+      - "12000:12000"
+      - "12001:12001"
+    environment:
+      - ARCH_CONFIG_PATH=/app/arch_config.yaml
+      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
+      - OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317
+    volumes:
+      - ./config.yaml:/app/arch_config.yaml:ro
+      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
 
-  open-web-ui:
-    image: dyrnq/open-webui:main
+  anythingllm:
+    image: mintplexlabs/anythingllm
     restart: always
     ports:
-      - "8080:8080"
+      - "3001:3001"
+    cap_add:
+      - SYS_ADMIN
     environment:
-      - DEFAULT_MODEL=gpt-4o-mini
-      - ENABLE_OPENAI_API=true
-      - OPENAI_API_BASE_URL=http://host.docker.internal:12000/v1
+      - STORAGE_DIR=/app/server/storage
+      - LLM_PROVIDER=generic-openai
+      - GENERIC_OPEN_AI_BASE_PATH=http://plano:12000/v1
+      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
+      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
+      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
 
   jaeger:
     build:
diff --git a/demos/use_cases/mcp_filter/README.md b/demos/use_cases/mcp_filter/README.md
index 89a5bd1a..fa683d9f 100644
--- a/demos/use_cases/mcp_filter/README.md
+++ b/demos/use_cases/mcp_filter/README.md
@@ -47,15 +47,15 @@ This brings up:
 - RAG Agent REST server on port 10505
 - Plano listener on port 8001 (and gateway on 12000)
 - Jaeger UI for viewing traces at http://localhost:16686
-- Open WebUI at http://localhost:8080 for interactive queries
+- AnythingLLM at http://localhost:3001 for interactive queries
 
 > Set `OPENAI_API_KEY` in your environment before running; `LLM_GATEWAY_ENDPOINT` defaults to `http://host.docker.internal:12000/v1`.
 
 ### 2. Test the system
 
-**Option A: Using Open WebUI (recommended)**
+**Option A: Using AnythingLLM (recommended)**
 
-Navigate to http://localhost:8080 and send queries through the chat interface.
+Navigate to http://localhost:3001 and send queries through the chat interface.
 
 **Option B: Using curl**
 ```bash
diff --git a/demos/use_cases/mcp_filter/docker-compose.yaml b/demos/use_cases/mcp_filter/docker-compose.yaml
index 469bf0d5..5e94f277 100644
--- a/demos/use_cases/mcp_filter/docker-compose.yaml
+++ b/demos/use_cases/mcp_filter/docker-compose.yaml
@@ -16,6 +16,8 @@ services:
       context: ../../../
       dockerfile: Dockerfile
     ports:
+      - "11000:11000"
+      - "12001:12001"
       - "12000:12000"
       - "8001:8001"
     environment:
@@ -31,12 +33,17 @@ services:
       - "16686:16686"
       - "4317:4317"
       - "4318:4318"
-  open-web-ui:
-    image: dyrnq/open-webui:main
+  anythingllm:
+    image: mintplexlabs/anythingllm
     restart: always
     ports:
-      - "8080:8080"
+      - "3001:3001"
+    cap_add:
+      - SYS_ADMIN
     environment:
-      - DEFAULT_MODEL=gpt-4o-mini
-      - ENABLE_OPENAI_API=true
-      - OPENAI_API_BASE_URL=http://host.docker.internal:8001/v1
+      - STORAGE_DIR=/app/server/storage
+      - LLM_PROVIDER=generic-openai
+      - GENERIC_OPEN_AI_BASE_PATH=http://plano:8001/v1
+      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
+      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
+      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
diff --git a/demos/use_cases/multi_agent_with_crewai_langchain/README.md b/demos/use_cases/multi_agent_with_crewai_langchain/README.md
index 82b5c9c5..e36af9a2 100644
--- a/demos/use_cases/multi_agent_with_crewai_langchain/README.md
+++ b/demos/use_cases/multi_agent_with_crewai_langchain/README.md
@@ -47,13 +47,13 @@ This starts:
 - **Plano** (ports 12000, 8001) - routing and orchestration
 - **CrewAI Flight Agent** (port 10520) - flight search
 - **LangChain Weather Agent** (port 10510) - weather forecasts
-- **Open WebUI** (port 8080) - chat interface
+- **AnythingLLM** (port 3001) - chat interface
 - **Jaeger** (port 16686) - distributed tracing
 
 ### Try It Out
 
 1. **Open the Chat Interface**
-   - Navigate to [http://localhost:8080](http://localhost:8080)
+   - Navigate to [http://localhost:3001](http://localhost:3001)
    - Create an account (stored locally)
 
 2. **Ask Multi-Agent Questions**
@@ -75,9 +75,9 @@ This starts:
 ## Architecture
 
 ```
-┌─────────────┐
-│  Open WebUI │ (Chat Interface)
-└──────┬──────┘
+┌──────────────┐
+│ AnythingLLM  │ (Chat Interface)
+└──────┬───────┘
        │
        v
 ┌─────────────┐
diff --git a/demos/use_cases/multi_agent_with_crewai_langchain/config.yaml b/demos/use_cases/multi_agent_with_crewai_langchain/config.yaml
index b3a204f3..8b63ccea 100644
--- a/demos/use_cases/multi_agent_with_crewai_langchain/config.yaml
+++ b/demos/use_cases/multi_agent_with_crewai_langchain/config.yaml
@@ -55,3 +55,4 @@ listeners:
 
 tracing:
   random_sampling: 100
+  opentracing_grpc_endpoint: http://jaeger:4317
diff --git a/demos/use_cases/multi_agent_with_crewai_langchain/docker-compose.yaml b/demos/use_cases/multi_agent_with_crewai_langchain/docker-compose.yaml
index e114442f..c233699b 100644
--- a/demos/use_cases/multi_agent_with_crewai_langchain/docker-compose.yaml
+++ b/demos/use_cases/multi_agent_with_crewai_langchain/docker-compose.yaml
@@ -9,7 +9,8 @@ services:
     environment:
       - ARCH_CONFIG_PATH=/app/arch_config.yaml
       - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
-      - OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
+      - OTEL_TRACING_GRPC_ENDPOINT=http://jaeger:4317
+      - LOG_LEVEL=${LOG_LEVEL:-info}
     volumes:
       - ./config.yaml:/app/arch_config.yaml:ro
       - /etc/ssl/cert.pem:/etc/ssl/cert.pem
@@ -36,15 +37,20 @@ services:
       - LLM_GATEWAY_ENDPOINT=http://plano:12000/v1
     command: ["python", "-u", "langchain/weather_agent.py"]
 
-  open-web-ui:
-    image: dyrnq/open-webui:main
+  anythingllm:
+    image: mintplexlabs/anythingllm
     restart: always
     ports:
-      - "8080:8080"
+      - "3001:3001"
+    cap_add:
+      - SYS_ADMIN
     environment:
-      - DEFAULT_MODEL=gpt-4o-mini
-      - ENABLE_OPENAI_API=true
-      - OPENAI_API_BASE_URL=http://plano:8001/v1
+      - STORAGE_DIR=/app/server/storage
+      - LLM_PROVIDER=generic-openai
+      - GENERIC_OPEN_AI_BASE_PATH=http://plano:8001/v1
+      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
+      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
+      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
 
   jaeger:
     build:
@@ -53,4 +59,3 @@ services:
     ports:
       - "16686:16686"  # Jaeger UI
       - "4317:4317"    # OTLP gRPC receiver
-      - "4318:4318"    # OTLP HTTP receiver
diff --git a/demos/use_cases/preference_based_routing/README.md b/demos/use_cases/preference_based_routing/README.md
index eb785cf0..1af5ec31 100644
--- a/demos/use_cases/preference_based_routing/README.md
+++ b/demos/use_cases/preference_based_routing/README.md
@@ -6,7 +6,7 @@ This demo shows how you can use user preferences to route user prompts to approp
 Make sure your machine is up to date with [latest version of plano]([url](https://github.com/katanemo/plano/tree/main?tab=readme-ov-file#prerequisites)). And you have activated the virtual environment.
 
 
-1. start the openwebui
+1. start anythingllm
 ```bash
 (venv) $ cd demos/use_cases/preference_based_routing
 (venv) $ docker compose up -d
@@ -24,7 +24,7 @@ Make sure your machine is up to date with [latest version of plano]([url](https:
 ...
 ```
 
-3. open openwebui http://localhost:8080/
+3. open AnythingLLM http://localhost:3001/
 
 # Testing out preference based routing
 
diff --git a/demos/use_cases/preference_based_routing/docker-compose.yaml b/demos/use_cases/preference_based_routing/docker-compose.yaml
index 8387ccac..b4bc6a6b 100644
--- a/demos/use_cases/preference_based_routing/docker-compose.yaml
+++ b/demos/use_cases/preference_based_routing/docker-compose.yaml
@@ -1,14 +1,37 @@
 services:
 
-  open-web-ui:
-    image: dyrnq/open-webui:main
+  plano:
+    build:
+      context: ../../../
+      dockerfile: Dockerfile
+    ports:
+      - "12000:12000"
+      - "12001:12001"
+    environment:
+      - ARCH_CONFIG_PATH=/app/arch_config.yaml
+      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
+      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:?ANTHROPIC_API_KEY environment variable is required but not set}
+      - OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317
+      - OTEL_TRACING_ENABLED=true
+      - RUST_LOG=debug
+    volumes:
+      - ./config.yaml:/app/arch_config.yaml:ro
+      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
+
+  anythingllm:
+    image: mintplexlabs/anythingllm
     restart: always
     ports:
-      - "8080:8080"
+      - "3001:3001"
+    cap_add:
+      - SYS_ADMIN
     environment:
-      - DEFAULT_MODELS=gpt-4o-mini
-      - ENABLE_OPENAI_API=true
-      - OPENAI_API_BASE_URL=http://host.docker.internal:12000/v1
+      - STORAGE_DIR=/app/server/storage
+      - LLM_PROVIDER=generic-openai
+      - GENERIC_OPEN_AI_BASE_PATH=http://plano:12000/v1
+      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
+      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
+      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
 
   jaeger:
     build:
@@ -18,12 +41,12 @@ services:
       - "4317:4317"
       - "4318:4318"
 
-  prometheus:
-    build:
-      context: ../../shared/prometheus
+  # prometheus:
+  #   build:
+  #     context: ../../shared/prometheus
 
-  grafana:
-    build:
-      context: ../../shared/grafana
-    ports:
-      - "3000:3000"
+  # grafana:
+  #   build:
+  #     context: ../../shared/grafana
+  #   ports:
+  #     - "3000:3000"
diff --git a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml
index 62e7ab96..abd909a0 100644
--- a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml
+++ b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml
@@ -130,5 +130,6 @@ prompt_targets:
     required: true
     type: int
 tracing:
+  opentracing_grpc_endpoint: http://host.docker.internal:4317
   random_sampling: 100
 version: v0.3.0
diff --git a/tests/e2e/docker-compose.yaml b/tests/e2e/docker-compose.yaml
index 31dd28de..fa171051 100644
--- a/tests/e2e/docker-compose.yaml
+++ b/tests/e2e/docker-compose.yaml
@@ -16,4 +16,4 @@ services:
       - OPENAI_API_KEY=${OPENAI_API_KEY:?error}
       - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
       - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:?error}
-      - OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
+      - OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317
diff --git a/tests/rest/tracing.rest b/tests/rest/tracing.rest
deleted file mode 100644
index e277dedd..00000000
--- a/tests/rest/tracing.rest
+++ /dev/null
@@ -1,31 +0,0 @@
-POST http://localhost:4318/v1/traces
-Content-Type: application/json
-
-{
-  "resourceSpans": [
-    {
-      "resource": {
-        "attributes": [
-          { "key": "service.name", "value": { "stringValue": "upstream-llm" } }
-        ]
-      },
-      "scopeSpans": [
-        {
-          "scope": { "name": "default", "version": "1.0", "attributes": [] },
-          "spans": [
-            {
-              "traceId": "fa8f7c410c28092faafbd7d4a2f5e742",
-              "spanId": "4dc43055a07410d6",
-              "parentSpanId": "f0acd74216a5e179",
-              "name": "archgw",
-              "startTimeUnixNano": "1731363782228270000",
-              "endTimeUnixNano": "1731363787843156000",
-              "kind": 1,
-              "attributes": []
-            }
-          ]
-        }
-      ]
-    }
-  ]
-}