enable state management for v1/responses (#631)

* first commit with tests to enable state management via memory

* fixed logs to follow the conversational flow a bit better

* added support for supabase

* added the state_storage_v1_responses flag, and use that to store state appropriately

* cleaned up logs and fixed issue with connectivity for llm gateway in weather forecast demo

* fixed mixed inputs from openai v1/responses api (#632)

* fixed mixed inputs from openai v1/responses api

* removing tracing from model-alias-routing

* handling additional input types from OpenAI's v1/responses API

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>

* resolving PR comments

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
This commit is contained in:
Salman Paracha 2025-12-17 12:18:38 -08:00 committed by GitHub
parent 33e90dd338
commit d5a273f740
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
26 changed files with 2687 additions and 76 deletions

View file

@ -1,4 +1,5 @@
use common::configuration::ModelUsagePreference;
use common::consts::{REQUEST_ID_HEADER};
use common::traces::{TraceCollector, SpanKind, SpanBuilder, parse_traceparent};
use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
use hermesllm::{ProviderRequest, ProviderRequestType};
@ -43,6 +44,10 @@ pub async fn router_chat_get_upstream_model(
) -> Result<RoutingResult, RoutingError> {
// Clone metadata for routing before converting (which consumes client_request)
let routing_metadata = client_request.metadata().clone();
let request_id = request_headers
.get(REQUEST_ID_HEADER)
.and_then(|value| value.to_str().ok())
.unwrap_or("unknown");
// Convert to ChatCompletionsRequest for routing (regardless of input type)
let chat_request = match ProviderRequestType::try_from((
@ -73,7 +78,8 @@ pub async fn router_chat_get_upstream_model(
};
debug!(
"[ARCH_ROUTER REQ]: {}",
"[PLANO_REQ_ID: {}]: ROUTER_REQ: {}",
request_id,
&serde_json::to_string(&chat_request).unwrap()
);
@ -114,14 +120,13 @@ pub async fn router_chat_get_upstream_model(
};
info!(
"request received, request type: chat_completion, usage preferences from request: {}, request path: {}, latest message: {}",
"[PLANO_REQ_ID: {}] | ROUTER_REQ | Usage preferences from request: {}, request_path: {}, latest message: {}",
request_id,
usage_preferences.is_some(),
request_path,
latest_message_for_log
);
debug!("usage preferences from request: {:?}", usage_preferences);
// Capture start time for routing span
let routing_start_time = std::time::Instant::now();
let routing_start_system_time = std::time::SystemTime::now();
@ -153,7 +158,8 @@ pub async fn router_chat_get_upstream_model(
None => {
// No route determined, use default model from request
info!(
"No route determined, using default model from request: {}",
"[PLANO_REQ_ID: {}] | ROUTER_REQ | No route determined, using default model from request: {}",
request_id,
chat_request.model
);