enable state management for v1/responses (#631)

* first commit with tests to enable state management via memory * fixed logs to follow the conversational flow a bit better * added support for supabase * added the state_storage_v1_responses flag, and use that to store state appropriately * cleaned up logs and fixed issue with connectivity for llm gateway in weather forecast demo * fixed mixed inputs from openai v1/responses api (#632) * fixed mixed inputs from openai v1/responses api * removing tracing from model-alias-routing * handling additional input types from openairs --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local> * resolving PR comments --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
2026-05-08 15:22:43 +02:00 · 2025-12-17 12:18:38 -08:00 · 2025-12-17 12:18:38 -08:00 · d5a273f740
commit d5a273f740
parent 33e90dd338
26 changed files with 2687 additions and 76 deletions
--- a/crates/hermesllm/src/apis/streaming_shapes/responses_api_streaming_buffer.rs
+++ b/crates/hermesllm/src/apis/streaming_shapes/responses_api_streaming_buffer.rs
@ -59,6 +59,11 @@ pub struct ResponsesAPIStreamBuffer {
    model: Option<String>,
    created_at: Option<i64>,

+    /// Full response metadata from upstream (tools, temperature, etc.)
+    /// This is extracted from the first upstream event and used to build
+    /// complete response.created and response.in_progress events
+    upstream_response_metadata: Option<ResponsesAPIResponse>,
+
    /// Lifecycle state flags
    created_emitted: bool,
    in_progress_emitted: bool,
@ -88,6 +93,7 @@ impl ResponsesAPIStreamBuffer {
            response_id: None,
            model: None,
            created_at: None,
+            upstream_response_metadata: None,
            created_emitted: false,
            in_progress_emitted: false,
            output_items_added: HashMap::new(),
@ -171,6 +177,15 @@ impl ResponsesAPIStreamBuffer {

    /// Build the base response object with current state
    fn build_response(&self, status: ResponseStatus) -> ResponsesAPIResponse {
+        // If we have upstream metadata, use it as a base and update status/output
+        if let Some(upstream) = &self.upstream_response_metadata {
+            let mut response = upstream.clone();
+            response.status = status;
+            // Don't update output here - will be set in finalize()
+            return response;
+        }
+
+        // Fallback: build a minimal response from local state
        ResponsesAPIResponse {
            id: self.response_id.clone().unwrap_or_default(),
            object: "response".to_string(),
@ -293,24 +308,40 @@ impl ResponsesAPIStreamBuffer {
        // Build final response
        let mut output_items = Vec::new();

-        // Add tool calls to output
-        for (item_id, arguments) in &self.function_arguments {
-            let output_index = self.output_items_added.iter()
-                .find(|(_, id)| *id == item_id)
-                .map(|(idx, _)| *idx)
-                .unwrap_or(0);
+        // Build complete output array by iterating through all output indices in order
+        let max_output_index = self.output_items_added.keys().max().copied().unwrap_or(-1);

-            let (call_id, name) = self.tool_call_metadata.get(&output_index)
-                .cloned()
-                .unwrap_or_else(|| (format!("call_{}", uuid::Uuid::new_v4()), "unknown".to_string()));
+        for output_index in 0..=max_output_index {
+            if let Some(item_id) = self.output_items_added.get(&output_index) {
+                // Check if this is a function call
+                if let Some(arguments) = self.function_arguments.get(item_id) {
+                    let (call_id, name) = self.tool_call_metadata.get(&output_index)
+                        .cloned()
+                        .unwrap_or_else(|| (format!("call_{}", uuid::Uuid::new_v4()), "unknown".to_string()));

-            output_items.push(OutputItem::FunctionCall {
-                id: item_id.clone(),
-                status: OutputItemStatus::Completed,
-                call_id,
-                name: Some(name),
-                arguments: Some(arguments.clone()),
-            });
+                    output_items.push(OutputItem::FunctionCall {
+                        id: item_id.clone(),
+                        status: OutputItemStatus::Completed,
+                        call_id,
+                        name: Some(name),
+                        arguments: Some(arguments.clone()),
+                    });
+                }
+                // Check if this is a text message
+                else if let Some(text) = self.text_content.get(item_id) {
+                    use crate::apis::openai_responses::OutputContent;
+                    output_items.push(OutputItem::Message {
+                        id: item_id.clone(),
+                        status: OutputItemStatus::Completed,
+                        role: "assistant".to_string(),
+                        content: vec![OutputContent::OutputText {
+                            text: text.clone(),
+                            annotations: vec![],
+                            logprobs: None,
+                        }],
+                    });
+                }
+            }
        }

        let mut final_response = self.build_response(ResponseStatus::Completed);
@ -365,6 +396,24 @@ impl SseStreamBufferTrait for ResponsesAPIStreamBuffer {

        let mut events = Vec::new();

+        // Capture upstream metadata from ResponseCreated or ResponseInProgress if present
+        match stream_event {
+            ResponsesAPIStreamEvent::ResponseCreated { response, .. } |
+            ResponsesAPIStreamEvent::ResponseInProgress { response, .. } => {
+                if self.upstream_response_metadata.is_none() {
+                    // Store the full upstream response as our metadata template
+                    self.upstream_response_metadata = Some(response.clone());
+                    // Also extract basic fields
+                    self.response_id = Some(response.id.clone());
+                    self.model = Some(response.model.clone());
+                    self.created_at = Some(response.created_at);
+                }
+                // Don't emit these - we'll generate our own lifecycle events
+                return;
+            }
+            _ => {}
+        }
+
        // Emit lifecycle events if not yet emitted
        if !self.created_emitted {
            // Initialize metadata from first event if needed