enable state management for v1/responses (#631)

* first commit with tests to enable state management via memory

* fixed logs to follow the conversational flow a bit better

* added support for supabase

* added the state_storage_v1_responses flag, and use that to store state appropriately

* cleaned up logs and fixed issue with connectivity for llm gateway in weather forecast demo

* fixed mixed inputs from openai v1/responses api (#632)

* fixed mixed inputs from openai v1/responses api

* removing tracing from model-alias-routing

* handling additional input types from OpenAI's v1/responses API

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>

* resolving PR comments

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
This commit is contained in:
Salman Paracha 2025-12-17 12:18:38 -08:00 committed by GitHub
parent 33e90dd338
commit d5a273f740
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
26 changed files with 2687 additions and 76 deletions

View file

@ -1,4 +1,5 @@
use common::configuration::ModelUsagePreference;
use common::consts::{REQUEST_ID_HEADER};
use common::traces::{TraceCollector, SpanKind, SpanBuilder, parse_traceparent};
use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
use hermesllm::{ProviderRequest, ProviderRequestType};
@ -43,6 +44,10 @@ pub async fn router_chat_get_upstream_model(
) -> Result<RoutingResult, RoutingError> {
// Clone metadata for routing before converting (which consumes client_request)
let routing_metadata = client_request.metadata().clone();
let request_id = request_headers
.get(REQUEST_ID_HEADER)
.and_then(|value| value.to_str().ok())
.unwrap_or("unknown");
// Convert to ChatCompletionsRequest for routing (regardless of input type)
let chat_request = match ProviderRequestType::try_from((
@ -73,7 +78,8 @@ pub async fn router_chat_get_upstream_model(
};
debug!(
"[ARCH_ROUTER REQ]: {}",
"[PLANO_REQ_ID: {}]: ROUTER_REQ: {}",
request_id,
&serde_json::to_string(&chat_request).unwrap()
);
@ -114,14 +120,13 @@ pub async fn router_chat_get_upstream_model(
};
info!(
"request received, request type: chat_completion, usage preferences from request: {}, request path: {}, latest message: {}",
"[PLANO_REQ_ID: {}] | ROUTER_REQ | Usage preferences from request: {}, request_path: {}, latest message: {}",
request_id,
usage_preferences.is_some(),
request_path,
latest_message_for_log
);
debug!("usage preferences from request: {:?}", usage_preferences);
// Capture start time for routing span
let routing_start_time = std::time::Instant::now();
let routing_start_system_time = std::time::SystemTime::now();
@ -153,7 +158,8 @@ pub async fn router_chat_get_upstream_model(
None => {
// No route determined, use default model from request
info!(
"No route determined, using default model from request: {}",
"[PLANO_REQ_ID: {}] | ROUTER_REQ | No route determined, using default model from request: {}",
request_id,
chat_request.model
);