mirror of
https://github.com/katanemo/plano.git
synced 2026-05-15 11:02:39 +02:00
enable state management for v1/responses (#631)
* first commit with tests to enable state mamangement via memory * fixed logs to follow the conversational flow a bit better * added support for supabase * added the state_storage_v1_responses flag, and use that to store state appropriately * cleaned up logs and fixed issue with connectivity for llm gateway in weather forecast demo * fixed mixed inputs from openai v1/responses api (#632) * fixed mixed inputs from openai v1/responses api * removing tracing from model-alias-rouing * handling additional input types from openairs --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local> * resolving PR comments --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
This commit is contained in:
parent
33e90dd338
commit
d5a273f740
26 changed files with 2687 additions and 76 deletions
|
|
@ -1,4 +1,5 @@
|
|||
use common::configuration::ModelUsagePreference;
|
||||
use common::consts::{REQUEST_ID_HEADER};
|
||||
use common::traces::{TraceCollector, SpanKind, SpanBuilder, parse_traceparent};
|
||||
use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
|
||||
use hermesllm::{ProviderRequest, ProviderRequestType};
|
||||
|
|
@ -43,6 +44,10 @@ pub async fn router_chat_get_upstream_model(
|
|||
) -> Result<RoutingResult, RoutingError> {
|
||||
// Clone metadata for routing before converting (which consumes client_request)
|
||||
let routing_metadata = client_request.metadata().clone();
|
||||
let request_id = request_headers
|
||||
.get(REQUEST_ID_HEADER)
|
||||
.and_then(|value| value.to_str().ok())
|
||||
.unwrap_or("unknown");
|
||||
|
||||
// Convert to ChatCompletionsRequest for routing (regardless of input type)
|
||||
let chat_request = match ProviderRequestType::try_from((
|
||||
|
|
@ -73,7 +78,8 @@ pub async fn router_chat_get_upstream_model(
|
|||
};
|
||||
|
||||
debug!(
|
||||
"[ARCH_ROUTER REQ]: {}",
|
||||
"[PLANO_REQ_ID: {}]: ROUTER_REQ: {}",
|
||||
request_id,
|
||||
&serde_json::to_string(&chat_request).unwrap()
|
||||
);
|
||||
|
||||
|
|
@ -114,14 +120,13 @@ pub async fn router_chat_get_upstream_model(
|
|||
};
|
||||
|
||||
info!(
|
||||
"request received, request type: chat_completion, usage preferences from request: {}, request path: {}, latest message: {}",
|
||||
"[PLANO_REQ_ID: {}] | ROUTER_REQ | Usage preferences from request: {}, request_path: {}, latest message: {}",
|
||||
request_id,
|
||||
usage_preferences.is_some(),
|
||||
request_path,
|
||||
latest_message_for_log
|
||||
);
|
||||
|
||||
debug!("usage preferences from request: {:?}", usage_preferences);
|
||||
|
||||
// Capture start time for routing span
|
||||
let routing_start_time = std::time::Instant::now();
|
||||
let routing_start_system_time = std::time::SystemTime::now();
|
||||
|
|
@ -153,7 +158,8 @@ pub async fn router_chat_get_upstream_model(
|
|||
None => {
|
||||
// No route determined, use default model from request
|
||||
info!(
|
||||
"No route determined, using default model from request: {}",
|
||||
"[PLANO_REQ_ID: {}] | ROUTER_REQ | No route determined, using default model from request: {}",
|
||||
request_id,
|
||||
chat_request.model
|
||||
);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue