diff --git a/.github/workflows/e2e_archgw.yml b/.github/workflows/e2e_archgw.yml index cf6cc702..b18735a6 100644 --- a/.github/workflows/e2e_archgw.yml +++ b/.github/workflows/e2e_archgw.yml @@ -37,6 +37,7 @@ jobs: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }} GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} run: | docker compose up | tee &> archgw.logs & @@ -63,5 +64,6 @@ jobs: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }} GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} run: | docker compose down diff --git a/.github/workflows/e2e_test_preference_based_routing.yml b/.github/workflows/e2e_test_preference_based_routing.yml index 5faa9ee7..f0e2b106 100644 --- a/.github/workflows/e2e_test_preference_based_routing.yml +++ b/.github/workflows/e2e_test_preference_based_routing.yml @@ -53,6 +53,7 @@ jobs: MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }} GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} ARCH_API_KEY: ${{ secrets.ARCH_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} run: | source venv/bin/activate cd demos/shared/test_runner && sh run_demo_tests.sh use_cases/preference_based_routing diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 576a7fc3..df715e00 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -30,6 +30,7 @@ jobs: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }} GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} run: | python -mvenv venv source venv/bin/activate && cd tests/e2e && bash run_e2e_tests.sh diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index 53044770..5c2fd420 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -526,13 +526,13 @@ static_resources: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 - - name: claude + - name: anthropic connect_timeout: 0.5s type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN load_assignment: - cluster_name: claude + cluster_name: anthropic endpoints: - lb_endpoints: - endpoint: diff --git a/arch/supervisord.conf b/arch/supervisord.conf index bec147cc..df25eea9 100644 --- a/arch/supervisord.conf +++ b/arch/supervisord.conf @@ -9,7 +9,7 @@ stdout_logfile_maxbytes=0 stderr_logfile_maxbytes=0 [program:envoy] -command=/bin/sh -c "python /app/config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log//envoy.log" +command=/bin/sh -c "python /app/config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug --log-format '[%%Y-%%m-%%d %%T.%%e][%%l] %%v' 2>&1 | tee /var/log//envoy.log" stdout_logfile=/dev/stdout redirect_stderr=true stdout_logfile_maxbytes=0 diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py index 8346acc0..0f157ea1 100644 --- a/arch/tools/cli/config_generator.py +++ b/arch/tools/cli/config_generator.py @@ -8,12 +8,12 @@ from urllib.parse import urlparse SUPPORTED_PROVIDERS = [ "arch", - "claude", "deepseek", "groq", "mistral", "openai", "gemini", + "anthropic", ] diff --git a/crates/brightstaff/src/handlers/chat_completions.rs b/crates/brightstaff/src/handlers/chat_completions.rs index d0e5910a..fff07c22 100644 --- a/crates/brightstaff/src/handlers/chat_completions.rs +++ b/crates/brightstaff/src/handlers/chat_completions.rs @@ -4,6 +4,8 @@ use bytes::Bytes; use common::configuration::ModelUsagePreference; use common::consts::ARCH_PROVIDER_HINT_HEADER; use hermesllm::apis::openai::ChatCompletionsRequest; +use hermesllm::clients::SupportedAPIs; +use hermesllm::{ProviderRequest, ProviderRequestType}; use http_body_util::combinators::BoxBody; use http_body_util::{BodyExt, Full, StreamBody}; use hyper::body::Frame; @@ -22,66 +24,61 @@ fn full>(chunk: T) -> BoxBody { .boxed() } -pub async fn chat_completions( +pub async fn chat( request: Request, router_service: Arc, - llm_provider_endpoint: String, + full_qualified_llm_provider_url: String, ) -> Result>, hyper::Error> { + let request_path = request.uri().path().to_string(); let mut request_headers = request.headers().clone(); - let chat_request_bytes = request.collect().await?.to_bytes(); debug!("Received request body (raw utf8): {}", String::from_utf8_lossy(&chat_request_bytes)); + let mut client_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &SupportedAPIs::from_endpoint(request_path.as_str()).unwrap())) { + Ok(request) => request, + Err(err) => { + warn!("Failed to parse request as ProviderRequestType: {}", err); + let err_msg = format!("Failed to parse request: {}", err); + let mut bad_request = Response::new(full(err_msg)); + *bad_request.status_mut() = StatusCode::BAD_REQUEST; + return Ok(bad_request); + } + }; - let chat_request_parsed = serde_json::from_slice::(&chat_request_bytes) - .inspect_err(|err| { - warn!( - "Failed to parse request body as JSON: err: {}, str: {}", - err, - String::from_utf8_lossy(&chat_request_bytes) - ) - }) - .unwrap_or_else(|_| { - warn!( - "Failed to parse request body as JSON: {}", - String::from_utf8_lossy(&chat_request_bytes) - ); - serde_json::Value::Null - }); + // Clone metadata for routing and remove archgw_preference_config from original + let routing_metadata = client_request.metadata().clone(); - if chat_request_parsed == serde_json::Value::Null { - warn!("Request body is not valid JSON"); - let err_msg = "Request body is not valid JSON".to_string(); - let mut bad_request = Response::new(full(err_msg)); - *bad_request.status_mut() = StatusCode::BAD_REQUEST; - return Ok(bad_request); + if client_request.remove_metadata_key("archgw_preference_config") { + debug!("Removed archgw_preference_config from metadata"); } - let chat_completion_request: ChatCompletionsRequest = - serde_json::from_value(chat_request_parsed.clone()).unwrap(); + let client_request_bytes_for_upstream = ProviderRequestType::to_bytes(&client_request).unwrap(); - // remove metadata from the request - let mut chat_request_user_preferences_removed = chat_request_parsed; - if let Some(metadata) = chat_request_user_preferences_removed.get_mut("metadata") { - debug!("Removing metadata from request"); - if let Some(m) = metadata.as_object_mut() { - m.remove("archgw_preference_config"); - debug!("Removed archgw_preference_config from metadata"); - } - - // if metadata is empty, remove it - if metadata.as_object().map_or(false, |m| m.is_empty()) { - debug!("Removing empty metadata from request"); - chat_request_user_preferences_removed - .as_object_mut() - .map(|m| m.remove("metadata")); - } - } + // Convert to ChatCompletionsRequest regardless of input type (clone to avoid moving original) + let chat_completions_request_for_arch_router: ChatCompletionsRequest = + match ProviderRequestType::try_from((client_request, &SupportedAPIs::OpenAIChatCompletions(hermesllm::apis::OpenAIApi::ChatCompletions))) { + Ok(ProviderRequestType::ChatCompletionsRequest(req)) => req, + Ok(ProviderRequestType::MessagesRequest(_)) => { + // This should not happen after conversion to OpenAI format + warn!("Unexpected: got MessagesRequest after converting to OpenAI format"); + let err_msg = "Request conversion failed".to_string(); + let mut bad_request = Response::new(full(err_msg)); + *bad_request.status_mut() = StatusCode::BAD_REQUEST; + return Ok(bad_request); + }, + Err(err) => { + warn!("Failed to convert request to ChatCompletionsRequest: {}", err); + let err_msg = format!("Failed to convert request: {}", err); + let mut bad_request = Response::new(full(err_msg)); + *bad_request.status_mut() = StatusCode::BAD_REQUEST; + return Ok(bad_request); + } + }; debug!( - "arch-router request received: {}", - &serde_json::to_string(&chat_completion_request).unwrap() + "[BRIGHTSTAFF -> ARCH_ROUTER] REQ: {}", + &serde_json::to_string(&chat_completions_request_for_arch_router).unwrap() ); let trace_parent = request_headers @@ -90,7 +87,7 @@ pub async fn chat_completions( .map(|(_, value)| value.to_str().unwrap_or_default().to_string()); let usage_preferences_str: Option = - chat_completion_request.metadata.and_then(|metadata| { + routing_metadata.as_ref().and_then(|metadata| { metadata .get("archgw_preference_config") .map(|value| value.to_string()) @@ -101,7 +98,7 @@ pub async fn chat_completions( .and_then(|s| serde_yaml::from_str(s).ok()); let latest_message_for_log = - chat_completion_request + chat_completions_request_for_arch_router .messages .last() .map_or("None".to_string(), |msg| { @@ -126,7 +123,7 @@ pub async fn chat_completions( let model_name = match router_service .determine_route( - &chat_completion_request.messages, + &chat_completions_request_for_arch_router.messages, trace_parent.clone(), usage_preferences, ) @@ -137,9 +134,9 @@ pub async fn chat_completions( None => { debug!( "No route determined, using default model from request: {}", - chat_completion_request.model + chat_completions_request_for_arch_router.model ); - chat_completion_request.model.clone() + chat_completions_request_for_arch_router.model.clone() } }, Err(err) => { @@ -151,8 +148,8 @@ pub async fn chat_completions( }; debug!( - "sending request to llm provider: {}, with model hint: {}", - llm_provider_endpoint, model_name + "[BRIGHTSTAFF -> ARCH_ROUTER] URL: {}, Model Hint: {}", + full_qualified_llm_provider_url, model_name ); request_headers.insert( @@ -166,17 +163,13 @@ pub async fn chat_completions( header::HeaderValue::from_str(&trace_parent).unwrap(), ); } - - let chat_request_parsed_bytes = - serde_json::to_string(&chat_request_user_preferences_removed).unwrap(); - // remove content-length header if it exists request_headers.remove(header::CONTENT_LENGTH); let llm_response = match reqwest::Client::new() - .post(llm_provider_endpoint) + .post(full_qualified_llm_provider_url) .headers(request_headers) - .body(chat_request_parsed_bytes) + .body(client_request_bytes_for_upstream) .send() .await { diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index 34fa3aa3..d3843125 100644 --- a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -1,9 +1,10 @@ -use brightstaff::handlers::chat_completions::chat_completions; +use brightstaff::handlers::chat_completions::chat; use brightstaff::handlers::models::list_models; use brightstaff::router::llm_router::RouterService; use brightstaff::utils::tracing::init_tracer; use bytes::Bytes; use common::configuration::Configuration; +use common::consts::{CHAT_COMPLETIONS_PATH, MESSAGES_PATH}; use http_body_util::{combinators::BoxBody, BodyExt, Empty}; use hyper::body::Incoming; use hyper::server::conn::http1; @@ -67,10 +68,10 @@ async fn main() -> Result<(), Box> { &serde_json::to_string(arch_config.as_ref()).unwrap() ); - let llm_provider_endpoint = env::var("LLM_PROVIDER_ENDPOINT") - .unwrap_or_else(|_| "http://localhost:12001/v1/chat/completions".to_string()); + let llm_provider_url = env::var("LLM_PROVIDER_ENDPOINT") + .unwrap_or_else(|_| "http://localhost:12001".to_string()); - info!("llm provider endpoint: {}", llm_provider_endpoint); + info!("llm provider url: {}", llm_provider_url); info!("listening on http://{}", bind_address); let listener = TcpListener::bind(bind_address).await?; @@ -88,7 +89,7 @@ async fn main() -> Result<(), Box> { let router_service: Arc = Arc::new(RouterService::new( arch_config.llm_providers.clone(), - llm_provider_endpoint.clone(), + llm_provider_url.clone() + CHAT_COMPLETIONS_PATH, routing_model_name, routing_llm_provider, )); @@ -99,19 +100,21 @@ async fn main() -> Result<(), Box> { let io = TokioIo::new(stream); let router_service: Arc = Arc::clone(&router_service); - let llm_provider_endpoint = llm_provider_endpoint.clone(); + let llm_provider_url = llm_provider_url.clone(); let llm_providers = llm_providers.clone(); let service = service_fn(move |req| { + let router_service = Arc::clone(&router_service); let parent_cx = extract_context_from_request(&req); - let llm_provider_endpoint = llm_provider_endpoint.clone(); + let llm_provider_url = llm_provider_url.clone(); let llm_providers = llm_providers.clone(); async move { match (req.method(), req.uri().path()) { - (&Method::POST, "/v1/chat/completions") => { - chat_completions(req, router_service, llm_provider_endpoint) + (&Method::POST, CHAT_COMPLETIONS_PATH | MESSAGES_PATH) => { + let fully_qualified_url = format!("{}{}", llm_provider_url, req.uri().path()); + chat(req, router_service, fully_qualified_url) .with_context(parent_cx) .await } diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index 20d2623b..93f4fd38 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -149,8 +149,8 @@ pub struct EmbeddingProviver { pub enum LlmProviderType { #[serde(rename = "arch")] Arch, - #[serde(rename = "claude")] - Claude, + #[serde(rename = "anthropic")] + Anthropic, #[serde(rename = "deepseek")] Deepseek, #[serde(rename = "groq")] @@ -167,7 +167,7 @@ impl Display for LlmProviderType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { LlmProviderType::Arch => write!(f, "arch"), - LlmProviderType::Claude => write!(f, "claude"), + LlmProviderType::Anthropic => write!(f, "anthropic"), LlmProviderType::Deepseek => write!(f, "deepseek"), LlmProviderType::Groq => write!(f, "groq"), LlmProviderType::Gemini => write!(f, "gemini"), diff --git a/crates/common/src/consts.rs b/crates/common/src/consts.rs index 3ff2ce5e..0eb5a036 100644 --- a/crates/common/src/consts.rs +++ b/crates/common/src/consts.rs @@ -12,6 +12,7 @@ pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider"; pub const MESSAGES_KEY: &str = "messages"; pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint"; pub const CHAT_COMPLETIONS_PATH: &str = "/v1/chat/completions"; +pub const MESSAGES_PATH: &str = "/v1/messages"; pub const HEALTHZ_PATH: &str = "/healthz"; pub const X_ARCH_STATE_HEADER: &str = "x-arch-state"; pub const X_ARCH_API_RESPONSE: &str = "x-arch-api-response-message"; diff --git a/crates/common/src/routing.rs b/crates/common/src/routing.rs index f4baf896..2e9bac09 100644 --- a/crates/common/src/routing.rs +++ b/crates/common/src/routing.rs @@ -33,6 +33,7 @@ pub fn get_llm_provider( return provider; } + if llm_providers.default().is_some() { return llm_providers.default().unwrap(); } diff --git a/crates/common/src/tokenizer.rs b/crates/common/src/tokenizer.rs index ded885d5..9b11f639 100644 --- a/crates/common/src/tokenizer.rs +++ b/crates/common/src/tokenizer.rs @@ -2,7 +2,7 @@ use log::debug; #[allow(dead_code)] pub fn token_count(model_name: &str, text: &str) -> Result { - debug!("getting token count model={}", model_name); + debug!("TOKENIZER: computing token count for model={}", model_name); //HACK: add support for tokenizing mistral and other models //filed issue https://github.com/katanemo/arch/issues/222 diff --git a/crates/hermesllm/src/apis/anthropic.rs b/crates/hermesllm/src/apis/anthropic.rs index 0ffe4e8d..ae61e2fe 100644 --- a/crates/hermesllm/src/apis/anthropic.rs +++ b/crates/hermesllm/src/apis/anthropic.rs @@ -1,9 +1,14 @@ +use crate::providers::response::TokenUsage; use serde::{Deserialize, Serialize}; use serde_json::Value; use serde_with::skip_serializing_none; use std::collections::HashMap; use super::ApiDefinition; +use crate::providers::request::{ProviderRequest, ProviderRequestError}; +use crate::providers::response::{ProviderResponse, ProviderStreamResponse}; +use crate::clients::transformer::ExtractText; +use crate::{MESSAGES_PATH}; // Enum for all supported Anthropic APIs #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] @@ -17,13 +22,13 @@ pub enum AnthropicApi { impl ApiDefinition for AnthropicApi { fn endpoint(&self) -> &'static str { match self { - AnthropicApi::Messages => "/v1/messages", + AnthropicApi::Messages => MESSAGES_PATH, } } fn from_endpoint(endpoint: &str) -> Option { match endpoint { - "/v1/messages" => Some(AnthropicApi::Messages), + MESSAGES_PATH => Some(AnthropicApi::Messages), _ => None, } } @@ -186,6 +191,19 @@ pub enum MessagesContentBlock { }, } +impl ExtractText for Vec { + fn extract_text(&self) -> String { + self.iter() + .filter_map(|block| match block { + MessagesContentBlock::Text { text } => Some(text.as_str()), + _ => None, + }) + .collect::>() + .join("\n") + } +} + + #[derive(Serialize, Deserialize, Debug, Clone)] #[serde(rename_all = "snake_case")] pub enum MessagesImageSource { @@ -220,6 +238,15 @@ pub enum MessagesMessageContent { Blocks(Vec), } +impl ExtractText for MessagesMessageContent { + fn extract_text(&self) -> String { + match self { + MessagesMessageContent::Single(text) => text.clone(), + MessagesMessageContent::Blocks(parts) => parts.extract_text() + } + } +} + #[derive(Serialize, Deserialize, Debug, Clone)] #[serde(untagged)] pub enum MessagesSystemPrompt { @@ -369,6 +396,121 @@ impl MessagesRequest { } } +impl TryFrom<&[u8]> for MessagesRequest { + type Error = serde_json::Error; + + fn try_from(bytes: &[u8]) -> Result { + serde_json::from_slice(bytes) + } +} + +impl TokenUsage for MessagesResponse { + fn completion_tokens(&self) -> usize { + self.usage.output_tokens as usize + } + fn prompt_tokens(&self) -> usize { + self.usage.input_tokens as usize + } + fn total_tokens(&self) -> usize { + (self.usage.input_tokens + self.usage.output_tokens) as usize + } +} + +impl ProviderResponse for MessagesResponse { + fn usage(&self) -> Option<&dyn TokenUsage> { + Some(self) + } + fn extract_usage_counts(&self) -> Option<(usize, usize, usize)> { + Some((self.usage.input_tokens as usize, self.usage.output_tokens as usize, (self.usage.input_tokens + self.usage.output_tokens) as usize)) + } +} + +impl ProviderRequest for MessagesRequest { + fn model(&self) -> &str { + &self.model + } + + fn set_model(&mut self, model: String) { + self.model = model; + } + + fn is_streaming(&self) -> bool { + self.stream.unwrap_or(false) + } + + fn extract_messages_text(&self) -> String { + let mut text_parts = Vec::new(); + + // Include system prompt if present + if let Some(system) = &self.system { + match system { + MessagesSystemPrompt::Single(s) => text_parts.push(s.clone()), + MessagesSystemPrompt::Blocks(blocks) => { + for block in blocks { + if let MessagesContentBlock::Text { text } = block { + text_parts.push(text.clone()); + } + } + } + } + } + + // Extract text from all messages + for message in &self.messages { + match &message.content { + MessagesMessageContent::Single(text) => text_parts.push(text.clone()), + MessagesMessageContent::Blocks(blocks) => { + for block in blocks { + if let MessagesContentBlock::Text { text } = block { + text_parts.push(text.clone()); + } + } + } + } + } + + text_parts.join(" ") + } + + fn get_recent_user_message(&self) -> Option { + // Find the most recent user message + for message in self.messages.iter().rev() { + if message.role == MessagesRole::User { + match &message.content { + MessagesMessageContent::Single(text) => return Some(text.clone()), + MessagesMessageContent::Blocks(blocks) => { + for block in blocks { + if let MessagesContentBlock::Text { text } = block { + return Some(text.clone()); + } + } + } + } + } + } + None + } + + fn to_bytes(&self) -> Result, ProviderRequestError> { + serde_json::to_vec(self).map_err(|e| ProviderRequestError { + message: format!("Failed to serialize MessagesRequest: {}", e), + source: Some(Box::new(e)), + }) + } + + fn metadata(&self) -> &Option> { + return &self.metadata; + } + + fn remove_metadata_key(&mut self, key: &str) -> bool { + if let Some(ref mut metadata) = self.metadata { + metadata.remove(key).is_some() + } else { + false + } + } +} + impl MessagesResponse { pub fn api_type() -> AnthropicApi { AnthropicApi::Messages @@ -381,6 +523,54 @@ impl MessagesStreamEvent { } } +impl MessagesRole { + pub fn as_str(&self) -> &'static str { + match self { + MessagesRole::User => "user", + MessagesRole::Assistant => "assistant", + } + } +} + +// Implement ProviderStreamResponse for MessagesStreamEvent +impl ProviderStreamResponse for MessagesStreamEvent { + fn content_delta(&self) -> Option<&str> { + match self { + MessagesStreamEvent::ContentBlockDelta { delta, .. } => { + if let MessagesContentDelta::TextDelta { text } = delta { + Some(text) + } else { + None + } + } + _ => None, + } + } + + fn is_final(&self) -> bool { + matches!(self, MessagesStreamEvent::MessageStop) + } + + fn role(&self) -> Option<&str> { + match self { + MessagesStreamEvent::MessageStart { message } => Some(message.role.as_str()), + _ => None, + } + } + + fn event_type(&self) -> Option<&str> { + Some(match self { + MessagesStreamEvent::MessageStart { .. } => "message_start", + MessagesStreamEvent::ContentBlockStart { .. } => "content_block_start", + MessagesStreamEvent::ContentBlockDelta { .. } => "content_block_delta", + MessagesStreamEvent::ContentBlockStop { .. } => "content_block_stop", + MessagesStreamEvent::MessageDelta { .. } => "message_delta", + MessagesStreamEvent::MessageStop => "message_stop", + MessagesStreamEvent::Ping => "ping", + }) + } +} + #[cfg(test)] mod tests { use super::*; @@ -878,13 +1068,13 @@ mod tests { let api = AnthropicApi::Messages; // Test trait methods - assert_eq!(api.endpoint(), "/v1/messages"); + assert_eq!(api.endpoint(), MESSAGES_PATH); assert!(api.supports_streaming()); assert!(api.supports_tools()); assert!(api.supports_vision()); // Test from_endpoint trait method - let found_api = AnthropicApi::from_endpoint("/v1/messages"); + let found_api = AnthropicApi::from_endpoint(MESSAGES_PATH); assert_eq!(found_api, Some(AnthropicApi::Messages)); let not_found = AnthropicApi::from_endpoint("/v1/unknown"); diff --git a/crates/hermesllm/src/apis/mod.rs b/crates/hermesllm/src/apis/mod.rs index 78b634d5..b175988c 100644 --- a/crates/hermesllm/src/apis/mod.rs +++ b/crates/hermesllm/src/apis/mod.rs @@ -1,110 +1,9 @@ pub mod anthropic; pub mod openai; - -// Re-export all types for convenience pub use anthropic::*; pub use openai::*; -/// Common trait that all API definitions must implement -/// -/// This trait ensures consistency across different AI provider API definitions -/// and makes it easy to add new providers like Gemini, Claude, etc. -/// -/// Note: This is different from the `ApiProvider` enum in `clients::endpoints` -/// which represents provider identification, while this trait defines API capabilities. -/// -/// # Benefits -/// -/// - **Consistency**: All API providers implement the same interface -/// - **Extensibility**: Easy to add new providers without breaking existing code -/// - **Type Safety**: Compile-time guarantees that all providers implement required methods -/// - **Discoverability**: Clear documentation of what capabilities each API supports -/// -/// # Example implementation for a new provider: -/// -/// ```rust,ignore -/// use serde::{Deserialize, Serialize}; -/// use super::ApiDefinition; -/// -/// #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -/// pub enum GeminiApi { -/// GenerateContent, -/// ChatCompletions, -/// } -/// -/// impl GeminiApi { -/// pub fn endpoint(&self) -> &'static str { -/// match self { -/// GeminiApi::GenerateContent => "/v1/models/gemini-pro:generateContent", -/// GeminiApi::ChatCompletions => "/v1/models/gemini-pro:chat", -/// } -/// } -/// -/// pub fn from_endpoint(endpoint: &str) -> Option { -/// match endpoint { -/// "/v1/models/gemini-pro:generateContent" => Some(GeminiApi::GenerateContent), -/// "/v1/models/gemini-pro:chat" => Some(GeminiApi::ChatCompletions), -/// _ => None, -/// } -/// } -/// -/// pub fn supports_streaming(&self) -> bool { -/// match self { -/// GeminiApi::GenerateContent => true, -/// GeminiApi::ChatCompletions => true, -/// } -/// } -/// -/// pub fn supports_tools(&self) -> bool { -/// match self { -/// GeminiApi::GenerateContent => true, -/// GeminiApi::ChatCompletions => false, -/// } -/// } -/// -/// pub fn supports_vision(&self) -> bool { -/// match self { -/// GeminiApi::GenerateContent => true, -/// GeminiApi::ChatCompletions => false, -/// } -/// } -/// } -/// -/// impl ApiDefinition for GeminiApi { -/// fn endpoint(&self) -> &'static str { -/// self.endpoint() -/// } -/// -/// fn from_endpoint(endpoint: &str) -> Option { -/// Self::from_endpoint(endpoint) -/// } -/// -/// fn supports_streaming(&self) -> bool { -/// self.supports_streaming() -/// } -/// -/// fn supports_tools(&self) -> bool { -/// self.supports_tools() -/// } -/// -/// fn supports_vision(&self) -> bool { -/// self.supports_vision() -/// } -/// } -/// -/// // Now you can use generic code that works with any API: -/// fn print_api_info(api: &T) { -/// println!("Endpoint: {}", api.endpoint()); -/// println!("Supports streaming: {}", api.supports_streaming()); -/// println!("Supports tools: {}", api.supports_tools()); -/// println!("Supports vision: {}", api.supports_vision()); -/// } -/// -/// // Works with both OpenAI and Anthropic (and future Gemini) -/// print_api_info(&OpenAIApi::ChatCompletions); -/// print_api_info(&AnthropicApi::Messages); -/// print_api_info(&GeminiApi::GenerateContent); -/// ``` + pub trait ApiDefinition { /// Returns the endpoint path for this API fn endpoint(&self) -> &'static str; @@ -132,6 +31,7 @@ pub trait ApiDefinition { #[cfg(test)] mod tests { use super::*; + use crate::{CHAT_COMPLETIONS_PATH, MESSAGES_PATH}; #[test] fn test_generic_api_functionality() { @@ -150,8 +50,8 @@ mod tests { fn test_api_detection_from_endpoints() { // Test that we can detect APIs from endpoints using the trait let endpoints = vec![ - "/v1/chat/completions", - "/v1/messages", + CHAT_COMPLETIONS_PATH, + MESSAGES_PATH, "/v1/unknown" ]; diff --git a/crates/hermesllm/src/apis/openai.rs b/crates/hermesllm/src/apis/openai.rs index 2471fc35..7e89acd2 100644 --- a/crates/hermesllm/src/apis/openai.rs +++ b/crates/hermesllm/src/apis/openai.rs @@ -5,11 +5,11 @@ use std::collections::HashMap; use std::fmt::Display; use thiserror::Error; - - use crate::providers::request::{ProviderRequest, ProviderRequestError}; -use crate::providers::response::{ProviderResponse, ProviderStreamResponse, TokenUsage, SseStreamIter}; +use crate::providers::response::{ProviderResponse, ProviderStreamResponse, TokenUsage}; use super::ApiDefinition; +use crate::clients::transformer::{ExtractText}; +use crate::{CHAT_COMPLETIONS_PATH}; // ============================================================================ // OPENAI API ENUMERATION @@ -28,13 +28,13 @@ pub enum OpenAIApi { impl ApiDefinition for OpenAIApi { fn endpoint(&self) -> &'static str { match self { - OpenAIApi::ChatCompletions => "/v1/chat/completions", + OpenAIApi::ChatCompletions => CHAT_COMPLETIONS_PATH, } } fn from_endpoint(endpoint: &str) -> Option { match endpoint { - "/v1/chat/completions" => Some(OpenAIApi::ChatCompletions), + CHAT_COMPLETIONS_PATH => Some(OpenAIApi::ChatCompletions), _ => None, } } @@ -81,7 +81,7 @@ pub struct ChatCompletionsRequest { // Maximum tokens in the response has been deprecated, but we keep it for compatibility pub max_tokens: Option, pub modalities: Option>, - pub metadata: Option>, + pub metadata: Option>, pub n: Option, pub presence_penalty: Option, pub parallel_tool_calls: Option, @@ -174,6 +174,28 @@ pub enum MessageContent { Parts(Vec), } +// Content Extraction +impl ExtractText for MessageContent { + fn extract_text(&self) -> String { + match self { + MessageContent::Text(text) => text.clone(), + MessageContent::Parts(parts) => parts.extract_text() + } + } +} + +impl ExtractText for Vec { + fn extract_text(&self) -> String { + self.iter() + .filter_map(|part| match part { + ContentPart::Text { text } => Some(text.as_str()), + _ => None, + }) + .collect::>() + .join("\n") + } +} + impl Display for MessageContent { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -328,6 +350,7 @@ pub struct ChatCompletionsResponse { pub choices: Vec, pub usage: Usage, pub system_fingerprint: Option, + pub service_tier: Option, } /// Finish reason for completion @@ -576,6 +599,18 @@ impl ProviderRequest for ChatCompletionsRequest { source: Some(Box::new(e)), }) } + + fn metadata(&self) -> &Option> { + return &self.metadata; + } + + fn remove_metadata_key(&mut self, key: &str) -> bool { + if let Some(ref mut metadata) = self.metadata { + metadata.remove(key).is_some() + } else { + false + } + } } /// Implementation of ProviderResponse for ChatCompletionsResponse @@ -593,68 +628,6 @@ impl ProviderResponse for ChatCompletionsResponse { } } -// ============================================================================ -// OPENAI SSE STREAMING ITERATOR -// ============================================================================ - -/// OpenAI-specific SSE streaming iterator -/// Handles OpenAI's specific SSE format and ChatCompletionsStreamResponse parsing -pub struct OpenAISseIter -where - I: Iterator, - I::Item: AsRef, -{ - sse_stream: SseStreamIter, -} - -impl OpenAISseIter -where - I: Iterator, - I::Item: AsRef, -{ - pub fn new(sse_stream: SseStreamIter) -> Self { - Self { sse_stream } - } -} - -impl Iterator for OpenAISseIter -where - I: Iterator, - I::Item: AsRef, -{ - type Item = Result, Box>; - - fn next(&mut self) -> Option { - for line in &mut self.sse_stream.lines { - let line = line.as_ref(); - if line.is_empty() { - continue; - } - - if line.starts_with("data: ") { - let data = &line[6..]; // Remove "data: " prefix - if data == "[DONE]" { - return None; - } - - // Skip ping messages (usually from other providers, but handle gracefully) - if data == r#"{"type": "ping"}"# { - continue; - } - - // OpenAI-specific parsing of ChatCompletionsStreamResponse - match serde_json::from_str::(data) { - Ok(response) => return Some(Ok(Box::new(response))), - Err(e) => return Some(Err(Box::new( - OpenAIStreamError::InvalidStreamingData(format!("Error parsing OpenAI streaming data: {}, data: {}", e, data)) - ))), - } - } - } - None - } -} - // Direct implementation of ProviderStreamResponse trait on ChatCompletionsStreamResponse impl ProviderStreamResponse for ChatCompletionsStreamResponse { fn content_delta(&self) -> Option<&str> { @@ -680,6 +653,10 @@ impl ProviderStreamResponse for ChatCompletionsStreamResponse { Role::Tool => "tool", })) } + + fn event_type(&self) -> Option<&str> { + None // OpenAI doesn't use event types in SSE + } } @@ -982,13 +959,13 @@ mod tests { let api = OpenAIApi::ChatCompletions; // Test trait methods - assert_eq!(api.endpoint(), "/v1/chat/completions"); + assert_eq!(api.endpoint(), CHAT_COMPLETIONS_PATH); assert!(api.supports_streaming()); assert!(api.supports_tools()); assert!(api.supports_vision()); // Test from_endpoint - let found_api = OpenAIApi::from_endpoint("/v1/chat/completions"); + let found_api = OpenAIApi::from_endpoint(CHAT_COMPLETIONS_PATH); assert_eq!(found_api, Some(OpenAIApi::ChatCompletions)); let not_found = OpenAIApi::from_endpoint("/v1/unknown"); @@ -1139,4 +1116,84 @@ mod tests { let invalid_result: Result = serde_json::from_value(json!("invalid")); assert!(invalid_result.is_err()); } + + #[test] + fn test_chat_completions_response_with_service_tier() { + // Test that ChatCompletionsResponse can deserialize OpenAI responses with service_tier field + let json_response = r#"{ + "id": "chatcmpl-CAJc2Df6QCc7Mv3RP0Cf2xlbDV1x2", + "object": "chat.completion", + "created": 1756574706, + "model": "gpt-4o-2024-08-06", + "choices": [{ + "index": 0, + "message": { + "role": "assistant", + "content": "Test response content", + "annotations": [] + }, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": 65, + "completion_tokens": 184, + "total_tokens": 249, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_f33640a400" + }"#; + + let response: ChatCompletionsResponse = serde_json::from_str(json_response).unwrap(); + + assert_eq!(response.id, "chatcmpl-CAJc2Df6QCc7Mv3RP0Cf2xlbDV1x2"); + assert_eq!(response.object, "chat.completion"); + assert_eq!(response.created, 1756574706); + assert_eq!(response.model, "gpt-4o-2024-08-06"); + assert_eq!(response.service_tier, Some("default".to_string())); + assert_eq!(response.system_fingerprint, Some("fp_f33640a400".to_string())); + assert_eq!(response.choices.len(), 1); + assert_eq!(response.usage.prompt_tokens, 65); + assert_eq!(response.usage.completion_tokens, 184); + assert_eq!(response.usage.total_tokens, 249); + } + + #[test] + fn test_chat_completions_response_without_service_tier() { + // Test that ChatCompletionsResponse can deserialize responses without service_tier (backward compatibility) + let json_response = r#"{ + "id": "chatcmpl-123", + "object": "chat.completion", + "created": 1234567890, + "model": "gpt-4", + "choices": [{ + "index": 0, + "message": { + "role": "assistant", + "content": "Test response" + }, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 20, + "total_tokens": 30 + } + }"#; + + let response: ChatCompletionsResponse = serde_json::from_str(json_response).unwrap(); + + assert_eq!(response.id, "chatcmpl-123"); + assert_eq!(response.service_tier, None); // Should be None when not present + assert_eq!(response.system_fingerprint, None); + } } diff --git a/crates/hermesllm/src/clients/endpoints.rs b/crates/hermesllm/src/clients/endpoints.rs index bf0648a9..5af51fe0 100644 --- a/crates/hermesllm/src/clients/endpoints.rs +++ b/crates/hermesllm/src/clients/endpoints.rs @@ -6,12 +6,13 @@ //! # Examples //! //! ```rust -//! use hermesllm::clients::endpoints::{is_supported_endpoint, supported_endpoints}; +//! use hermesllm::clients::endpoints::supported_endpoints; //! //! // Check if we support an endpoint -//! assert!(is_supported_endpoint("/v1/chat/completions")); -//! assert!(is_supported_endpoint("/v1/messages")); -//! assert!(!is_supported_endpoint("/v1/unknown")); +//! use hermesllm::clients::endpoints::SupportedAPIs; +//! assert!(SupportedAPIs::from_endpoint("/v1/chat/completions").is_some()); +//! assert!(SupportedAPIs::from_endpoint("/v1/messages").is_some()); +//! assert!(!SupportedAPIs::from_endpoint("/v1/unknown").is_some()); //! //! // Get all supported endpoints //! let endpoints = supported_endpoints(); @@ -20,23 +21,81 @@ //! assert!(endpoints.contains(&"/v1/messages")); //! ``` -use crate::apis::{AnthropicApi, OpenAIApi, ApiDefinition}; +use crate::{apis::{AnthropicApi, ApiDefinition, OpenAIApi}, ProviderId}; +use std::fmt; -/// Check if the given endpoint path is supported -pub fn is_supported_endpoint(endpoint: &str) -> bool { - // Try OpenAI APIs - if OpenAIApi::from_endpoint(endpoint).is_some() { - return true; - } - - // Try Anthropic APIs - if AnthropicApi::from_endpoint(endpoint).is_some() { - return true; - } - - false +/// Unified enum representing all supported API endpoints across providers +#[derive(Debug, Clone, PartialEq)] +pub enum SupportedAPIs { + OpenAIChatCompletions(OpenAIApi), + AnthropicMessagesAPI(AnthropicApi), } +impl fmt::Display for SupportedAPIs { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SupportedAPIs::OpenAIChatCompletions(api) => write!(f, "OpenAI API ({})", api.endpoint()), + SupportedAPIs::AnthropicMessagesAPI(api) => write!(f, "Anthropic API ({})", api.endpoint()), + } + } +} + +impl SupportedAPIs { + /// Create a SupportedApi from an endpoint path + pub fn from_endpoint(endpoint: &str) -> Option { + if let Some(openai_api) = OpenAIApi::from_endpoint(endpoint) { + return Some(SupportedAPIs::OpenAIChatCompletions(openai_api)); + } + + if let Some(anthropic_api) = AnthropicApi::from_endpoint(endpoint) { + return Some(SupportedAPIs::AnthropicMessagesAPI(anthropic_api)); + } + + None + } + + /// Get the endpoint path for this API + pub fn endpoint(&self) -> &'static str { + match self { + SupportedAPIs::OpenAIChatCompletions(api) => api.endpoint(), + SupportedAPIs::AnthropicMessagesAPI(api) => api.endpoint(), + } + } + + pub fn target_endpoint_for_provider(&self, provider_id: &ProviderId, request_path: &str) -> String { + let default_endpoint = "/v1/chat/completions".to_string(); + match self { + SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages) => { + match provider_id { + ProviderId::Anthropic => "/v1/messages".to_string(), + _ => default_endpoint, + } + } + _ => { + match provider_id { + ProviderId::Groq => { + if request_path.starts_with("/v1/") { + format!("/openai{}", request_path) + } else { + default_endpoint + } + } + ProviderId::Gemini => { + if request_path.starts_with("/v1/") { + "/v1beta/openai/chat/completions".to_string() + } else { + default_endpoint + } + } + _ => default_endpoint, + } + } + } + } +} + + + /// Get all supported endpoint paths pub fn supported_endpoints() -> Vec<&'static str> { let mut endpoints = Vec::new(); @@ -74,15 +133,15 @@ mod tests { #[test] fn test_is_supported_endpoint() { // OpenAI endpoints - assert!(is_supported_endpoint("/v1/chat/completions")); + assert!(SupportedAPIs::from_endpoint("/v1/chat/completions").is_some()); // Anthropic endpoints - assert!(is_supported_endpoint("/v1/messages")); + assert!(SupportedAPIs::from_endpoint("/v1/messages").is_some()); // Unsupported endpoints - assert!(!is_supported_endpoint("/v1/unknown")); - assert!(!is_supported_endpoint("/v2/chat")); - assert!(!is_supported_endpoint("")); + assert!(!SupportedAPIs::from_endpoint("/v1/unknown").is_some()); + assert!(!SupportedAPIs::from_endpoint("/v2/chat").is_some()); + assert!(!SupportedAPIs::from_endpoint("").is_some()); } #[test] diff --git a/crates/hermesllm/src/clients/mod.rs b/crates/hermesllm/src/clients/mod.rs index eb3032ce..73972445 100644 --- a/crates/hermesllm/src/clients/mod.rs +++ b/crates/hermesllm/src/clients/mod.rs @@ -4,6 +4,6 @@ pub mod endpoints; // Re-export the main items for easier access pub use lib::*; -pub use endpoints::{is_supported_endpoint, supported_endpoints, identify_provider}; +pub use endpoints::{SupportedAPIs, identify_provider}; // Note: transformer module contains TryFrom trait implementations that are automatically available diff --git a/crates/hermesllm/src/clients/transformer.rs b/crates/hermesllm/src/clients/transformer.rs index 23ca26ee..8170a53d 100644 --- a/crates/hermesllm/src/clients/transformer.rs +++ b/crates/hermesllm/src/clients/transformer.rs @@ -44,8 +44,6 @@ use serde_json::Value; use std::time::{SystemTime, UNIX_EPOCH}; - -// Import centralized types use crate::apis::*; use super::TransformError; @@ -61,7 +59,7 @@ const DEFAULT_MAX_TOKENS: u32 = 4096; // ============================================================================ /// Trait for extracting text content from various types -trait ExtractText { +pub trait ExtractText { fn extract_text(&self) -> String; } @@ -213,6 +211,7 @@ impl TryFrom for ChatCompletionsResponse { choices: vec![choice], usage, system_fingerprint: None, + service_tier: None, }) } } @@ -541,40 +540,6 @@ impl Into for MessagesRole { } } -// Content Extraction -impl ExtractText for MessageContent { - fn extract_text(&self) -> String { - match self { - MessageContent::Text(text) => text.clone(), - MessageContent::Parts(parts) => parts.extract_text() - } - } -} - -impl ExtractText for Vec { - fn extract_text(&self) -> String { - self.iter() - .filter_map(|part| match part { - ContentPart::Text { text } => Some(text.as_str()), - _ => None, - }) - .collect::>() - .join("\n") - } -} - -impl ExtractText for Vec { - fn extract_text(&self) -> String { - self.iter() - .filter_map(|block| match block { - MessagesContentBlock::Text { text } => Some(text.as_str()), - _ => None, - }) - .collect::>() - .join("\n") - } -} - // Content Utilities impl ContentUtils for Vec { fn extract_tool_calls(&self) -> Result>, TransformError> { diff --git a/crates/hermesllm/src/lib.rs b/crates/hermesllm/src/lib.rs index b4ad9932..a9e8c48e 100644 --- a/crates/hermesllm/src/lib.rs +++ b/crates/hermesllm/src/lib.rs @@ -4,12 +4,16 @@ pub mod providers; pub mod apis; pub mod clients; - // Re-export important types and traits pub use providers::request::{ProviderRequestType, ProviderRequest, ProviderRequestError}; -pub use providers::response::{ProviderResponseType, ProviderResponse, ProviderStreamResponse, ProviderStreamResponseIter, ProviderResponseError, TokenUsage}; +pub use providers::response::{ProviderResponseType, ProviderStreamResponseType, ProviderResponse, ProviderStreamResponse, ProviderResponseError, TokenUsage, SseEvent, SseStreamIter}; pub use providers::id::ProviderId; -pub use providers::adapters::{has_compatible_api, supported_apis}; + + +//TODO: Refactor such that commons doesn't depend on Hermes. For now this will clean up strings +pub const CHAT_COMPLETIONS_PATH: &str = "/v1/chat/completions"; +pub const MESSAGES_PATH: &str = "/v1/messages"; + #[cfg(test)] mod tests { @@ -23,72 +27,51 @@ mod tests { assert_eq!(ProviderId::from("arch"), ProviderId::Arch); } - #[test] - fn test_provider_api_compatibility() { - assert!(has_compatible_api(&ProviderId::OpenAI, "/v1/chat/completions")); - assert!(!has_compatible_api(&ProviderId::OpenAI, "/v1/embeddings")); - } - - #[test] - fn test_provider_supported_apis() { - let apis = supported_apis(&ProviderId::OpenAI); - assert!(apis.contains(&"/v1/chat/completions")); - - // Test that provider supports the expected API endpoints - assert!(has_compatible_api(&ProviderId::OpenAI, "/v1/chat/completions")); - } - - #[test] - fn test_provider_request_parsing() { - // Test with a sample JSON request - let json_request = r#"{ - "model": "gpt-4", - "messages": [ - { - "role": "system", - "content": "You are a helpful assistant" - }, - { - "role": "user", - "content": "Hello!" - } - ] - }"#; - - let result: Result = ProviderRequestType::try_from(json_request.as_bytes()); - assert!(result.is_ok()); - - let request = result.unwrap(); - assert_eq!(request.model(), "gpt-4"); - assert_eq!(request.get_recent_user_message(), Some("Hello!".to_string())); - } - #[test] fn test_provider_streaming_response() { // Test streaming response parsing with sample SSE data - let sse_data = r#"data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]} + let sse_data = r#"data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]} -data: [DONE] -"#; + data: [DONE] + "#; - let result = ProviderStreamResponseIter::try_from((sse_data.as_bytes(), &ProviderId::OpenAI)); - assert!(result.is_ok()); + use crate::clients::endpoints::SupportedAPIs; + let client_api = SupportedAPIs::OpenAIChatCompletions(crate::apis::OpenAIApi::ChatCompletions); + let upstream_api = SupportedAPIs::OpenAIChatCompletions(crate::apis::OpenAIApi::ChatCompletions); - let mut streaming_response = result.unwrap(); + // Test the new simplified architecture - create SseStreamIter directly + let sse_iter = SseStreamIter::try_from(sse_data.as_bytes()); + assert!(sse_iter.is_ok()); - // Test that we can iterate over chunks - it's just an iterator now! - let first_chunk = streaming_response.next(); - assert!(first_chunk.is_some()); + let mut streaming_iter = sse_iter.unwrap(); - let chunk_result = first_chunk.unwrap(); - assert!(chunk_result.is_ok()); + // Test that we can iterate over SseEvents + let first_event = streaming_iter.next(); + assert!(first_event.is_some()); - let chunk = chunk_result.unwrap(); - assert_eq!(chunk.content_delta(), Some("Hello")); - assert!(!chunk.is_final()); + let sse_event = first_event.unwrap(); - // Test that stream ends properly - let final_chunk = streaming_response.next(); - assert!(final_chunk.is_none()); + // Test SseEvent properties + assert!(!sse_event.is_done()); + assert!(sse_event.data.as_ref().unwrap().contains("Hello")); + + // Test that we can parse the event into a provider stream response + let transformed_event = SseEvent::try_from((sse_event, &client_api, &upstream_api)); + if let Err(e) = &transformed_event { + println!("Transform error: {:?}", e); + } + assert!(transformed_event.is_ok()); + + let transformed_event = transformed_event.unwrap(); + let provider_response = transformed_event.provider_response(); + assert!(provider_response.is_ok()); + + let stream_response = provider_response.unwrap(); + assert_eq!(stream_response.content_delta(), Some("Hello")); + assert!(!stream_response.is_final()); + + // Test that stream ends properly with [DONE] (SseStreamIter should stop before [DONE]) + let final_event = streaming_iter.next(); + assert!(final_event.is_none()); // Should be None because iterator stops at [DONE] } } diff --git a/crates/hermesllm/src/providers/adapters.rs b/crates/hermesllm/src/providers/adapters.rs deleted file mode 100644 index a001cf09..00000000 --- a/crates/hermesllm/src/providers/adapters.rs +++ /dev/null @@ -1,39 +0,0 @@ -use crate::providers::id::ProviderId; - -#[derive(Debug, Clone)] -pub enum AdapterType { - OpenAICompatible, - // Future: Claude, Gemini, etc. -} - -/// Provider adapter configuration -#[derive(Debug, Clone)] -pub struct ProviderConfig { - pub supported_apis: &'static [&'static str], - pub adapter_type: AdapterType, -} - -/// Check if provider has compatible API -pub fn has_compatible_api(provider_id: &ProviderId, api_path: &str) -> bool { - let config = get_provider_config(provider_id); - config.supported_apis.iter().any(|&supported| supported == api_path) -} - -/// Get supported APIs for provider -pub fn supported_apis(provider_id: &ProviderId) -> Vec<&'static str> { - let config = get_provider_config(provider_id); - config.supported_apis.to_vec() -} - -/// Get provider configuration -pub fn get_provider_config(provider_id: &ProviderId) -> ProviderConfig { - match provider_id { - ProviderId::OpenAI | ProviderId::Groq | ProviderId::Mistral | ProviderId::Deepseek - | ProviderId::Arch | ProviderId::Gemini | ProviderId::Claude | ProviderId::GitHub => { - ProviderConfig { - supported_apis: &["/v1/chat/completions"], - adapter_type: AdapterType::OpenAICompatible, - } - } - } -} diff --git a/crates/hermesllm/src/providers/id.rs b/crates/hermesllm/src/providers/id.rs index 2c0c494e..26933adc 100644 --- a/crates/hermesllm/src/providers/id.rs +++ b/crates/hermesllm/src/providers/id.rs @@ -1,4 +1,6 @@ use std::fmt::Display; +use crate::clients::endpoints::SupportedAPIs; +use crate::apis::{OpenAIApi, AnthropicApi}; /// Provider identifier enum - simple enum for identifying providers #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -8,7 +10,7 @@ pub enum ProviderId { Deepseek, Groq, Gemini, - Claude, + Anthropic, GitHub, Arch, } @@ -21,7 +23,7 @@ impl From<&str> for ProviderId { "deepseek" => ProviderId::Deepseek, "groq" => ProviderId::Groq, "gemini" => ProviderId::Gemini, - "claude" => ProviderId::Claude, + "anthropic" => ProviderId::Anthropic, "github" => ProviderId::GitHub, "arch" => ProviderId::Arch, _ => panic!("Unknown provider: {}", value), @@ -29,6 +31,21 @@ impl From<&str> for ProviderId { } } +impl ProviderId { + /// Given a client API, return the compatible upstream API for this provider + pub fn compatible_api_for_client(&self, client_api: &SupportedAPIs) -> SupportedAPIs { + match (self, client_api) { + // Claude/Anthropic providers natively support Anthropic APIs + (ProviderId::Anthropic, SupportedAPIs::AnthropicMessagesAPI(_)) => SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages), + (ProviderId::Anthropic, SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions)) => SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), + + // OpenAI-compatible providers only support OpenAI chat completions + (ProviderId::OpenAI | ProviderId::Groq | ProviderId::Mistral | ProviderId::Deepseek | ProviderId::Arch | ProviderId::Gemini | ProviderId::GitHub, SupportedAPIs::AnthropicMessagesAPI(_)) => SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), + (ProviderId::OpenAI | ProviderId::Groq | ProviderId::Mistral | ProviderId::Deepseek | ProviderId::Arch | ProviderId::Gemini | ProviderId::GitHub, SupportedAPIs::OpenAIChatCompletions(_)) => SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), + } + } +} + impl Display for ProviderId { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -37,7 +54,7 @@ impl Display for ProviderId { ProviderId::Deepseek => write!(f, "Deepseek"), ProviderId::Groq => write!(f, "Groq"), ProviderId::Gemini => write!(f, "Gemini"), - ProviderId::Claude => write!(f, "Claude"), + ProviderId::Anthropic => write!(f, "Anthropic"), ProviderId::GitHub => write!(f, "GitHub"), ProviderId::Arch => write!(f, "Arch"), } diff --git a/crates/hermesllm/src/providers/mod.rs b/crates/hermesllm/src/providers/mod.rs index 4abccc0c..601af955 100644 --- a/crates/hermesllm/src/providers/mod.rs +++ b/crates/hermesllm/src/providers/mod.rs @@ -6,9 +6,7 @@ pub mod id; pub mod request; pub mod response; -pub mod adapters; pub use id::ProviderId; pub use request::{ProviderRequestType, ProviderRequest, ProviderRequestError} ; -pub use response::{ProviderResponseType, ProviderStreamResponseIter, ProviderResponse, ProviderStreamResponse, TokenUsage }; -pub use adapters::*; +pub use response::{ProviderResponseType, ProviderResponse, ProviderStreamResponse, TokenUsage }; diff --git a/crates/hermesllm/src/providers/request.rs b/crates/hermesllm/src/providers/request.rs index 1eb39416..adde81f4 100644 --- a/crates/hermesllm/src/providers/request.rs +++ b/crates/hermesllm/src/providers/request.rs @@ -1,41 +1,17 @@ - use crate::apis::openai::ChatCompletionsRequest; -use super::{ProviderId, get_provider_config, AdapterType}; +use crate::apis::anthropic::MessagesRequest; +use crate::clients::endpoints::SupportedAPIs; + +use serde_json::Value; use std::error::Error; use std::fmt; +use std::collections::HashMap; +#[derive(Clone)] pub enum ProviderRequestType { ChatCompletionsRequest(ChatCompletionsRequest), - //MessagesRequest(MessagesRequest), + MessagesRequest(MessagesRequest), //add more request types here } - -impl TryFrom<&[u8]> for ProviderRequestType { - type Error = std::io::Error; - - // if passing bytes without provider id we assume the request is in OpenAI format - fn try_from(bytes: &[u8]) -> Result { - let chat_completion_request: ChatCompletionsRequest = ChatCompletionsRequest::try_from(bytes) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; - Ok(ProviderRequestType::ChatCompletionsRequest(chat_completion_request)) - } -} - -impl TryFrom<(&[u8], &ProviderId)> for ProviderRequestType { - type Error = std::io::Error; - - fn try_from((bytes, provider_id): (&[u8], &ProviderId)) -> Result { - let config = get_provider_config(provider_id); - match config.adapter_type { - AdapterType::OpenAICompatible => { - let chat_completion_request: ChatCompletionsRequest = ChatCompletionsRequest::try_from(bytes) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; - Ok(ProviderRequestType::ChatCompletionsRequest(chat_completion_request)) - } - // Future: handle other adapter types like Claude - } - } -} - pub trait ProviderRequest: Send + Sync { /// Extract the model name from the request fn model(&self) -> &str; @@ -54,46 +30,129 @@ pub trait ProviderRequest: Send + Sync { /// Convert the request to bytes for transmission fn to_bytes(&self) -> Result, ProviderRequestError>; + + fn metadata(&self) -> &Option>; + + /// Remove a metadata key from the request and return true if the key was present + fn remove_metadata_key(&mut self, key: &str) -> bool; } impl ProviderRequest for ProviderRequestType { fn model(&self) -> &str { match self { Self::ChatCompletionsRequest(r) => r.model(), + Self::MessagesRequest(r) => r.model(), } } fn set_model(&mut self, model: String) { match self { Self::ChatCompletionsRequest(r) => r.set_model(model), + Self::MessagesRequest(r) => r.set_model(model), } } fn is_streaming(&self) -> bool { match self { Self::ChatCompletionsRequest(r) => r.is_streaming(), + Self::MessagesRequest(r) => r.is_streaming(), } } fn extract_messages_text(&self) -> String { match self { Self::ChatCompletionsRequest(r) => r.extract_messages_text(), + Self::MessagesRequest(r) => r.extract_messages_text(), } } fn get_recent_user_message(&self) -> Option { match self { Self::ChatCompletionsRequest(r) => r.get_recent_user_message(), + Self::MessagesRequest(r) => r.get_recent_user_message(), } } fn to_bytes(&self) -> Result, ProviderRequestError> { match self { Self::ChatCompletionsRequest(r) => r.to_bytes(), + Self::MessagesRequest(r) => r.to_bytes(), + } + } + + fn metadata(&self) -> &Option> { + match self { + Self::ChatCompletionsRequest(r) => r.metadata(), + Self::MessagesRequest(r) => r.metadata(), + } + } + + fn remove_metadata_key(&mut self, key: &str) -> bool { + match self { + Self::ChatCompletionsRequest(r) => r.remove_metadata_key(key), + Self::MessagesRequest(r) => r.remove_metadata_key(key), } } } +/// Parse the client API from a byte slice. +impl TryFrom<(&[u8], &SupportedAPIs)> for ProviderRequestType { + type Error = std::io::Error; + + fn try_from((bytes, client_api): (&[u8], &SupportedAPIs)) -> Result { + // Use SupportedApi to determine the appropriate request type + match client_api { + SupportedAPIs::OpenAIChatCompletions(_) => { + let chat_completion_request: ChatCompletionsRequest = ChatCompletionsRequest::try_from(bytes) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; + Ok(ProviderRequestType::ChatCompletionsRequest(chat_completion_request)) + } + SupportedAPIs::AnthropicMessagesAPI(_) => { + let messages_request: MessagesRequest = MessagesRequest::try_from(bytes) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; + Ok(ProviderRequestType::MessagesRequest(messages_request)) + } + } + } +} + +/// Conversion from one ProviderRequestType to a different ProviderRequestType (SupportedAPIs) +impl TryFrom<(ProviderRequestType, &SupportedAPIs)> for ProviderRequestType { + type Error = ProviderRequestError; + + fn try_from((request, upstream_api): (ProviderRequestType, &SupportedAPIs)) -> Result { + match (request, upstream_api) { + // Same API - no conversion needed, just clone the reference + (ProviderRequestType::ChatCompletionsRequest(chat_req), SupportedAPIs::OpenAIChatCompletions(_)) => { + Ok(ProviderRequestType::ChatCompletionsRequest(chat_req)) + } + (ProviderRequestType::MessagesRequest(messages_req), SupportedAPIs::AnthropicMessagesAPI(_)) => { + Ok(ProviderRequestType::MessagesRequest(messages_req)) + } + + // Cross-API conversion - cloning is necessary for transformation + (ProviderRequestType::ChatCompletionsRequest(chat_req), SupportedAPIs::AnthropicMessagesAPI(_)) => { + let messages_req = MessagesRequest::try_from(chat_req) + .map_err(|e| ProviderRequestError { + message: format!("Failed to convert ChatCompletionsRequest to MessagesRequest: {}", e), + source: Some(Box::new(e)) + })?; + Ok(ProviderRequestType::MessagesRequest(messages_req)) + } + + (ProviderRequestType::MessagesRequest(messages_req), SupportedAPIs::OpenAIChatCompletions(_)) => { + let chat_req = ChatCompletionsRequest::try_from(messages_req) + .map_err(|e| ProviderRequestError { + message: format!("Failed to convert MessagesRequest to ChatCompletionsRequest: {}", e), + source: Some(Box::new(e)) + })?; + Ok(ProviderRequestType::ChatCompletionsRequest(chat_req)) + } + } + } +} + + /// Error types for provider operations #[derive(Debug)] @@ -113,3 +172,194 @@ impl Error for ProviderRequestError { self.source.as_ref().map(|e| e.as_ref() as &(dyn Error + 'static)) } } + + +#[cfg(test)] +mod tests { + use super::*; + use crate::clients::endpoints::SupportedAPIs; + use crate::apis::anthropic::AnthropicApi::Messages; + use crate::apis::openai::OpenAIApi::ChatCompletions; + use crate::apis::anthropic::MessagesRequest as AnthropicMessagesRequest; + use crate::apis::openai::{ChatCompletionsRequest}; + use crate::clients::transformer::ExtractText; + use serde_json::json; + + #[test] + fn test_openai_request_from_bytes() { + let req = json!({ + "model": "gpt-4", + "messages": [ + {"role": "system", "content": "You are a helpful assistant"}, + {"role": "user", "content": "Hello!"} + ] + }); + let bytes = serde_json::to_vec(&req).unwrap(); + let api = SupportedAPIs::OpenAIChatCompletions(ChatCompletions); + let result = ProviderRequestType::try_from((bytes.as_slice(), &api)); + assert!(result.is_ok()); + match result.unwrap() { + ProviderRequestType::ChatCompletionsRequest(r) => { + assert_eq!(r.model, "gpt-4"); + assert_eq!(r.messages.len(), 2); + }, + _ => panic!("Expected ChatCompletionsRequest variant"), + } + } + + #[test] + fn test_anthropic_request_from_bytes_with_endpoint() { + let req = json!({ + "model": "claude-3-sonnet", + "system": "You are a helpful assistant", + "max_tokens": 100, + "messages": [ + {"role": "user", "content": "Hello!"} + ] + }); + let bytes = serde_json::to_vec(&req).unwrap(); + let endpoint = SupportedAPIs::AnthropicMessagesAPI(Messages); + let result = ProviderRequestType::try_from((bytes.as_slice(), &endpoint)); + assert!(result.is_ok()); + match result.unwrap() { + ProviderRequestType::MessagesRequest(r) => { + assert_eq!(r.model, "claude-3-sonnet"); + assert_eq!(r.messages.len(), 1); + }, + _ => panic!("Expected MessagesRequest variant"), + } + } + + #[test] + fn test_openai_request_from_bytes_with_endpoint() { + let req = json!({ + "model": "gpt-4", + "messages": [ + {"role": "system", "content": "You are a helpful assistant"}, + {"role": "user", "content": "Hello!"} + ] + }); + let bytes = serde_json::to_vec(&req).unwrap(); + let endpoint = SupportedAPIs::OpenAIChatCompletions(ChatCompletions); + let result = ProviderRequestType::try_from((bytes.as_slice(), &endpoint)); + assert!(result.is_ok()); + match result.unwrap() { + ProviderRequestType::ChatCompletionsRequest(r) => { + assert_eq!(r.model, "gpt-4"); + assert_eq!(r.messages.len(), 2); + }, + _ => panic!("Expected ChatCompletionsRequest variant"), + } + } + + #[test] + fn test_anthropic_request_from_bytes_wrong_endpoint() { + let req = json!({ + "model": "claude-3-sonnet", + "system": "You are a helpful assistant", + "messages": [ + {"role": "user", "content": "Hello!"} + ] + }); + let bytes = serde_json::to_vec(&req).unwrap(); + // Intentionally use OpenAI endpoint for Anthropic payload + let endpoint = SupportedAPIs::OpenAIChatCompletions(ChatCompletions); + let result = ProviderRequestType::try_from((bytes.as_slice(), &endpoint)); + // Should parse as ChatCompletionsRequest, not error + assert!(result.is_ok()); + match result.unwrap() { + ProviderRequestType::ChatCompletionsRequest(r) => { + assert_eq!(r.model, "claude-3-sonnet"); + assert_eq!(r.messages.len(), 1); + }, + _ => panic!("Expected ChatCompletionsRequest variant"), + } + } + + #[test] + fn test_v1_messages_to_v1_chat_completions_roundtrip() { + let anthropic_req = AnthropicMessagesRequest { + model: "claude-3-sonnet".to_string(), + system: Some(crate::apis::anthropic::MessagesSystemPrompt::Single("You are a helpful assistant".to_string())), + messages: vec![ + crate::apis::anthropic::MessagesMessage { + role: crate::apis::anthropic::MessagesRole::User, + content: crate::apis::anthropic::MessagesMessageContent::Single("Hello!".to_string()), + } + ], + max_tokens: 128, + container: None, + mcp_servers: None, + service_tier: None, + thinking: None, + temperature: Some(0.7), + top_p: Some(1.0), + top_k: None, + stream: Some(false), + stop_sequences: Some(vec!["\n".to_string()]), + tools: None, + tool_choice: None, + metadata: None, + }; + + let openai_req = ChatCompletionsRequest::try_from(anthropic_req.clone()).expect("Anthropic->OpenAI conversion failed"); + let anthropic_req2 = AnthropicMessagesRequest::try_from(openai_req).expect("OpenAI->Anthropic conversion failed"); + + assert_eq!(anthropic_req.model, anthropic_req2.model); + // Compare system prompt text if present + assert_eq!( + anthropic_req.system.as_ref().and_then(|s| match s { crate::apis::anthropic::MessagesSystemPrompt::Single(t) => Some(t), _ => None }), + anthropic_req2.system.as_ref().and_then(|s| match s { crate::apis::anthropic::MessagesSystemPrompt::Single(t) => Some(t), _ => None }) + ); + assert_eq!(anthropic_req.messages[0].role, anthropic_req2.messages[0].role); + // Compare message content text if present + assert_eq!( + anthropic_req.messages[0].content.extract_text(), + anthropic_req2.messages[0].content.extract_text() + ); + assert_eq!(anthropic_req.max_tokens, anthropic_req2.max_tokens); + } + + #[test] + fn test_v1_chat_completions_to_v1_messages_roundtrip() { + use crate::apis::anthropic::MessagesRequest as AnthropicMessagesRequest; + use crate::apis::openai::{ChatCompletionsRequest, Message, Role, MessageContent}; + + let openai_req = ChatCompletionsRequest { + model: "gpt-4".to_string(), + messages: vec![ + Message { + role: Role::System, + content: MessageContent::Text("You are a helpful assistant".to_string()), + name: None, + tool_calls: None, + tool_call_id: None, + }, + Message { + role: Role::User, + content: MessageContent::Text("Hello!".to_string()), + name: None, + tool_calls: None, + tool_call_id: None, + } + ], + temperature: Some(0.7), + top_p: Some(1.0), + max_tokens: Some(128), + stream: Some(false), + stop: Some(vec!["\n".to_string()]), + tools: None, + tool_choice: None, + parallel_tool_calls: None, + ..Default::default() + }; + + let anthropic_req = AnthropicMessagesRequest::try_from(openai_req.clone()).expect("OpenAI->Anthropic conversion failed"); + let openai_req2 = ChatCompletionsRequest::try_from(anthropic_req).expect("Anthropic->OpenAI conversion failed"); + + assert_eq!(openai_req.model, openai_req2.model); + assert_eq!(openai_req.messages[0].role, openai_req2.messages[0].role); + assert_eq!(openai_req.messages[0].content.extract_text(), openai_req2.messages[0].content.extract_text()); + assert_eq!(openai_req.max_tokens, openai_req2.max_tokens); + } +} diff --git a/crates/hermesllm/src/providers/response.rs b/crates/hermesllm/src/providers/response.rs index faca303f..13cad0cd 100644 --- a/crates/hermesllm/src/providers/response.rs +++ b/crates/hermesllm/src/providers/response.rs @@ -1,76 +1,37 @@ +use crate::providers::id::ProviderId; +use serde::{Serialize, Deserialize}; use std::error::Error; use std::fmt; +use std::convert::TryFrom; +use std::str::FromStr; use crate::apis::openai::ChatCompletionsResponse; -use crate::apis::OpenAISseIter; -use crate::providers::id::ProviderId; -use crate::providers::adapters::{get_provider_config, AdapterType}; +use crate::apis::openai::ChatCompletionsStreamResponse; +use crate::apis::anthropic::MessagesStreamEvent; +use crate::clients::endpoints::SupportedAPIs; +use crate::apis::anthropic::MessagesResponse; +/// Trait for token usage information +pub trait TokenUsage { + fn completion_tokens(&self) -> usize; + fn prompt_tokens(&self) -> usize; + fn total_tokens(&self) -> usize; +} + +#[derive(Serialize, Debug, Clone)] +#[serde(untagged)] pub enum ProviderResponseType { ChatCompletionsResponse(ChatCompletionsResponse), - //MessagesResponse(MessagesResponse), + MessagesResponse(MessagesResponse), } -pub enum ProviderStreamResponseIter { - ChatCompletionsStream(OpenAISseIter>), - //MessagesStream(AnthropicSseIter>), +#[derive(Serialize, Debug, Clone)] +#[serde(untagged)] +pub enum ProviderStreamResponseType { + ChatCompletionsStreamResponse(ChatCompletionsStreamResponse), + MessagesStreamEvent(MessagesStreamEvent), } -impl TryFrom<(&[u8], ProviderId)> for ProviderResponseType { - type Error = std::io::Error; - - fn try_from((bytes, provider_id): (&[u8], ProviderId)) -> Result { - let config = get_provider_config(&provider_id); - match config.adapter_type { - AdapterType::OpenAICompatible => { - let chat_completions_response: ChatCompletionsResponse = ChatCompletionsResponse::try_from(bytes) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; - Ok(ProviderResponseType::ChatCompletionsResponse(chat_completions_response)) - } - // Future: handle other adapter types like Claude - } - } -} - -impl TryFrom<(&[u8], &ProviderId)> for ProviderStreamResponseIter { - type Error = Box; - - fn try_from((bytes, provider_id): (&[u8], &ProviderId)) -> Result { - let config = get_provider_config(provider_id); - - // Parse SSE (Server-Sent Events) streaming data - protocol layer - let s = std::str::from_utf8(bytes)?; - let lines: Vec = s.lines().map(|line| line.to_string()).collect(); - - match config.adapter_type { - AdapterType::OpenAICompatible => { - // Delegate to OpenAI-specific iterator implementation - let sse_container = SseStreamIter::new(lines.into_iter()); - let iter = crate::apis::openai::OpenAISseIter::new(sse_container); - Ok(ProviderStreamResponseIter::ChatCompletionsStream(iter)) - } - // Future: AdapterType::Claude => { - // let sse_container = SseStreamIter::new(lines.into_iter()); - // let iter = crate::apis::anthropic::AnthropicSseIter::new(sse_container); - // Ok(ProviderStreamResponseIter::MessagesStream(iter)) - // } - } - } -} - - -impl Iterator for ProviderStreamResponseIter { - type Item = Result, Box>; - - fn next(&mut self) -> Option { - match self { - ProviderStreamResponseIter::ChatCompletionsStream(iter) => iter.next(), - // Future: ProviderStreamResponseIter::MessagesStream(iter) => iter.next(), - } - } -} - - pub trait ProviderResponse: Send + Sync { /// Get usage information if available - returns dynamic trait object fn usage(&self) -> Option<&dyn TokenUsage>; @@ -81,6 +42,22 @@ pub trait ProviderResponse: Send + Sync { } } +impl ProviderResponse for ProviderResponseType { + fn usage(&self) -> Option<&dyn TokenUsage> { + match self { + ProviderResponseType::ChatCompletionsResponse(resp) => resp.usage(), + ProviderResponseType::MessagesResponse(resp) => resp.usage(), + } + } + + fn extract_usage_counts(&self) -> Option<(usize, usize, usize)> { + match self { + ProviderResponseType::ChatCompletionsResponse(resp) => resp.extract_usage_counts(), + ProviderResponseType::MessagesResponse(resp) => resp.extract_usage_counts(), + } + } +} + pub trait ProviderStreamResponse: Send + Sync { /// Get the content delta for this chunk fn content_delta(&self) -> Option<&str>; @@ -90,16 +67,313 @@ pub trait ProviderStreamResponse: Send + Sync { /// Get role information if available fn role(&self) -> Option<&str>; + + /// Get event type for SSE streaming (used by Anthropic) + fn event_type(&self) -> Option<&str>; +} + +impl ProviderStreamResponse for ProviderStreamResponseType { + fn content_delta(&self) -> Option<&str> { + match self { + ProviderStreamResponseType::ChatCompletionsStreamResponse(resp) => resp.content_delta(), + ProviderStreamResponseType::MessagesStreamEvent(resp) => resp.content_delta(), + } + } + + fn is_final(&self) -> bool { + match self { + ProviderStreamResponseType::ChatCompletionsStreamResponse(resp) => resp.is_final(), + ProviderStreamResponseType::MessagesStreamEvent(resp) => resp.is_final(), + } + } + + fn role(&self) -> Option<&str> { + match self { + ProviderStreamResponseType::ChatCompletionsStreamResponse(resp) => resp.role(), + ProviderStreamResponseType::MessagesStreamEvent(resp) => resp.role(), + } + } + + fn event_type(&self) -> Option<&str> { + match self { + ProviderStreamResponseType::ChatCompletionsStreamResponse(_resp) => None, // OpenAI doesn't use event types + ProviderStreamResponseType::MessagesStreamEvent(resp) => resp.event_type(), + } + } +} + +// ============================================================================ +// SSE EVENT CONTAINER +// ============================================================================ + +/// Represents a single Server-Sent Event with the complete wire format +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SseEvent { + #[serde(rename = "data")] + pub data: Option, // The JSON payload after "data: " + + #[serde(skip_serializing_if = "Option::is_none")] + pub event: Option, // Optional event type (e.g., "message_start", "content_block_delta") + + #[serde(skip_serializing, skip_deserializing)] + pub raw_line: String, // The complete line as received including "data: " prefix and "\n\n" + + #[serde(skip_serializing, skip_deserializing)] + pub sse_transform_buffer: String, // The complete line as received including "data: " prefix and "\n\n" + + #[serde(skip_serializing, skip_deserializing)] + pub provider_stream_response: Option, // Parsed provider stream response object +} + +impl SseEvent { + /// Check if this event represents the end of the stream + pub fn is_done(&self) -> bool { + self.data == Some("[DONE]".into()) + } + + /// Check if this event should be skipped during processing + /// This includes ping messages and other provider-specific events that don't contain content + pub fn should_skip(&self) -> bool { + // Skip ping messages (commonly used by providers for connection keep-alive) + self.data == Some(r#"{"type": "ping"}"#.into()) + } + + /// Check if this is an event-only SSE event (no data payload) + pub fn is_event_only(&self) -> bool { + self.event.is_some() && self.data.is_none() + } + + /// Get the parsed provider response if available + pub fn provider_response(&self) -> Result<&dyn ProviderStreamResponse, std::io::Error> { + self.provider_stream_response.as_ref() + .map(|resp| resp as &dyn ProviderStreamResponse) + .ok_or_else(|| { + std::io::Error::new(std::io::ErrorKind::NotFound, "Provider response not found") + }) + } + +} + +impl FromStr for SseEvent { + type Err = SseParseError; + + fn from_str(line: &str) -> Result { + if line.starts_with("data: ") { + let data: String = line[6..].to_string(); // Remove "data: " prefix + if data.is_empty() { + return Err(SseParseError { + message: "Empty data field is not a valid SSE event".to_string(), + }); + } + Ok(SseEvent { + data: Some(data), + event: None, + raw_line: line.to_string(), + sse_transform_buffer: line.to_string(), + provider_stream_response: None, + }) + } else if line.starts_with("event: ") { //used by Anthropic + let event_type = line[7..].to_string(); + if event_type.is_empty() { + return Err(SseParseError { + message: "Empty event field is not a valid SSE event".to_string(), + }); + } + Ok(SseEvent { + data: None, + event: Some(event_type), + raw_line: line.to_string(), + sse_transform_buffer: line.to_string(), + provider_stream_response: None, + }) + } else { + Err(SseParseError { + message: format!("Line does not start with 'data: ' or 'event: ': {}", line), + }) + } + } +} + +impl fmt::Display for SseEvent { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.sse_transform_buffer) + } +} + +// Into implementation to convert SseEvent to bytes for response buffer +impl Into> for SseEvent { + fn into(self) -> Vec { + format!("{}\n\n", self.sse_transform_buffer).into_bytes() + } } +// --- Response transformation logic for client API compatibility --- +impl TryFrom<(&[u8], &SupportedAPIs, &ProviderId)> for ProviderResponseType { + type Error = std::io::Error; + + fn try_from((bytes, client_api, provider_id): (&[u8], &SupportedAPIs, &ProviderId)) -> Result { + let upstream_api = provider_id.compatible_api_for_client(client_api); + match (&upstream_api, client_api) { + (SupportedAPIs::OpenAIChatCompletions(_), SupportedAPIs::OpenAIChatCompletions(_)) => { + let resp: ChatCompletionsResponse = ChatCompletionsResponse::try_from(bytes) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; + Ok(ProviderResponseType::ChatCompletionsResponse(resp)) + } + (SupportedAPIs::AnthropicMessagesAPI(_), SupportedAPIs::AnthropicMessagesAPI(_)) => { + let resp: MessagesResponse = serde_json::from_slice(bytes) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; + Ok(ProviderResponseType::MessagesResponse(resp)) + } + (SupportedAPIs::AnthropicMessagesAPI(_), SupportedAPIs::OpenAIChatCompletions(_)) => { + let anthropic_resp: MessagesResponse = serde_json::from_slice(bytes) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; + + // Transform to OpenAI ChatCompletions format using the transformer + let chat_resp: ChatCompletionsResponse = anthropic_resp.try_into() + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, format!("Transformation error: {}", e)))?; + Ok(ProviderResponseType::ChatCompletionsResponse(chat_resp)) + } + (SupportedAPIs::OpenAIChatCompletions(_), SupportedAPIs::AnthropicMessagesAPI(_)) => { + let openai_resp: ChatCompletionsResponse = ChatCompletionsResponse::try_from(bytes) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; + + // Transform to Anthropic Messages format using the transformer + let messages_resp: MessagesResponse = openai_resp.try_into() + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, format!("Transformation error: {}", e)))?; + Ok(ProviderResponseType::MessagesResponse(messages_resp)) + } + } + } +} + +// Stream response transformation logic for client API compatibility +impl TryFrom<(&[u8], &SupportedAPIs, &SupportedAPIs)> for ProviderStreamResponseType { + type Error = Box; + + fn try_from((bytes, client_api, upstream_api): (&[u8], &SupportedAPIs, &SupportedAPIs)) -> Result { + match (upstream_api, client_api) { + (SupportedAPIs::OpenAIChatCompletions(_), SupportedAPIs::OpenAIChatCompletions(_)) => { + let resp: crate::apis::openai::ChatCompletionsStreamResponse = serde_json::from_slice(bytes)?; + Ok(ProviderStreamResponseType::ChatCompletionsStreamResponse(resp)) + } + (SupportedAPIs::AnthropicMessagesAPI(_), SupportedAPIs::AnthropicMessagesAPI(_)) => { + let resp: crate::apis::anthropic::MessagesStreamEvent = serde_json::from_slice(bytes)?; + Ok(ProviderStreamResponseType::MessagesStreamEvent(resp)) + } + (SupportedAPIs::AnthropicMessagesAPI(_), SupportedAPIs::OpenAIChatCompletions(_)) => { + let anthropic_resp: crate::apis::anthropic::MessagesStreamEvent = serde_json::from_slice(bytes)?; + + // Transform to OpenAI ChatCompletions stream format using the transformer + let chat_resp: crate::apis::openai::ChatCompletionsStreamResponse = anthropic_resp.try_into()?; + Ok(ProviderStreamResponseType::ChatCompletionsStreamResponse(chat_resp)) + } + (SupportedAPIs::OpenAIChatCompletions(_), SupportedAPIs::AnthropicMessagesAPI(_)) => { + let openai_resp: crate::apis::openai::ChatCompletionsStreamResponse = serde_json::from_slice(bytes)?; + + // Transform to Anthropic Messages stream format using the transformer + let messages_resp: crate::apis::anthropic::MessagesStreamEvent = openai_resp.try_into()?; + Ok(ProviderStreamResponseType::MessagesStreamEvent(messages_resp)) + } + } + } +} + +// TryFrom implementation to convert raw bytes to SseEvent with parsed provider response +impl TryFrom<(SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent { + type Error = Box; + + fn try_from((sse_event, client_api, upstream_api): (SseEvent, &SupportedAPIs, &SupportedAPIs)) -> Result { + // Create a new transformed event based on the original + let mut transformed_event = sse_event; + + // If not [DONE] and has data, parse the data as a provider stream response (business logic layer) + if !transformed_event.is_done() && transformed_event.data.is_some() { + let data_str = transformed_event.data.as_ref().unwrap(); + let data_bytes = data_str.as_bytes(); + let transformed_response = ProviderStreamResponseType::try_from((data_bytes, client_api, upstream_api))?; + let transformed_json = serde_json::to_string(&transformed_response)?; + transformed_event.sse_transform_buffer = format!("data: {}\n\n", transformed_json); + transformed_event.provider_stream_response = Some(transformed_response); + } + + match (client_api, upstream_api) { + (SupportedAPIs::OpenAIChatCompletions(_), SupportedAPIs::OpenAIChatCompletions(_)) => { + // No transformation needed + } + (SupportedAPIs::AnthropicMessagesAPI(_), SupportedAPIs::AnthropicMessagesAPI(_)) => { + // No transformation needed + } + (SupportedAPIs::AnthropicMessagesAPI(_), SupportedAPIs::OpenAIChatCompletions(_)) => { + if let Some(provider_response) = &transformed_event.provider_stream_response { + if let Some(event_type) = provider_response.event_type() { + // This ensures the required Anthropic sequence: MessageStart → ContentBlockStart → ContentBlockDelta(s) + if event_type == "message_start" { + let content_block_start_json = serde_json::json!({ + "type": "content_block_start", + "index": 0, + "content_block": { + "type": "text", + "text": "" + } + }); + // Format as proper SSE: MessageStart first, then ContentBlockStart + transformed_event.sse_transform_buffer = format!( + "event: {}\n{}\nevent: content_block_start\ndata: {}\n\n", + event_type, + transformed_event.sse_transform_buffer, + content_block_start_json, + ); + } else if event_type == "message_delta" { + let content_block_stop_json = serde_json::json!({ + "type": "content_block_stop", + "index": 0 + }); + // Format as proper SSE: ContentBlockStop first, then MessageDelta + transformed_event.sse_transform_buffer = format!( + "event: content_block_stop\ndata: {}\n\nevent: {}\n{}", + content_block_stop_json, + event_type, + transformed_event.sse_transform_buffer + ); + } else { + transformed_event.sse_transform_buffer = format!("event: {}\n{}", event_type, transformed_event.sse_transform_buffer); + } + } + // If event_type is None, we just keep the data line as-is without an event line + // This handles cases where the transformation might not produce a valid event type + } + } + (SupportedAPIs::OpenAIChatCompletions(_), SupportedAPIs::AnthropicMessagesAPI(_)) => { + if transformed_event.is_event_only() && transformed_event.event.is_some() { + transformed_event.sse_transform_buffer = format!("\n"); // suppress the event upstream for OpenAI + } + } + } + + Ok(transformed_event) + } +} + +#[derive(Debug)] +pub struct SseParseError { + pub message: String, +} + +impl fmt::Display for SseParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "SSE parse error: {}", self.message) + } +} + +impl Error for SseParseError {} // ============================================================================ // GENERIC SSE STREAMING ITERATOR (Container Only) // ============================================================================ /// Generic SSE (Server-Sent Events) streaming iterator container -/// This is just a simple wrapper - actual Iterator implementation is delegated to provider-specific modules +/// Parses raw SSE lines into SseEvent objects pub struct SseStreamIter where I: Iterator, @@ -118,35 +392,45 @@ where } } +// TryFrom implementation to parse bytes into SseStreamIter +impl TryFrom<&[u8]> for SseStreamIter> { + type Error = Box; -impl ProviderResponse for ProviderResponseType { - fn usage(&self) -> Option<&dyn TokenUsage> { - match self { - ProviderResponseType::ChatCompletionsResponse(resp) => resp.usage(), - // Future: ProviderResponseType::MessagesResponse(resp) => resp.usage(), - } - } - - fn extract_usage_counts(&self) -> Option<(usize, usize, usize)> { - match self { - ProviderResponseType::ChatCompletionsResponse(resp) => resp.extract_usage_counts(), - // Future: ProviderResponseType::MessagesResponse(resp) => resp.extract_usage_counts(), - } + fn try_from(bytes: &[u8]) -> Result { + let s = std::str::from_utf8(bytes)?; + let lines: Vec = s.lines().map(|line| line.to_string()).collect(); + Ok(SseStreamIter::new(lines.into_iter())) } } -// Implement Send + Sync for the enum to match the original trait requirements -unsafe impl Send for ProviderStreamResponseIter {} -unsafe impl Sync for ProviderStreamResponseIter {} +impl Iterator for SseStreamIter +where + I: Iterator, + I::Item: AsRef, +{ + type Item = SseEvent; -/// Trait for token usage information -pub trait TokenUsage { - fn completion_tokens(&self) -> usize; - fn prompt_tokens(&self) -> usize; - fn total_tokens(&self) -> usize; + fn next(&mut self) -> Option { + for line in &mut self.lines { + let line_str = line.as_ref(); + + // Try to parse as either data: or event: line + if let Ok(event) = line_str.parse::() { + // For data: lines, check if this is the [DONE] marker - if so, end the stream + if event.data.is_some() && event.is_done() { + return None; + } + // For data: lines, skip events that should be filtered at the transport layer + if event.data.is_some() && event.should_skip() { + continue; + } + return Some(event); + } + } + None + } } - #[derive(Debug)] pub struct ProviderResponseError { pub message: String, @@ -165,3 +449,331 @@ impl Error for ProviderResponseError { self.source.as_ref().map(|e| e.as_ref() as &(dyn Error + 'static)) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::clients::endpoints::SupportedAPIs; + use crate::providers::id::ProviderId; + use crate::apis::openai::OpenAIApi; + use crate::apis::anthropic::AnthropicApi; + use serde_json::json; + + #[test] + fn test_openai_response_from_bytes() { + let resp = json!({ + "id": "chatcmpl-123", + "object": "chat.completion", + "created": 1234567890, + "model": "gpt-4", + "choices": [ + { + "index": 0, + "message": { "role": "assistant", "content": "Hello!" }, + "finish_reason": "stop" + } + ], + "usage": { "prompt_tokens": 5, "completion_tokens": 7, "total_tokens": 12 }, + "system_fingerprint": null + }); + let bytes = serde_json::to_vec(&resp).unwrap(); + let result = ProviderResponseType::try_from((bytes.as_slice(), &SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), &ProviderId::OpenAI)); + assert!(result.is_ok()); + match result.unwrap() { + ProviderResponseType::ChatCompletionsResponse(r) => { + assert_eq!(r.model, "gpt-4"); + assert_eq!(r.choices.len(), 1); + }, + _ => panic!("Expected ChatCompletionsResponse variant"), + } + } + + #[test] + fn test_anthropic_response_from_bytes() { + let resp = json!({ + "id": "msg_01ABC123", + "type": "message", + "role": "assistant", + "content": [ + { "type": "text", "text": "Hello! How can I help you today?" } + ], + "model": "claude-3-sonnet-20240229", + "stop_reason": "end_turn", + "usage": { "input_tokens": 10, "output_tokens": 25, "cache_creation_input_tokens": 5, "cache_read_input_tokens": 3 } + }); + let bytes = serde_json::to_vec(&resp).unwrap(); + let result = ProviderResponseType::try_from((bytes.as_slice(), &SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages), &ProviderId::Anthropic)); + assert!(result.is_ok()); + match result.unwrap() { + ProviderResponseType::MessagesResponse(r) => { + assert_eq!(r.model, "claude-3-sonnet-20240229"); + assert_eq!(r.content.len(), 1); + }, + _ => panic!("Expected MessagesResponse variant"), + } + } + + #[test] + fn test_anthropic_response_from_bytes_with_openai_provider() { + // OpenAI provider receives OpenAI response but client expects Anthropic format + // Upstream API = OpenAI, Client API = Anthropic -> parse OpenAI, convert to Anthropic + let resp = json!({ + "id": "chatcmpl-123", + "object": "chat.completion", + "created": 1234567890, + "model": "gpt-4", + "choices": [ + { + "index": 0, + "message": { "role": "assistant", "content": "Hello! How can I help you today?" }, + "finish_reason": "stop" + } + ], + "usage": { "prompt_tokens": 10, "completion_tokens": 25, "total_tokens": 35 } + }); + let bytes = serde_json::to_vec(&resp).unwrap(); + let result = ProviderResponseType::try_from((bytes.as_slice(), &SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages), &ProviderId::OpenAI)); + assert!(result.is_ok()); + match result.unwrap() { + ProviderResponseType::MessagesResponse(r) => { + assert_eq!(r.model, "gpt-4"); + assert_eq!(r.usage.input_tokens, 10); + assert_eq!(r.usage.output_tokens, 25); + }, + _ => panic!("Expected MessagesResponse variant"), + } + } + + #[test] + fn test_openai_response_from_bytes_with_claude_provider() { + // Claude provider using OpenAI-compatible API returns OpenAI format response + // Client API = OpenAI, Provider = Anthropic -> Anthropic returns OpenAI format via their compatible API + let resp = json!({ + "id": "chatcmpl-01ABC123", + "object": "chat.completion", + "created": 1677652288, + "model": "claude-3-sonnet-20240229", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello! How can I help you today?" + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 25, + "total_tokens": 35 + } + }); + let bytes = serde_json::to_vec(&resp).unwrap(); + let result = ProviderResponseType::try_from((bytes.as_slice(), &SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), &ProviderId::Anthropic)); + assert!(result.is_ok()); + match result.unwrap() { + ProviderResponseType::ChatCompletionsResponse(r) => { + assert_eq!(r.model, "claude-3-sonnet-20240229"); + assert_eq!(r.usage.prompt_tokens, 10); + assert_eq!(r.usage.completion_tokens, 25); + }, + _ => panic!("Expected ChatCompletionsResponse variant"), + } + } + + #[test] + fn test_sse_event_parsing() { + // Test valid SSE data line + let line = "data: {\"id\":\"test\",\"object\":\"chat.completion.chunk\"}\n\n"; + let event: Result = line.parse(); + assert!(event.is_ok()); + let event = event.unwrap(); + assert_eq!(event.data, Some("{\"id\":\"test\",\"object\":\"chat.completion.chunk\"}\n\n".to_string())); + + // Test conversion back to line using Display trait + let wire_format = event.to_string(); + assert_eq!(wire_format, "data: {\"id\":\"test\",\"object\":\"chat.completion.chunk\"}\n\n"); + + // Test [DONE] marker - should be valid SSE event + let done_line = "data: [DONE]"; + let done_result: Result = done_line.parse(); + assert!(done_result.is_ok()); + let done_event = done_result.unwrap(); + assert_eq!(done_event.data, Some("[DONE]".to_string())); + assert!(done_event.is_done()); // Test the helper method + + // Test non-DONE event + assert!(!event.is_done()); + + // Test empty data - should return error + let empty_line = "data: "; + let empty_result: Result = empty_line.parse(); + assert!(empty_result.is_err()); + + // Test non-data line - should return error + let comment_line = ": this is a comment"; + let comment_result: Result = comment_line.parse(); + assert!(comment_result.is_err()); + } + + #[test] + fn test_sse_event_serde() { + // Test serialization and deserialization with serde + let event = SseEvent { + data: Some(r#"{"id":"test","object":"chat.completion.chunk"}"#.to_string()), + event: None, + raw_line: r#"data: {"id":"test","object":"chat.completion.chunk"} + + "#.to_string(), + sse_transform_buffer: r#"data: {"id":"test","object":"chat.completion.chunk"} + + "#.to_string(), + provider_stream_response: None, + }; + + // Test JSON serialization - raw_line should be skipped + let json = serde_json::to_string(&event).unwrap(); + assert!(json.contains("test")); + assert!(json.contains("chat.completion.chunk")); + assert!(!json.contains("raw_line")); // Should be excluded from serialization + + // Test JSON deserialization + let deserialized: SseEvent = serde_json::from_str(&json).unwrap(); + assert_eq!(deserialized.data, event.data); + assert_eq!(deserialized.raw_line, ""); // Should be empty since it's skipped + + // Test round trip for data field only + assert_eq!(event.data, deserialized.data); + } + + #[test] + fn test_sse_event_should_skip() { + // Test ping message should be skipped + let ping_event = SseEvent { + data: Some(r#"{"type": "ping"}"#.to_string()), + event: None, + raw_line: r#"data: {"type": "ping"}"#.to_string(), + sse_transform_buffer: r#"data: {"type": "ping"}"#.to_string(), + provider_stream_response: None, + }; + assert!(ping_event.should_skip()); + assert!(!ping_event.is_done()); + + // Test normal event should not be skipped + let normal_event = SseEvent { + data: Some(r#"{"id": "test", "object": "chat.completion.chunk"}"#.to_string()), + event: Some("content_block_delta".to_string()), + raw_line: r#"data: {"id": "test", "object": "chat.completion.chunk"}"#.to_string(), + sse_transform_buffer: r#"data: {"id": "test", "object": "chat.completion.chunk"}"#.to_string(), + provider_stream_response: None, + }; + assert!(!normal_event.should_skip()); + assert!(!normal_event.is_done()); + + // Test [DONE] event should not be skipped (but is handled separately) + let done_event = SseEvent { + data: Some("[DONE]".to_string()), + event: None, + raw_line: "data: [DONE]".to_string(), + sse_transform_buffer: "data: [DONE]".to_string(), + provider_stream_response: None, + }; + assert!(!done_event.should_skip()); + assert!(done_event.is_done()); + } + + #[test] + fn test_sse_stream_iter_filters_ping_messages() { + // Create test data with ping messages mixed in + let test_lines = vec![ + "data: {\"id\": \"msg1\", \"object\": \"chat.completion.chunk\"}".to_string(), + "data: {\"type\": \"ping\"}".to_string(), // This should be filtered out + "data: {\"id\": \"msg2\", \"object\": \"chat.completion.chunk\"}".to_string(), + "data: {\"type\": \"ping\"}".to_string(), // This should be filtered out + "data: [DONE]".to_string(), // This should end the stream + ]; + + let mut iter = SseStreamIter::new(test_lines.into_iter()); + + // First event should be msg1 (ping filtered out) + let event1 = iter.next().unwrap(); + assert!(event1.data.as_ref().unwrap().contains("msg1")); + assert!(!event1.should_skip()); + + // Second event should be msg2 (ping filtered out) + let event2 = iter.next().unwrap(); + assert!(event2.data.as_ref().unwrap().contains("msg2")); + assert!(!event2.should_skip()); + + // Iterator should end at [DONE] (no more events) + assert!(iter.next().is_none()); + } + + #[test] + fn test_sse_stream_iter_handles_anthropic_events() { + // Create test data with Anthropic-style event/data pairs + let test_lines = vec![ + "event: message_start".to_string(), + "data: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_123\"}}".to_string(), + "event: content_block_delta".to_string(), + "data: {\"type\":\"content_block_delta\",\"delta\":{\"text\":\"Hello\"}}".to_string(), + "data: [DONE]".to_string(), + ]; + + let mut iter = SseStreamIter::new(test_lines.into_iter()); + + // First event should be the event: line + let event1 = iter.next().unwrap(); + assert!(event1.is_event_only()); + assert_eq!(event1.event, Some("message_start".to_string())); + assert_eq!(event1.data, None); + + // Second event should be the data: line + let event2 = iter.next().unwrap(); + assert!(!event2.is_event_only()); + assert_eq!(event2.event, None); + assert!(event2.data.as_ref().unwrap().contains("message_start")); + + // Third event should be another event: line + let event3 = iter.next().unwrap(); + assert!(event3.is_event_only()); + assert_eq!(event3.event, Some("content_block_delta".to_string())); + + // Fourth event should be the content delta data + let event4 = iter.next().unwrap(); + assert!(!event4.is_event_only()); + assert!(event4.data.as_ref().unwrap().contains("Hello")); + + // Iterator should end at [DONE] + assert!(iter.next().is_none()); + } + + #[test] + fn test_provider_stream_response_event_type() { + use crate::apis::anthropic::{MessagesStreamEvent, MessagesContentDelta}; + use crate::apis::openai::ChatCompletionsStreamResponse; + + // Test Anthropic event type + let anthropic_event = MessagesStreamEvent::ContentBlockDelta { + index: 0, + delta: MessagesContentDelta::TextDelta { text: "Hello".to_string() }, + }; + let provider_type = ProviderStreamResponseType::MessagesStreamEvent(anthropic_event); + assert_eq!(provider_type.event_type(), Some("content_block_delta")); + + // Test OpenAI event type (should be None) + let openai_event = ChatCompletionsStreamResponse { + id: "test".to_string(), + object: "chat.completion.chunk".to_string(), + created: 123456789, + model: "gpt-4".to_string(), + choices: vec![], + usage: None, + system_fingerprint: None, + service_tier: None, + }; + let provider_type = ProviderStreamResponseType::ChatCompletionsStreamResponse(openai_event); + assert_eq!(provider_type.event_type(), None); + } +} diff --git a/crates/llm_gateway/src/filter_context.rs b/crates/llm_gateway/src/filter_context.rs index fc31355a..258a1a1c 100644 --- a/crates/llm_gateway/src/filter_context.rs +++ b/crates/llm_gateway/src/filter_context.rs @@ -89,7 +89,6 @@ impl RootContext for FilterContext { ); Some(Box::new(StreamContext::new( - context_id, Rc::clone(&self.metrics), Rc::clone( self.llm_providers diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 6b2c5f15..da86296d 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -1,19 +1,3 @@ -use crate::metrics::Metrics; -use common::configuration::{LlmProvider, LlmProviderType, Overrides}; -use common::consts::{ - ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, CHAT_COMPLETIONS_PATH, HEALTHZ_PATH, - RATELIMIT_SELECTOR_HEADER_KEY, REQUEST_ID_HEADER, TRACE_PARENT_HEADER, -}; -use common::errors::ServerError; -use common::llm_providers::LlmProviders; -use common::ratelimit::Header; -use common::stats::{IncrementingMetric, RecordingMetric}; -use common::tracing::{Event, Span, TraceData, Traceparent}; -use common::{ratelimit, routing, tokenizer}; -use hermesllm::providers::response::ProviderStreamResponseIter; -use hermesllm::{ - ProviderId, ProviderRequest, ProviderRequestType, ProviderResponse, ProviderResponseType, -}; use http::StatusCode; use log::{debug, info, warn}; use proxy_wasm::hostcalls::get_current_time; @@ -25,13 +9,31 @@ use std::rc::Rc; use std::sync::{Arc, Mutex}; use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use crate::metrics::Metrics; +use common::configuration::{LlmProvider, LlmProviderType, Overrides}; +use common::consts::{ + ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, HEALTHZ_PATH, RATELIMIT_SELECTOR_HEADER_KEY, + REQUEST_ID_HEADER, TRACE_PARENT_HEADER, +}; +use common::errors::ServerError; +use common::llm_providers::LlmProviders; +use common::ratelimit::Header; +use common::stats::{IncrementingMetric, RecordingMetric}; +use common::tracing::{Event, Span, TraceData, Traceparent}; +use common::{ratelimit, routing, tokenizer}; +use hermesllm::clients::endpoints::SupportedAPIs; +use hermesllm::providers::response::{ProviderResponse, SseEvent, SseStreamIter}; +use hermesllm::{ProviderId, ProviderRequest, ProviderRequestType, ProviderResponseType}; + pub struct StreamContext { - context_id: u32, metrics: Rc, ratelimit_selector: Option
, streaming_response: bool, response_tokens: usize, - is_chat_completions_request: bool, + /// The API that is requested by the client (before compatibility mapping) + client_api: Option, + /// The API that should be used for the upstream provider (after compatibility mapping) + resolved_api: Option, llm_providers: Rc, llm_provider: Option>, request_id: Option, @@ -47,20 +49,19 @@ pub struct StreamContext { impl StreamContext { pub fn new( - context_id: u32, metrics: Rc, llm_providers: Rc, traces_queue: Arc>>, overrides: Rc>, ) -> Self { StreamContext { - context_id, metrics, overrides, ratelimit_selector: None, streaming_response: false, response_tokens: 0, - is_chat_completions_request: false, + client_api: None, + resolved_api: None, llm_providers, llm_provider: None, request_id: None, @@ -73,6 +74,16 @@ impl StreamContext { user_message: None, } } + + /// Returns the appropriate request identifier for logging. + /// Uses request_id (from x-request-id header) when available, otherwise returns a literal indicating no request ID. + fn request_identifier(&self) -> String { + self.request_id + .as_ref() + .filter(|id| !id.is_empty()) // Filter out empty strings + .map(|id| id.clone()) + .unwrap_or_else(|| "NO_REQUEST_ID".to_string()) + } fn llm_provider(&self) -> &LlmProvider { self.llm_provider .as_ref() @@ -83,6 +94,18 @@ impl StreamContext { self.llm_provider().to_provider_id() } + //This function assumes that the provider has been set. + fn update_upstream_path(&mut self, request_path: &str) { + let hermes_provider_id = self.llm_provider().to_provider_id(); + if let Some(api) = &self.client_api { + let target_endpoint = + api.target_endpoint_for_provider(&hermes_provider_id, request_path); + if target_endpoint != request_path { + self.set_http_request_header(":path", Some(&target_endpoint)); + } + } + } + fn select_llm_provider(&mut self) { let provider_hint = self .get_http_request_header(ARCH_PROVIDER_HINT_HEADER) @@ -93,32 +116,11 @@ impl StreamContext { provider_hint, )); - match self.llm_provider.as_ref().unwrap().provider_interface { - LlmProviderType::Groq => { - if let Some(path) = self.get_http_request_header(":path") { - if path.starts_with("/v1/") { - let new_path = format!("/openai{}", path); - self.set_http_request_header(":path", Some(new_path.as_str())); - } - } - } - LlmProviderType::Gemini => { - if let Some(path) = self.get_http_request_header(":path") { - if path == "/v1/chat/completions" { - self.set_http_request_header( - ":path", - Some("/v1beta/openai/chat/completions"), - ); - } - } - } - _ => {} - } - - debug!( - "request received: llm provider hint: {}, selected provider: {}", + info!( + "[ARCHGW_REQ_ID:{}] PROVIDER_SELECTION: Hint='{}' -> Selected='{}'", + self.request_identifier(), self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER) - .unwrap_or_default(), + .unwrap_or("none".to_string()), self.llm_provider.as_ref().unwrap().name ); } @@ -135,9 +137,23 @@ impl StreamContext { ), })?; - let authorization_header_value = format!("Bearer {}", llm_provider_api_key_value); - - self.set_http_request_header("Authorization", Some(&authorization_header_value)); + // Set API-specific headers based on the resolved upstream API + match self.resolved_api.as_ref() { + Some(SupportedAPIs::AnthropicMessagesAPI(_)) => { + // Anthropic API requires x-api-key and anthropic-version headers + // Remove any existing Authorization header since Anthropic doesn't use it + self.remove_http_request_header("Authorization"); + self.set_http_request_header("x-api-key", Some(llm_provider_api_key_value)); + self.set_http_request_header("anthropic-version", Some("2023-06-01")); + } + Some(SupportedAPIs::OpenAIChatCompletions(_)) | None => { + // OpenAI and default: use Authorization Bearer token + // Remove any existing x-api-key header since OpenAI doesn't use it + self.remove_http_request_header("x-api-key"); + let authorization_header_value = format!("Bearer {}", llm_provider_api_key_value); + self.set_http_request_header("Authorization", Some(&authorization_header_value)); + } + } Ok(()) } @@ -179,7 +195,13 @@ impl StreamContext { // Tokenize and record token count. let token_count = tokenizer::token_count(model, json_string).unwrap_or(0); - debug!("Recorded input token count: {}", token_count); + info!( + "[ARCHGW_REQ_ID:{}] TOKEN_COUNT: model='{}' input_tokens={}", + self.request_identifier(), + model, + token_count + ); + // Record the token count to metrics. self.metrics .input_sequence_length @@ -187,18 +209,361 @@ impl StreamContext { // Check if rate limiting needs to be applied. if let Some(selector) = self.ratelimit_selector.take() { - log::debug!("Applying ratelimit for model: {}", model); + info!( + "[ARCHGW_REQ_ID:{}] RATELIMIT_CHECK: model='{}' selector='{}:{}'", + self.request_identifier(), + model, + selector.key, + selector.value + ); ratelimit::ratelimits(None).read().unwrap().check_limit( model.to_owned(), selector, NonZero::new(token_count as u32).unwrap(), )?; } else { - debug!("No rate limit applied for model: {}", model); + debug!( + "[ARCHGW_REQ_ID:{}] RATELIMIT_SKIP: model='{}' (no selector)", + self.request_identifier(), + model + ); } Ok(()) } + + // === Helper methods extracted from on_http_response_body (no behavior change) === + #[inline] + fn record_ttft_if_needed(&mut self) { + if self.ttft_duration.is_none() { + let current_time = get_current_time().unwrap(); + self.ttft_time = Some(current_time_ns()); + match current_time.duration_since(self.start_time) { + Ok(duration) => { + let duration_ms = duration.as_millis(); + info!( + "[ARCHGW_REQ_ID:{}] TIME_TO_FIRST_TOKEN: {}ms", + self.request_identifier(), + duration_ms + ); + self.ttft_duration = Some(duration); + self.metrics.time_to_first_token.record(duration_ms as u64); + } + Err(e) => { + warn!( + "[ARCHGW_REQ_ID:{}] TIME_MEASUREMENT_ERROR: {:?}", + self.request_identifier(), + e + ); + } + } + } + } + fn handle_end_of_stream_metrics_and_traces(&mut self, current_time: SystemTime) { + // All streaming responses end with bytes=0 and end_stream=true + // Record the latency for the request + match current_time.duration_since(self.start_time) { + Ok(duration) => { + // Convert the duration to milliseconds + let duration_ms = duration.as_millis(); + info!( + "[ARCHGW_REQ_ID:{}] REQUEST_COMPLETE: latency={}ms tokens={}", + self.request_identifier(), + duration_ms, + self.response_tokens + ); + // Record the latency to the latency histogram + self.metrics.request_latency.record(duration_ms as u64); + + if self.response_tokens > 0 { + // Compute the time per output token + let tpot = duration_ms as u64 / self.response_tokens as u64; + + // Record the time per output token + self.metrics.time_per_output_token.record(tpot); + + info!( + "[ARCHGW_REQ_ID:{}] TOKEN_THROUGHPUT: time_per_token={}ms tokens_per_second={}", + self.request_identifier(), + tpot, + 1000 / tpot + ); + // Record the tokens per second + self.metrics.tokens_per_second.record(1000 / tpot); + } + } + Err(e) => { + warn!("SystemTime error: {:?}", e); + } + } + // Record the output sequence length + self.metrics + .output_sequence_length + .record(self.response_tokens as u64); + + if let Some(traceparent) = self.traceparent.as_ref() { + let current_time_ns = current_time_ns(); + + match Traceparent::try_from(traceparent.to_string()) { + Err(e) => { + warn!("traceparent header is invalid: {}", e); + } + Ok(traceparent) => { + let mut trace_data = common::tracing::TraceData::new(); + let mut llm_span = Span::new( + "egress_traffic".to_string(), + Some(traceparent.trace_id), + Some(traceparent.parent_id), + self.request_body_sent_time.unwrap(), + current_time_ns, + ); + llm_span + .add_attribute("model".to_string(), self.llm_provider().name.to_string()); + + if let Some(user_message) = &self.user_message { + llm_span.add_attribute("user_message".to_string(), user_message.clone()); + } + + if self.ttft_time.is_some() { + llm_span.add_event(Event::new( + "time_to_first_token".to_string(), + self.ttft_time.unwrap(), + )); + trace_data.add_span(llm_span); + } + + self.traces_queue.lock().unwrap().push_back(trace_data); + } + }; + } + } + + fn read_raw_response_body(&mut self, body_size: usize) -> Result, Action> { + if self.streaming_response { + let chunk_size = body_size; + debug!( + "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_CHUNK: streaming=true chunk_size={}", + self.request_identifier(), + chunk_size + ); + let streaming_chunk = match self.get_http_response_body(0, chunk_size) { + Some(chunk) => chunk, + None => { + warn!( + "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_ERROR: empty chunk, size={}", + self.request_identifier(), + chunk_size + ); + return Err(Action::Continue); + } + }; + + if streaming_chunk.len() != chunk_size { + warn!( + "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_MISMATCH: expected={} actual={}", + self.request_identifier(), + chunk_size, + streaming_chunk.len() + ); + } + Ok(streaming_chunk) + } else { + if body_size == 0 { + return Err(Action::Continue); + } + debug!( + "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_COMPLETE: streaming=false body_size={}", + self.request_identifier(), + body_size + ); + match self.get_http_response_body(0, body_size) { + Some(body) => Ok(body), + None => { + warn!("non streaming response body empty"); + Err(Action::Continue) + } + } + } + } + + fn debug_log_body(&self, body: &[u8]) { + debug!( + "[ARCHGW_REQ_ID:{}] UPSTREAM_RAW_RESPONSE: body_size={} content={}", + self.request_identifier(), + body.len(), + String::from_utf8_lossy(body) + ); + } + + fn handle_streaming_response( + &mut self, + body: &[u8], + provider_id: ProviderId, + ) -> Result, Action> { + debug!( + "[ARCHGW_REQ_ID:{}] STREAMING_PROCESS: provider_id={:?} chunk_size={}", + self.request_identifier(), + provider_id, + body.len() + ); + match self.client_api.as_ref() { + Some(client_api) => { + let client_api = client_api.clone(); // Clone to avoid borrowing issues + let upstream_api = provider_id.compatible_api_for_client(&client_api); + + // Parse body into SSE iterator using TryFrom + let sse_iter: SseStreamIter> = + match SseStreamIter::try_from(body) { + Ok(iter) => iter, + Err(e) => { + warn!("Failed to parse body into SSE iterator: {}", e); + return Err(Action::Continue); + } + }; + + let mut response_buffer = Vec::new(); + + // Process each SSE event + for sse_event in sse_iter { + // Transform event if upstream API != client API + let transformed_event: SseEvent = + match SseEvent::try_from((sse_event, &client_api, &upstream_api)) { + Ok(event) => event, + Err(e) => { + warn!("Failed to transform SSE event: {}", e); + return Err(Action::Continue); + } + }; + + // Extract ProviderStreamResponse for processing (token counting, etc.) + if !transformed_event.is_done() { + match transformed_event.provider_response() { + Ok(provider_response) => { + self.record_ttft_if_needed(); + + if provider_response.is_final() { + debug!( + "[ARCHGW_REQ_ID:{}] STREAMING_FINAL_CHUNK: total_tokens={}", + self.request_identifier(), + self.response_tokens + ); + } + + if let Some(content) = provider_response.content_delta() { + let estimated_tokens = content.len() / 4; + self.response_tokens += estimated_tokens.max(1); + debug!( + "[ARCHGW_REQ_ID:{}] STREAMING_TOKEN_UPDATE: delta_chars={} estimated_tokens={} total_tokens={}", + self.request_identifier(), + content.len(), + estimated_tokens.max(1), + self.response_tokens + ); + } + } + Err(e) => { + warn!( + "[ARCHGW_REQ_ID:{}] STREAMING_CHUNK_ERROR: {}", + self.request_identifier(), + e + ); + return Err(Action::Continue); + } + } + } + + // Add transformed event to response buffer + let bytes: Vec = transformed_event.into(); + response_buffer.extend_from_slice(&bytes); + } + + Ok(response_buffer) + } + None => { + warn!("Missing client_api for non-streaming response"); + Err(Action::Continue) + } + } + } + + fn handle_non_streaming_response( + &mut self, + body: &[u8], + provider_id: ProviderId, + ) -> Result, Action> { + info!( + "[ARCHGW_REQ_ID:{}] NON_STREAMING_PROCESS: provider_id={:?} body_size={}", + self.request_identifier(), + provider_id, + body.len() + ); + + let response: ProviderResponseType = match self.client_api.as_ref() { + Some(client_api) => { + match ProviderResponseType::try_from((body, client_api, &provider_id)) { + Ok(response) => response, + Err(e) => { + warn!( + "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_PARSE_ERROR: {} | body: {}", + self.request_identifier(), + e, + String::from_utf8_lossy(body) + ); + self.send_server_error( + ServerError::LogicError(format!("Response parsing error: {}", e)), + Some(StatusCode::BAD_REQUEST), + ); + return Err(Action::Continue); + } + } + } + None => { + warn!( + "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_ERROR: missing client_api", + self.request_identifier() + ); + return Err(Action::Continue); + } + }; + + // Use provider interface to extract usage information + if let Some((prompt_tokens, completion_tokens, total_tokens)) = + response.extract_usage_counts() + { + info!( + "[ARCHGW_REQ_ID:{}] RESPONSE_USAGE: prompt_tokens={} completion_tokens={} total_tokens={}", + self.request_identifier(), + prompt_tokens, + completion_tokens, + total_tokens + ); + self.response_tokens = completion_tokens; + } else { + warn!( + "[ARCHGW_REQ_ID:{}] RESPONSE_USAGE: no usage information found", + self.request_identifier() + ); + } + // Serialize the normalized response back to JSON bytes + match serde_json::to_vec(&response) { + Ok(bytes) => { + debug!( + "[ARCHGW_REQ_ID:{}] CLIENT_RESPONSE_PAYLOAD: {}", + self.request_identifier(), + String::from_utf8_lossy(&bytes) + ); + Ok(bytes) + } + Err(e) => { + warn!("Failed to serialize normalized response: {}", e); + self.send_server_error( + ServerError::LogicError(format!("Response serialization error: {}", e)), + Some(StatusCode::INTERNAL_SERVER_ERROR), + ); + Err(Action::Continue) + } + } + } } // HttpContext is the trait that allows the Rust code to interact with HTTP objects. @@ -212,8 +577,6 @@ impl HttpContext for StreamContext { return Action::Continue; } - self.is_chat_completions_request = CHAT_COMPLETIONS_PATH == request_path; - let use_agent_orchestrator = match self.overrides.as_ref() { Some(overrides) => overrides.use_agent_orchestrator.unwrap_or_default(), None => false, @@ -227,10 +590,35 @@ impl HttpContext for StreamContext { self.llm_provider = Some(Rc::new(LlmProvider { name: routing_header_value.to_string(), provider_interface: LlmProviderType::OpenAI, - ..Default::default() + ..Default::default() //TODO: THiS IS BROKEN. WHY ARE WE ASSUMING OPENAI FOR UPSTREAM? })); } else { + //TODO: Fix this brittle code path. We need to return values and have compile time self.select_llm_provider(); + + // Check if this is a supported API endpoint + if SupportedAPIs::from_endpoint(&request_path).is_none() { + self.send_http_response(404, vec![], Some(b"Unsupported endpoint")); + return Action::Continue; + } + + // Get the SupportedApi for routing decisions + let supported_api: Option = SupportedAPIs::from_endpoint(&request_path); + self.client_api = supported_api; + + // Debug: log provider, client API, resolved API, and request path + if let (Some(api), Some(provider)) = + (self.client_api.as_ref(), self.llm_provider.as_ref()) + { + let provider_id = provider.to_provider_id(); + self.resolved_api = Some(provider_id.compatible_api_for_client(api)); + } else { + self.resolved_api = None; + } + + //We need to update the upstream path if there is a variation for a provider like Gemini/Groq, etc. + self.update_upstream_path(&request_path); + if self.llm_provider().endpoint.is_some() { self.add_http_request_header( ARCH_ROUTING_HEADER, @@ -265,8 +653,10 @@ impl HttpContext for StreamContext { fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action { debug!( - "on_http_request_body [S={}] bytes={} end_stream={}", - self.context_id, body_size, end_of_stream + "[ARCHGW_REQ_ID:{}] REQUEST_BODY_CHUNK: bytes={} end_stream={}", + self.request_identifier(), + body_size, + end_of_stream ); // Let the client send the gateway all the data before sending to the LLM_provider. @@ -298,23 +688,47 @@ impl HttpContext for StreamContext { } }; - let provider_id = self.get_provider_id(); + //We need to deserialize the request body based on the resolved API + let mut deserialized_client_request: ProviderRequestType = match self.client_api.as_ref() { + Some(the_client_api) => { + info!( + "[ARCHGW_REQ_ID:{}] CLIENT_REQUEST_RECEIVED: api={:?} body_size={}", + self.request_identifier(), + the_client_api, + body_bytes.len() + ); - let mut deserialized_body = - match ProviderRequestType::try_from((&body_bytes[..], &provider_id)) { - Ok(deserialized) => deserialized, - Err(e) => { - debug!( - "on_http_request_body: request body: {}", - String::from_utf8_lossy(&body_bytes) - ); - self.send_server_error( - ServerError::LogicError(format!("Request parsing error: {}", e)), - Some(StatusCode::BAD_REQUEST), - ); - return Action::Pause; + debug!( + "[ARCHGW_REQ_ID:{}] CLIENT_REQUEST_PAYLOAD: {}", + self.request_identifier(), + String::from_utf8_lossy(&body_bytes) + ); + + match ProviderRequestType::try_from((&body_bytes[..], the_client_api)) { + Ok(deserialized) => deserialized, + Err(e) => { + warn!( + "[ARCHGW_REQ_ID:{}] CLIENT_REQUEST_PARSE_ERROR: {} | body: {}", + self.request_identifier(), + e, + String::from_utf8_lossy(&body_bytes) + ); + self.send_server_error( + ServerError::LogicError(format!("Request parsing error: {}", e)), + Some(StatusCode::BAD_REQUEST), + ); + return Action::Pause; + } } - }; + } + None => { + self.send_server_error( + ServerError::LogicError("No resolved API for provider".to_string()), + Some(StatusCode::BAD_REQUEST), + ); + return Action::Pause; + } + }; let model_name = match self.llm_provider.as_ref() { Some(llm_provider) => llm_provider.model.as_ref(), @@ -327,7 +741,7 @@ impl HttpContext for StreamContext { }; // Store the original model for logging - let model_requested = deserialized_body.model().to_string(); + let model_requested = deserialized_client_request.model().to_string(); // Apply model name resolution logic using the trait method let resolved_model = match model_name { @@ -336,6 +750,13 @@ impl HttpContext for StreamContext { if use_agent_orchestrator { "agent_orchestrator".to_string() } else { + warn!( + "[ARCHGW_REQ_ID:{}] MODEL_RESOLUTION_ERROR: no model specified | req_model='{}' provider='{}' config_model={:?}", + self.request_identifier(), + model_requested, + self.llm_provider().name, + self.llm_provider().model + ); self.send_server_error( ServerError::BadRequest { why: format!( @@ -353,23 +774,25 @@ impl HttpContext for StreamContext { }; // Set the resolved model using the trait method - deserialized_body.set_model(resolved_model.clone()); + deserialized_client_request.set_model(resolved_model.clone()); // Extract user message for tracing - self.user_message = deserialized_body.get_recent_user_message(); + self.user_message = deserialized_client_request.get_recent_user_message(); info!( - "on_http_request_body: provider: {}, model requested (in body): {}, model selected: {}", - self.llm_provider().name, + "[ARCHGW_REQ_ID:{}] MODEL_RESOLUTION: req_model='{}' -> resolved_model='{}' provider='{}' streaming={}", + self.request_identifier(), model_requested, - model_name.unwrap_or(&"None".to_string()), + resolved_model, + self.llm_provider().name, + deserialized_client_request.is_streaming() ); // Use provider interface for streaming detection and setup - self.streaming_response = deserialized_body.is_streaming(); + self.streaming_response = deserialized_client_request.is_streaming(); // Use provider interface for text extraction (after potential mutation) - let input_tokens_str = deserialized_body.extract_messages_text(); + let input_tokens_str = deserialized_client_request.extract_messages_text(); // enforce ratelimits on ingress if let Err(e) = self.enforce_ratelimits(&resolved_model, input_tokens_str.as_str()) { self.send_server_error( @@ -381,28 +804,64 @@ impl HttpContext for StreamContext { } // Convert chat completion request to llm provider specific request using provider interface - let deserialized_body_bytes = match deserialized_body.to_bytes() { - Ok(bytes) => bytes, - Err(e) => { - warn!("Failed to serialize request body: {}", e); - self.send_server_error( - ServerError::LogicError(format!("Request serialization error: {}", e)), - Some(StatusCode::BAD_REQUEST), + let serialized_body_bytes_upstream = + match self.resolved_api.as_ref() { + Some(upstream) => { + info!( + "[ARCHGW_REQ_ID:{}] UPSTREAM_TRANSFORM: client_api={:?} -> upstream_api={:?}", + self.request_identifier(), self.client_api, upstream ); - return Action::Pause; - } - }; - self.set_http_request_body(0, body_size, &deserialized_body_bytes); + match ProviderRequestType::try_from((deserialized_client_request, upstream)) { + Ok(request) => { + debug!( + "[ARCHGW_REQ_ID:{}] UPSTREAM_REQUEST_PAYLOAD: {}", + self.request_identifier(), + String::from_utf8_lossy(&request.to_bytes().unwrap_or_default()) + ); + match request.to_bytes() { + Ok(bytes) => bytes, + Err(e) => { + warn!("Failed to serialize request body: {}", e); + self.send_server_error( + ServerError::LogicError(format!( + "Request serialization error: {}", + e + )), + Some(StatusCode::BAD_REQUEST), + ); + return Action::Pause; + } + } + } + Err(e) => { + warn!("Failed to create provider request: {}", e); + self.send_server_error( + ServerError::LogicError(format!("Provider request error: {}", e)), + Some(StatusCode::BAD_REQUEST), + ); + return Action::Pause; + } + } + } + None => { + warn!("No upstream API resolved"); + self.send_server_error( + ServerError::LogicError("No upstream API resolved".into()), + Some(StatusCode::BAD_REQUEST), + ); + return Action::Pause; + } + }; + + self.set_http_request_body(0, body_size, &serialized_body_bytes_upstream); Action::Continue } - fn on_http_response_headers(&mut self, _num_headers: usize, end_of_stream: bool) -> Action { - debug!( - "on_http_response_headers [S={}] end_stream={}", - self.context_id, end_of_stream - ); + fn on_http_response_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action { + self.remove_http_response_header("content-length"); + self.remove_http_response_header("content-encoding"); self.set_property( vec!["metadata", "filter_metadata", "llm_filter", "user_prompt"], @@ -413,248 +872,57 @@ impl HttpContext for StreamContext { } fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> Action { - debug!( - "on_http_response_body [S={}] bytes={} end_stream={}", - self.context_id, body_size, end_of_stream - ); - if self.request_body_sent_time.is_none() { debug!("on_http_response_body: request body not sent, not doing any processing in llm filter"); return Action::Continue; } - if !self.is_chat_completions_request { - info!("on_http_response_body: non-chatcompletion request"); - return Action::Continue; + match self.client_api { + Some(SupportedAPIs::OpenAIChatCompletions(_)) => {} + Some(SupportedAPIs::AnthropicMessagesAPI(_)) => {} + _ => { + let api_info = match &self.client_api { + Some(api) => format!("{}", api), + None => "None".to_string(), + }; + info!( + "[ARCHGW_REQ_ID:{}], UNSUPPORTED API: {}", + self.request_identifier(), + api_info + ); + return Action::Continue; + } } let current_time = get_current_time().unwrap(); if end_of_stream && body_size == 0 { - // All streaming responses end with bytes=0 and end_stream=true - // Record the latency for the request - match current_time.duration_since(self.start_time) { - Ok(duration) => { - // Convert the duration to milliseconds - let duration_ms = duration.as_millis(); - info!("on_http_response_body: request latency: {}ms", duration_ms); - // Record the latency to the latency histogram - self.metrics.request_latency.record(duration_ms as u64); - - if self.response_tokens > 0 { - // Compute the time per output token - let tpot = duration_ms as u64 / self.response_tokens as u64; - - // Record the time per output token - self.metrics.time_per_output_token.record(tpot); - - debug!( - "time per token: {}ms, tokens per second: {}", - tpot, - 1000 / tpot - ); - // Record the tokens per second - self.metrics.tokens_per_second.record(1000 / tpot); - } - } - Err(e) => { - warn!("SystemTime error: {:?}", e); - } - } - // Record the output sequence length - self.metrics - .output_sequence_length - .record(self.response_tokens as u64); - - if let Some(traceparent) = self.traceparent.as_ref() { - let current_time_ns = current_time_ns(); - - match Traceparent::try_from(traceparent.to_string()) { - Err(e) => { - warn!("traceparent header is invalid: {}", e); - } - Ok(traceparent) => { - let mut trace_data = common::tracing::TraceData::new(); - let mut llm_span = Span::new( - "egress_traffic".to_string(), - Some(traceparent.trace_id), - Some(traceparent.parent_id), - self.request_body_sent_time.unwrap(), - current_time_ns, - ); - llm_span.add_attribute( - "model".to_string(), - self.llm_provider().name.to_string(), - ); - - if let Some(user_message) = &self.user_message { - llm_span - .add_attribute("user_message".to_string(), user_message.clone()); - } - - if self.ttft_time.is_some() { - llm_span.add_event(Event::new( - "time_to_first_token".to_string(), - self.ttft_time.unwrap(), - )); - trace_data.add_span(llm_span); - } - - self.traces_queue.lock().unwrap().push_back(trace_data); - } - }; - } - + self.handle_end_of_stream_metrics_and_traces(current_time); return Action::Continue; } - let body = if self.streaming_response { - let chunk_start = 0; - let chunk_size = body_size; - debug!( - "on_http_response_body: streaming response reading, {}..{}", - chunk_start, chunk_size - ); - let streaming_chunk = match self.get_http_response_body(0, chunk_size) { - Some(chunk) => chunk, - None => { - warn!( - "response body empty, chunk_start: {}, chunk_size: {}", - chunk_start, chunk_size - ); - return Action::Continue; - } - }; - - if streaming_chunk.len() != chunk_size { - warn!( - "chunk size mismatch: read: {} != requested: {}", - streaming_chunk.len(), - chunk_size - ); - } - streaming_chunk - } else { - if body_size == 0 { - return Action::Continue; - } - debug!("non streaming response bytes read: 0:{}", body_size); - match self.get_http_response_body(0, body_size) { - Some(body) => body, - None => { - warn!("non streaming response body empty"); - return Action::Continue; - } - } + let body = match self.read_raw_response_body(body_size) { + Ok(bytes) => bytes, + Err(action) => return action, }; - if log::log_enabled!(log::Level::Debug) { - debug!( - "response data (converted to utf8): {}", - String::from_utf8_lossy(&body) - ); - } + self.debug_log_body(&body); + let provider_id = self.get_provider_id(); if self.streaming_response { - debug!("processing streaming response"); - match ProviderStreamResponseIter::try_from((&body[..], &self.get_provider_id())) { - Ok(mut streaming_response) => { - // Process each streaming chunk - while let Some(chunk_result) = streaming_response.next() { - match chunk_result { - Ok(chunk) => { - // Compute TTFT on first chunk - if self.ttft_duration.is_none() { - let current_time = get_current_time().unwrap(); - self.ttft_time = Some(current_time_ns()); - match current_time.duration_since(self.start_time) { - Ok(duration) => { - let duration_ms = duration.as_millis(); - info!( - "on_http_response_body: time to first token: {}ms", - duration_ms - ); - self.ttft_duration = Some(duration); - self.metrics - .time_to_first_token - .record(duration_ms as u64); - } - Err(e) => { - warn!("SystemTime error: {:?}", e); - } - } - } - - // For streaming responses, we handle token counting differently - // The ProviderStreamResponse trait provides content_delta, is_final, and role - // Token counting for streaming responses typically happens with final usage chunk - if chunk.is_final() { - // For now, we'll implement basic token estimation - // In a complete implementation, the final chunk would contain usage information - debug!("Received final streaming chunk"); - } - - // For now, estimate tokens from content delta - if let Some(content) = chunk.content_delta() { - // Rough estimation: ~4 characters per token - let estimated_tokens = content.len() / 4; - self.response_tokens += estimated_tokens.max(1); - } - } - Err(e) => { - warn!("Error processing streaming chunk: {}", e); - return Action::Continue; - } - } - } - } - Err(e) => { - warn!("Failed to parse streaming response: {}", e); + match self.handle_streaming_response(&body, provider_id) { + Ok(serialized_body) => { + self.set_http_response_body(0, body_size, &serialized_body); } + Err(action) => return action, } } else { - debug!("non streaming response"); - let provider_id = self.get_provider_id(); - let response: ProviderResponseType = - match ProviderResponseType::try_from((&body[..], provider_id)) { - Ok(response) => response, - Err(e) => { - warn!( - "could not parse response: {}, body str: {}", - e, - String::from_utf8_lossy(&body) - ); - debug!( - "on_http_response_body: S[{}], response body: {}", - self.context_id, - String::from_utf8_lossy(&body) - ); - self.send_server_error( - ServerError::LogicError(format!("Response parsing error: {}", e)), - Some(StatusCode::BAD_REQUEST), - ); - return Action::Continue; - } - }; - - // Use provider interface to extract usage information - if let Some((prompt_tokens, completion_tokens, total_tokens)) = - response.extract_usage_counts() - { - debug!( - "Response usage: prompt={}, completion={}, total={}", - prompt_tokens, completion_tokens, total_tokens - ); - self.response_tokens = completion_tokens; - } else { - warn!("No usage information found in response"); + match self.handle_non_streaming_response(&body, provider_id) { + Ok(serialized_body) => { + self.set_http_response_body(0, body_size, &serialized_body); + } + Err(action) => return action, } } - - debug!( - "recv [S={}] total_tokens={} end_stream={}", - self.context_id, self.response_tokens, end_of_stream - ); - Action::Continue } } diff --git a/crates/llm_gateway/tests/integration.rs b/crates/llm_gateway/tests/integration.rs index 82ae8322..48d4bc86 100644 --- a/crates/llm_gateway/tests/integration.rs +++ b/crates/llm_gateway/tests/integration.rs @@ -31,14 +31,15 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) { ) .returning(None) .expect_log( - Some(LogLevel::Debug), - Some("request received: llm provider hint: default, selected provider: open-ai-gpt-4"), + Some(LogLevel::Info), + None, // Dynamic request ID - could be context_id or x-request-id ) .expect_add_header_map_value( Some(MapType::HttpRequestHeaders), Some("x-arch-llm-provider"), Some("openai"), ) + .expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-api-key")) .expect_replace_header_map_value( Some(MapType::HttpRequestHeaders), Some("Authorization"), @@ -193,10 +194,7 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() { module .call_proxy_on_context_create(http_context, filter_context) - .expect_log( - Some(LogLevel::Trace), - Some("||| create_http_context called with context_id: 2 |||"), - ) + .expect_log(Some(LogLevel::Trace), None) .execute_and_expect(ReturnType::None) .unwrap(); @@ -211,15 +209,19 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() { chat_completions_request_body.len() as i32, true, ) - .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - REQUEST_BODY_CHUNK .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) - .expect_log(Some(LogLevel::Info), None) - .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - CLIENT_REQUEST_RECEIVED + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - CLIENT_REQUEST_PAYLOAD + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - MODEL_RESOLUTION + .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - TOKEN_COUNT .expect_metric_record("input_sequence_length", 21) - .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK + .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=21")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - UPSTREAM_TRANSFORM + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - UPSTREAM_REQUEST_PAYLOAD .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -263,15 +265,19 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() { incomplete_chat_completions_request_body.len() as i32, true, ) + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - REQUEST_BODY_CHUNK .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(incomplete_chat_completions_request_body)) - .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4")) - .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4")) - .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 13")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - CLIENT_REQUEST_RECEIVED + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - CLIENT_REQUEST_PAYLOAD + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - MODEL_RESOLUTION + .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - TOKEN_COUNT .expect_metric_record("input_sequence_length", 13) - .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4")) - .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=13"#)) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK + .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=13")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -322,16 +328,18 @@ fn llm_gateway_request_ratelimited() { chat_completions_request_body.len() as i32, true, ) - .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID) .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id - .expect_log(Some(LogLevel::Info), None) - .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"messages\": [{\"role\": \"system\",\"content\": \"You are a helpful poetic assistant!, skilled in explaining complex programming concepts with creative flair. Be sure to be concise and to the point.\"},{\"role\": \"user\",\"content\": \"Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it.\"}],\"model\": \"gpt-4\"}")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) + .expect_log(Some(LogLevel::Info), None)// Dynamic request ID) .expect_metric_record("input_sequence_length", 107) - .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4")) - .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=107")) .expect_log(Some(LogLevel::Warn), Some(r#"server error occurred: exceeded limit provider=gpt-4, selector=Header { key: "selector-key", value: "selector-value" }, tokens_used=107"#)) .expect_send_local_response( Some(StatusCode::TOO_MANY_REQUESTS.as_u16().into()), @@ -376,16 +384,21 @@ fn llm_gateway_request_not_ratelimited() { chat_completions_request_body.len() as i32, true, ) - .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID) .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id .expect_log(Some(LogLevel::Info), None) - .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4")) - .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29")) + // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) .expect_metric_record("input_sequence_length", 29) - .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4")) - .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#)) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -423,16 +436,20 @@ fn llm_gateway_override_model_name() { chat_completions_request_body.len() as i32, true, ) + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID) .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id - .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4")) - .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4")) - .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) .expect_metric_record("input_sequence_length", 29) - .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4")) - .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#)) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -470,19 +487,23 @@ fn llm_gateway_override_use_default_model() { chat_completions_request_body.len() as i32, true, ) - .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID) .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}")) .expect_log( Some(LogLevel::Info), - Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"), + None // Dynamic request ID, ) - .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4")) - .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29")) + .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) .expect_metric_record("input_sequence_length", 29) - .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4")) - .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#)) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -520,16 +541,21 @@ fn llm_gateway_override_use_model_name_none() { chat_completions_request_body.len() as i32, true, ) + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID) .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id - .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): none, model selected: gpt-4")) - .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4")) - .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29")) + .expect_log(Some(LogLevel::Info), None) + // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"none\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) .expect_metric_record("input_sequence_length", 29) - .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4")) - .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#)) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); diff --git a/demos/samples_java/weather_forcecast_service/src/main/java/weather/controller/WeatherController.java b/demos/samples_java/weather_forcecast_service/src/main/java/weather/controller/WeatherController.java index bb41227a..d7caeaf9 100644 --- a/demos/samples_java/weather_forcecast_service/src/main/java/weather/controller/WeatherController.java +++ b/demos/samples_java/weather_forcecast_service/src/main/java/weather/controller/WeatherController.java @@ -7,7 +7,6 @@ import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestBody; import org.springframework.web.bind.annotation.RestController; -import java.time.Instant; import java.time.LocalDate; import java.util.ArrayList; import java.util.List; diff --git a/demos/samples_python/weather_forecast/arch_config.yaml b/demos/samples_python/weather_forecast/arch_config.yaml index afc0ef04..6d33a5c5 100644 --- a/demos/samples_python/weather_forecast/arch_config.yaml +++ b/demos/samples_python/weather_forecast/arch_config.yaml @@ -7,6 +7,12 @@ listeners: message_format: openai timeout: 30s + egress_traffic: + address: 0.0.0.0 + port: 12000 + message_format: openai + timeout: 30s + endpoints: weather_forecast_service: endpoint: host.docker.internal:18083 @@ -24,6 +30,12 @@ llm_providers: model: openai/gpt-4o default: true + - access_key: $OPENAI_API_KEY + model: openai/gpt-4o-mini + + - access_key: $ANTHROPIC_API_KEY + model: anthropic/claude-sonnet-4-20250514 + system_prompt: | You are a helpful assistant. diff --git a/demos/shared/chatbot_ui/requirements.txt b/demos/shared/chatbot_ui/requirements.txt index b8e20cba..da4ac00b 100644 --- a/demos/shared/chatbot_ui/requirements.txt +++ b/demos/shared/chatbot_ui/requirements.txt @@ -5,4 +5,4 @@ asyncio==3.4.3 httpx==0.27.0 python-dotenv==1.0.1 pydantic==2.8.2 -openai==1.51.0 +openai==1.54.0 diff --git a/demos/use_cases/llm_routing/arch_config.yaml b/demos/use_cases/llm_routing/arch_config.yaml index addaae66..f90643ff 100644 --- a/demos/use_cases/llm_routing/arch_config.yaml +++ b/demos/use_cases/llm_routing/arch_config.yaml @@ -23,10 +23,10 @@ llm_providers: model: mistral/ministral-3b-latest - access_key: $ANTHROPIC_API_KEY - model: claude/claude-3-7-sonnet-latest + model: anthropic/claude-3-7-sonnet-latest - access_key: $ANTHROPIC_API_KEY - model: claude/claude-sonnet-4-0 + model: anthropic/claude-sonnet-4-0 - access_key: $DEEPSEEK_API_KEY model: deepseek/deepseek-reasoner diff --git a/demos/use_cases/preference_based_routing/arch_config.yaml b/demos/use_cases/preference_based_routing/arch_config.yaml index 33136325..cb9f685a 100644 --- a/demos/use_cases/preference_based_routing/arch_config.yaml +++ b/demos/use_cases/preference_based_routing/arch_config.yaml @@ -19,8 +19,8 @@ llm_providers: - name: code understanding description: understand and explain existing code snippets, functions, or libraries - - model: openai/gpt-4.1 - access_key: $OPENAI_API_KEY + - model: anthropic/claude-sonnet-4-20250514 + access_key: $ANTHROPIC_API_KEY routing_preferences: - name: code generation description: generating new code snippets, functions, or boilerplate based on user prompts or requirements diff --git a/demos/use_cases/preference_based_routing/hurl_tests/simple.hurl b/demos/use_cases/preference_based_routing/hurl_tests/simple.hurl index d9b243e7..1aa56271 100644 --- a/demos/use_cases/preference_based_routing/hurl_tests/simple.hurl +++ b/demos/use_cases/preference_based_routing/hurl_tests/simple.hurl @@ -2,7 +2,7 @@ POST http://localhost:12000/v1/chat/completions Content-Type: application/json { - "model": "openai/gpt-4.1", + "model": "openai/gpt-4o-mini", "messages": [ { "role": "user", @@ -13,7 +13,7 @@ Content-Type: application/json HTTP 200 [Asserts] header "content-type" == "application/json" -jsonpath "$.model" matches /^gpt-4.1/ +jsonpath "$.model" matches /^gpt-4o-mini/ jsonpath "$.usage" != null jsonpath "$.choices[0].message.content" != null jsonpath "$.choices[0].message.role" == "assistant" diff --git a/tests/e2e/docker-compose.yaml b/tests/e2e/docker-compose.yaml index 53b4338d..a78c5632 100644 --- a/tests/e2e/docker-compose.yaml +++ b/tests/e2e/docker-compose.yaml @@ -16,5 +16,6 @@ services: environment: - OPENAI_API_KEY=${OPENAI_API_KEY:?error} - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error} + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:?error} - OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-51000} diff --git a/tests/e2e/poetry.lock b/tests/e2e/poetry.lock index 7e296f88..35f9156b 100644 --- a/tests/e2e/poetry.lock +++ b/tests/e2e/poetry.lock @@ -1,4 +1,63 @@ -# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. + +[[package]] +name = "annotated-types" +version = "0.7.0" +description = "Reusable constraint types to use with typing.Annotated" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, + {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, +] + +[[package]] +name = "anthropic" +version = "0.66.0" +description = "The official Python library for the anthropic API" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "anthropic-0.66.0-py3-none-any.whl", hash = "sha256:67b8cd4486f3cdd09211598dc5325cc8e4e349c106a03041231d551603551c06"}, + {file = "anthropic-0.66.0.tar.gz", hash = "sha256:5aa8b18da57dc27d83fc1d82c9fb860977e5adfae3e0c215d7ab2ebd70afb9cb"}, +] + +[package.dependencies] +anyio = ">=3.5.0,<5" +distro = ">=1.7.0,<2" +httpx = ">=0.25.0,<1" +jiter = ">=0.4.0,<1" +pydantic = ">=1.9.0,<3" +sniffio = "*" +typing-extensions = ">=4.10,<5" + +[package.extras] +aiohttp = ["aiohttp", "httpx-aiohttp (>=0.1.8)"] +bedrock = ["boto3 (>=1.28.57)", "botocore (>=1.31.57)"] +vertex = ["google-auth[requests] (>=2,<3)"] + +[[package]] +name = "anyio" +version = "4.10.0" +description = "High-level concurrency and networking framework on top of asyncio or Trio" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "anyio-4.10.0-py3-none-any.whl", hash = "sha256:60e474ac86736bbfd6f210f7a61218939c318f43f9972497381f1c5e930ed3d1"}, + {file = "anyio-4.10.0.tar.gz", hash = "sha256:3f3fae35c96039744587aa5b8371e7e8e603c0702999535961dd336026973ba6"}, +] + +[package.dependencies] +exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} +idna = ">=2.8" +sniffio = ">=1.1" +typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} + +[package.extras] +trio = ["trio (>=0.26.1)"] [[package]] name = "attrs" @@ -6,18 +65,19 @@ version = "25.1.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "attrs-25.1.0-py3-none-any.whl", hash = "sha256:c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a"}, {file = "attrs-25.1.0.tar.gz", hash = "sha256:1c97078a80c814273a76b2a298a932eb681c87415c11dee0a6921de7f1b02c3e"}, ] [package.extras] -benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] +cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] +dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] -tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] +tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\""] [[package]] name = "certifi" @@ -25,6 +85,7 @@ version = "2025.1.31" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"}, {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"}, @@ -36,6 +97,8 @@ version = "1.17.1" description = "Foreign Function Interface for Python calling C code." optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "os_name == \"nt\" and implementation_name != \"pypy\"" files = [ {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"}, @@ -115,6 +178,7 @@ version = "3.4.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, @@ -216,10 +280,12 @@ version = "0.4.6" description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["main", "dev"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +markers = {main = "sys_platform == \"win32\" or platform_system == \"Windows\"", dev = "sys_platform == \"win32\""} [[package]] name = "coverage" @@ -227,6 +293,7 @@ version = "7.6.12" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "coverage-7.6.12-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:704c8c8c6ce6569286ae9622e534b4f5b9759b6f2cd643f1c1a61f666d534fe8"}, {file = "coverage-7.6.12-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ad7525bf0241e5502168ae9c643a2f6c219fa0a283001cee4cf23a9b7da75879"}, @@ -297,7 +364,7 @@ files = [ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} [package.extras] -toml = ["tomli"] +toml = ["tomli ; python_full_version <= \"3.11.0a6\""] [[package]] name = "deepdiff" @@ -305,6 +372,7 @@ version = "8.2.0" description = "Deep Difference and Search of any Python object/data. Recreate objects by adding adding deltas to each other." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "deepdiff-8.2.0-py3-none-any.whl", hash = "sha256:5091f2cdfd372b1b9f6bfd8065ba323ae31118dc4e42594371b38c8bea3fd0a4"}, {file = "deepdiff-8.2.0.tar.gz", hash = "sha256:6ec78f65031485735545ffbe7a61e716c3c2d12ca6416886d5e9291fc76c46c3"}, @@ -317,12 +385,26 @@ orderly-set = ">=5.3.0,<6" cli = ["click (==8.1.8)", "pyyaml (==6.0.2)"] optimize = ["orjson"] +[[package]] +name = "distro" +version = "1.9.0" +description = "Distro - an OS platform information API" +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, + {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, +] + [[package]] name = "exceptiongroup" version = "1.2.2" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" +groups = ["main", "dev"] +markers = "python_version == \"3.10\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, @@ -337,17 +419,66 @@ version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, ] +[[package]] +name = "httpcore" +version = "1.0.8" +description = "A minimal low-level HTTP client." +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "httpcore-1.0.8-py3-none-any.whl", hash = "sha256:5254cf149bcb5f75e9d1b2b9f729ea4a4b883d1ad7379fc632b727cec23674be"}, + {file = "httpcore-1.0.8.tar.gz", hash = "sha256:86e94505ed24ea06514883fd44d2bc02d90e77e7979c8eb71b90f41d364a1bad"}, +] + +[package.dependencies] +certifi = "*" +h11 = ">=0.13,<0.15" + +[package.extras] +asyncio = ["anyio (>=4.0,<5.0)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +trio = ["trio (>=0.22.0,<1.0)"] + +[[package]] +name = "httpx" +version = "0.28.1" +description = "The next generation HTTP client." +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"}, + {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"}, +] + +[package.dependencies] +anyio = "*" +certifi = "*" +httpcore = "==1.*" +idna = "*" + +[package.extras] +brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +zstd = ["zstandard (>=0.18.0)"] + [[package]] name = "idna" version = "3.10" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, @@ -362,17 +493,134 @@ version = "2.0.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.7" +groups = ["main", "dev"] files = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "jiter" +version = "0.10.0" +description = "Fast iterable JSON parser." +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "jiter-0.10.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:cd2fb72b02478f06a900a5782de2ef47e0396b3e1f7d5aba30daeb1fce66f303"}, + {file = "jiter-0.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:32bb468e3af278f095d3fa5b90314728a6916d89ba3d0ffb726dd9bf7367285e"}, + {file = "jiter-0.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa8b3e0068c26ddedc7abc6fac37da2d0af16b921e288a5a613f4b86f050354f"}, + {file = "jiter-0.10.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:286299b74cc49e25cd42eea19b72aa82c515d2f2ee12d11392c56d8701f52224"}, + {file = "jiter-0.10.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6ed5649ceeaeffc28d87fb012d25a4cd356dcd53eff5acff1f0466b831dda2a7"}, + {file = "jiter-0.10.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2ab0051160cb758a70716448908ef14ad476c3774bd03ddce075f3c1f90a3d6"}, + {file = "jiter-0.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03997d2f37f6b67d2f5c475da4412be584e1cec273c1cfc03d642c46db43f8cf"}, + {file = "jiter-0.10.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c404a99352d839fed80d6afd6c1d66071f3bacaaa5c4268983fc10f769112e90"}, + {file = "jiter-0.10.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:66e989410b6666d3ddb27a74c7e50d0829704ede652fd4c858e91f8d64b403d0"}, + {file = "jiter-0.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b532d3af9ef4f6374609a3bcb5e05a1951d3bf6190dc6b176fdb277c9bbf15ee"}, + {file = "jiter-0.10.0-cp310-cp310-win32.whl", hash = "sha256:da9be20b333970e28b72edc4dff63d4fec3398e05770fb3205f7fb460eb48dd4"}, + {file = "jiter-0.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:f59e533afed0c5b0ac3eba20d2548c4a550336d8282ee69eb07b37ea526ee4e5"}, + {file = "jiter-0.10.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:3bebe0c558e19902c96e99217e0b8e8b17d570906e72ed8a87170bc290b1e978"}, + {file = "jiter-0.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:558cc7e44fd8e507a236bee6a02fa17199ba752874400a0ca6cd6e2196cdb7dc"}, + {file = "jiter-0.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d613e4b379a07d7c8453c5712ce7014e86c6ac93d990a0b8e7377e18505e98d"}, + {file = "jiter-0.10.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f62cf8ba0618eda841b9bf61797f21c5ebd15a7a1e19daab76e4e4b498d515b2"}, + {file = "jiter-0.10.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:919d139cdfa8ae8945112398511cb7fca58a77382617d279556b344867a37e61"}, + {file = "jiter-0.10.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13ddbc6ae311175a3b03bd8994881bc4635c923754932918e18da841632349db"}, + {file = "jiter-0.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c440ea003ad10927a30521a9062ce10b5479592e8a70da27f21eeb457b4a9c5"}, + {file = "jiter-0.10.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dc347c87944983481e138dea467c0551080c86b9d21de6ea9306efb12ca8f606"}, + {file = "jiter-0.10.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:13252b58c1f4d8c5b63ab103c03d909e8e1e7842d302473f482915d95fefd605"}, + {file = "jiter-0.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7d1bbf3c465de4a24ab12fb7766a0003f6f9bce48b8b6a886158c4d569452dc5"}, + {file = "jiter-0.10.0-cp311-cp311-win32.whl", hash = "sha256:db16e4848b7e826edca4ccdd5b145939758dadf0dc06e7007ad0e9cfb5928ae7"}, + {file = "jiter-0.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:9c9c1d5f10e18909e993f9641f12fe1c77b3e9b533ee94ffa970acc14ded3812"}, + {file = "jiter-0.10.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:1e274728e4a5345a6dde2d343c8da018b9d4bd4350f5a472fa91f66fda44911b"}, + {file = "jiter-0.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7202ae396446c988cb2a5feb33a543ab2165b786ac97f53b59aafb803fef0744"}, + {file = "jiter-0.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23ba7722d6748b6920ed02a8f1726fb4b33e0fd2f3f621816a8b486c66410ab2"}, + {file = "jiter-0.10.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:371eab43c0a288537d30e1f0b193bc4eca90439fc08a022dd83e5e07500ed026"}, + {file = "jiter-0.10.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c675736059020365cebc845a820214765162728b51ab1e03a1b7b3abb70f74c"}, + {file = "jiter-0.10.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0c5867d40ab716e4684858e4887489685968a47e3ba222e44cde6e4a2154f959"}, + {file = "jiter-0.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:395bb9a26111b60141757d874d27fdea01b17e8fac958b91c20128ba8f4acc8a"}, + {file = "jiter-0.10.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6842184aed5cdb07e0c7e20e5bdcfafe33515ee1741a6835353bb45fe5d1bd95"}, + {file = "jiter-0.10.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:62755d1bcea9876770d4df713d82606c8c1a3dca88ff39046b85a048566d56ea"}, + {file = "jiter-0.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:533efbce2cacec78d5ba73a41756beff8431dfa1694b6346ce7af3a12c42202b"}, + {file = "jiter-0.10.0-cp312-cp312-win32.whl", hash = "sha256:8be921f0cadd245e981b964dfbcd6fd4bc4e254cdc069490416dd7a2632ecc01"}, + {file = "jiter-0.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:a7c7d785ae9dda68c2678532a5a1581347e9c15362ae9f6e68f3fdbfb64f2e49"}, + {file = "jiter-0.10.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e0588107ec8e11b6f5ef0e0d656fb2803ac6cf94a96b2b9fc675c0e3ab5e8644"}, + {file = "jiter-0.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cafc4628b616dc32530c20ee53d71589816cf385dd9449633e910d596b1f5c8a"}, + {file = "jiter-0.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:520ef6d981172693786a49ff5b09eda72a42e539f14788124a07530f785c3ad6"}, + {file = "jiter-0.10.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:554dedfd05937f8fc45d17ebdf298fe7e0c77458232bcb73d9fbbf4c6455f5b3"}, + {file = "jiter-0.10.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5bc299da7789deacf95f64052d97f75c16d4fc8c4c214a22bf8d859a4288a1c2"}, + {file = "jiter-0.10.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5161e201172de298a8a1baad95eb85db4fb90e902353b1f6a41d64ea64644e25"}, + {file = "jiter-0.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e2227db6ba93cb3e2bf67c87e594adde0609f146344e8207e8730364db27041"}, + {file = "jiter-0.10.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:15acb267ea5e2c64515574b06a8bf393fbfee6a50eb1673614aa45f4613c0cca"}, + {file = "jiter-0.10.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:901b92f2e2947dc6dfcb52fd624453862e16665ea909a08398dde19c0731b7f4"}, + {file = "jiter-0.10.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d0cb9a125d5a3ec971a094a845eadde2db0de85b33c9f13eb94a0c63d463879e"}, + {file = "jiter-0.10.0-cp313-cp313-win32.whl", hash = "sha256:48a403277ad1ee208fb930bdf91745e4d2d6e47253eedc96e2559d1e6527006d"}, + {file = "jiter-0.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:75f9eb72ecb640619c29bf714e78c9c46c9c4eaafd644bf78577ede459f330d4"}, + {file = "jiter-0.10.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:28ed2a4c05a1f32ef0e1d24c2611330219fed727dae01789f4a335617634b1ca"}, + {file = "jiter-0.10.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14a4c418b1ec86a195f1ca69da8b23e8926c752b685af665ce30777233dfe070"}, + {file = "jiter-0.10.0-cp313-cp313t-win_amd64.whl", hash = "sha256:d7bfed2fe1fe0e4dda6ef682cee888ba444b21e7a6553e03252e4feb6cf0adca"}, + {file = "jiter-0.10.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:5e9251a5e83fab8d87799d3e1a46cb4b7f2919b895c6f4483629ed2446f66522"}, + {file = "jiter-0.10.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:023aa0204126fe5b87ccbcd75c8a0d0261b9abdbbf46d55e7ae9f8e22424eeb8"}, + {file = "jiter-0.10.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c189c4f1779c05f75fc17c0c1267594ed918996a231593a21a5ca5438445216"}, + {file = "jiter-0.10.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:15720084d90d1098ca0229352607cd68256c76991f6b374af96f36920eae13c4"}, + {file = "jiter-0.10.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e4f2fb68e5f1cfee30e2b2a09549a00683e0fde4c6a2ab88c94072fc33cb7426"}, + {file = "jiter-0.10.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ce541693355fc6da424c08b7edf39a2895f58d6ea17d92cc2b168d20907dee12"}, + {file = "jiter-0.10.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31c50c40272e189d50006ad5c73883caabb73d4e9748a688b216e85a9a9ca3b9"}, + {file = "jiter-0.10.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fa3402a2ff9815960e0372a47b75c76979d74402448509ccd49a275fa983ef8a"}, + {file = "jiter-0.10.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:1956f934dca32d7bb647ea21d06d93ca40868b505c228556d3373cbd255ce853"}, + {file = "jiter-0.10.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:fcedb049bdfc555e261d6f65a6abe1d5ad68825b7202ccb9692636c70fcced86"}, + {file = "jiter-0.10.0-cp314-cp314-win32.whl", hash = "sha256:ac509f7eccca54b2a29daeb516fb95b6f0bd0d0d8084efaf8ed5dfc7b9f0b357"}, + {file = "jiter-0.10.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5ed975b83a2b8639356151cef5c0d597c68376fc4922b45d0eb384ac058cfa00"}, + {file = "jiter-0.10.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa96f2abba33dc77f79b4cf791840230375f9534e5fac927ccceb58c5e604a5"}, + {file = "jiter-0.10.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:bd6292a43c0fc09ce7c154ec0fa646a536b877d1e8f2f96c19707f65355b5a4d"}, + {file = "jiter-0.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:39de429dcaeb6808d75ffe9effefe96a4903c6a4b376b2f6d08d77c1aaee2f18"}, + {file = "jiter-0.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52ce124f13a7a616fad3bb723f2bfb537d78239d1f7f219566dc52b6f2a9e48d"}, + {file = "jiter-0.10.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:166f3606f11920f9a1746b2eea84fa2c0a5d50fd313c38bdea4edc072000b0af"}, + {file = "jiter-0.10.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:28dcecbb4ba402916034fc14eba7709f250c4d24b0c43fc94d187ee0580af181"}, + {file = "jiter-0.10.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:86c5aa6910f9bebcc7bc4f8bc461aff68504388b43bfe5e5c0bd21efa33b52f4"}, + {file = "jiter-0.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ceeb52d242b315d7f1f74b441b6a167f78cea801ad7c11c36da77ff2d42e8a28"}, + {file = "jiter-0.10.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ff76d8887c8c8ee1e772274fcf8cc1071c2c58590d13e33bd12d02dc9a560397"}, + {file = "jiter-0.10.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a9be4d0fa2b79f7222a88aa488bd89e2ae0a0a5b189462a12def6ece2faa45f1"}, + {file = "jiter-0.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9ab7fd8738094139b6c1ab1822d6f2000ebe41515c537235fd45dabe13ec9324"}, + {file = "jiter-0.10.0-cp39-cp39-win32.whl", hash = "sha256:5f51e048540dd27f204ff4a87f5d79294ea0aa3aa552aca34934588cf27023cf"}, + {file = "jiter-0.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:1b28302349dc65703a9e4ead16f163b1c339efffbe1049c30a44b001a2a4fff9"}, + {file = "jiter-0.10.0.tar.gz", hash = "sha256:07a7142c38aacc85194391108dc91b5b57093c978a9932bd86a36862759d9500"}, +] + +[[package]] +name = "openai" +version = "1.106.1" +description = "The official Python library for the openai API" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "openai-1.106.1-py3-none-any.whl", hash = "sha256:bfdef37c949f80396c59f2c17e0eda35414979bc07ef3379596a93c9ed044f3a"}, + {file = "openai-1.106.1.tar.gz", hash = "sha256:5f575967e3a05555825c43829cdcd50be6e49ab6a3e5262f0937a3f791f917f1"}, +] + +[package.dependencies] +anyio = ">=3.5.0,<5" +distro = ">=1.7.0,<2" +httpx = ">=0.23.0,<1" +jiter = ">=0.4.0,<1" +pydantic = ">=1.9.0,<3" +sniffio = "*" +tqdm = ">4" +typing-extensions = ">=4.11,<5" + +[package.extras] +aiohttp = ["aiohttp", "httpx-aiohttp (>=0.1.8)"] +datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] +realtime = ["websockets (>=13,<16)"] +voice-helpers = ["numpy (>=2.0.2)", "sounddevice (>=0.5.1)"] + [[package]] name = "orderly-set" version = "5.3.0" description = "Orderly set" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "orderly_set-5.3.0-py3-none-any.whl", hash = "sha256:c2c0bfe604f5d3d9b24e8262a06feb612594f37aa3845650548befd7772945d1"}, {file = "orderly_set-5.3.0.tar.gz", hash = "sha256:80b3d8fdd3d39004d9aad389eaa0eab02c71f0a0511ba3a6d54a935a6c6a0acc"}, @@ -384,6 +632,7 @@ version = "1.3.0.post0" description = "Capture the outcome of Python function calls." optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "outcome-1.3.0.post0-py2.py3-none-any.whl", hash = "sha256:e771c5ce06d1415e356078d3bdd68523f284b4ce5419828922b6871e65eda82b"}, {file = "outcome-1.3.0.post0.tar.gz", hash = "sha256:9dcf02e65f2971b80047b377468e72a268e15c0af3cf1238e6ff14f7f91143b8"}, @@ -398,6 +647,7 @@ version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, @@ -409,6 +659,7 @@ version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, @@ -424,17 +675,154 @@ version = "2.22" description = "C parser in Python" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "os_name == \"nt\" and implementation_name != \"pypy\"" files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, ] +[[package]] +name = "pydantic" +version = "2.11.7" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "pydantic-2.11.7-py3-none-any.whl", hash = "sha256:dde5df002701f6de26248661f6835bbe296a47bf73990135c7d07ce741b9623b"}, + {file = "pydantic-2.11.7.tar.gz", hash = "sha256:d989c3c6cb79469287b1569f7447a17848c998458d49ebe294e975b9baf0f0db"}, +] + +[package.dependencies] +annotated-types = ">=0.6.0" +pydantic-core = "2.33.2" +typing-extensions = ">=4.12.2" +typing-inspection = ">=0.4.0" + +[package.extras] +email = ["email-validator (>=2.0.0)"] +timezone = ["tzdata ; python_version >= \"3.9\" and platform_system == \"Windows\""] + +[[package]] +name = "pydantic-core" +version = "2.33.2" +description = "Core functionality for Pydantic validation and serialization" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "pydantic_core-2.33.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2b3d326aaef0c0399d9afffeb6367d5e26ddc24d351dbc9c636840ac355dc5d8"}, + {file = "pydantic_core-2.33.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e5b2671f05ba48b94cb90ce55d8bdcaaedb8ba00cc5359f6810fc918713983d"}, + {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0069c9acc3f3981b9ff4cdfaf088e98d83440a4c7ea1bc07460af3d4dc22e72d"}, + {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d53b22f2032c42eaaf025f7c40c2e3b94568ae077a606f006d206a463bc69572"}, + {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0405262705a123b7ce9f0b92f123334d67b70fd1f20a9372b907ce1080c7ba02"}, + {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b25d91e288e2c4e0662b8038a28c6a07eaac3e196cfc4ff69de4ea3db992a1b"}, + {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bdfe4b3789761f3bcb4b1ddf33355a71079858958e3a552f16d5af19768fef2"}, + {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:efec8db3266b76ef9607c2c4c419bdb06bf335ae433b80816089ea7585816f6a"}, + {file = "pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:031c57d67ca86902726e0fae2214ce6770bbe2f710dc33063187a68744a5ecac"}, + {file = "pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:f8de619080e944347f5f20de29a975c2d815d9ddd8be9b9b7268e2e3ef68605a"}, + {file = "pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:73662edf539e72a9440129f231ed3757faab89630d291b784ca99237fb94db2b"}, + {file = "pydantic_core-2.33.2-cp310-cp310-win32.whl", hash = "sha256:0a39979dcbb70998b0e505fb1556a1d550a0781463ce84ebf915ba293ccb7e22"}, + {file = "pydantic_core-2.33.2-cp310-cp310-win_amd64.whl", hash = "sha256:b0379a2b24882fef529ec3b4987cb5d003b9cda32256024e6fe1586ac45fc640"}, + {file = "pydantic_core-2.33.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4c5b0a576fb381edd6d27f0a85915c6daf2f8138dc5c267a57c08a62900758c7"}, + {file = "pydantic_core-2.33.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e799c050df38a639db758c617ec771fd8fb7a5f8eaaa4b27b101f266b216a246"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc46a01bf8d62f227d5ecee74178ffc448ff4e5197c756331f71efcc66dc980f"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a144d4f717285c6d9234a66778059f33a89096dfb9b39117663fd8413d582dcc"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73cf6373c21bc80b2e0dc88444f41ae60b2f070ed02095754eb5a01df12256de"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dc625f4aa79713512d1976fe9f0bc99f706a9dee21dfd1810b4bbbf228d0e8a"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b21b5549499972441da4758d662aeea93f1923f953e9cbaff14b8b9565aef"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bdc25f3681f7b78572699569514036afe3c243bc3059d3942624e936ec93450e"}, + {file = "pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:fe5b32187cbc0c862ee201ad66c30cf218e5ed468ec8dc1cf49dec66e160cc4d"}, + {file = "pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:bc7aee6f634a6f4a95676fcb5d6559a2c2a390330098dba5e5a5f28a2e4ada30"}, + {file = "pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:235f45e5dbcccf6bd99f9f472858849f73d11120d76ea8707115415f8e5ebebf"}, + {file = "pydantic_core-2.33.2-cp311-cp311-win32.whl", hash = "sha256:6368900c2d3ef09b69cb0b913f9f8263b03786e5b2a387706c5afb66800efd51"}, + {file = "pydantic_core-2.33.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e063337ef9e9820c77acc768546325ebe04ee38b08703244c1309cccc4f1bab"}, + {file = "pydantic_core-2.33.2-cp311-cp311-win_arm64.whl", hash = "sha256:6b99022f1d19bc32a4c2a0d544fc9a76e3be90f0b3f4af413f87d38749300e65"}, + {file = "pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc"}, + {file = "pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b"}, + {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1"}, + {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6"}, + {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea"}, + {file = "pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290"}, + {file = "pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2"}, + {file = "pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab"}, + {file = "pydantic_core-2.33.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f"}, + {file = "pydantic_core-2.33.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56"}, + {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5"}, + {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e"}, + {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162"}, + {file = "pydantic_core-2.33.2-cp313-cp313-win32.whl", hash = "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849"}, + {file = "pydantic_core-2.33.2-cp313-cp313-win_amd64.whl", hash = "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9"}, + {file = "pydantic_core-2.33.2-cp313-cp313-win_arm64.whl", hash = "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9"}, + {file = "pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac"}, + {file = "pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5"}, + {file = "pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9"}, + {file = "pydantic_core-2.33.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a2b911a5b90e0374d03813674bf0a5fbbb7741570dcd4b4e85a2e48d17def29d"}, + {file = "pydantic_core-2.33.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6fa6dfc3e4d1f734a34710f391ae822e0a8eb8559a85c6979e14e65ee6ba2954"}, + {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c54c939ee22dc8e2d545da79fc5381f1c020d6d3141d3bd747eab59164dc89fb"}, + {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:53a57d2ed685940a504248187d5685e49eb5eef0f696853647bf37c418c538f7"}, + {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:09fb9dd6571aacd023fe6aaca316bd01cf60ab27240d7eb39ebd66a3a15293b4"}, + {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0e6116757f7959a712db11f3e9c0a99ade00a5bbedae83cb801985aa154f071b"}, + {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d55ab81c57b8ff8548c3e4947f119551253f4e3787a7bbc0b6b3ca47498a9d3"}, + {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c20c462aa4434b33a2661701b861604913f912254e441ab8d78d30485736115a"}, + {file = "pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:44857c3227d3fb5e753d5fe4a3420d6376fa594b07b621e220cd93703fe21782"}, + {file = "pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:eb9b459ca4df0e5c87deb59d37377461a538852765293f9e6ee834f0435a93b9"}, + {file = "pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9fcd347d2cc5c23b06de6d3b7b8275be558a0c90549495c699e379a80bf8379e"}, + {file = "pydantic_core-2.33.2-cp39-cp39-win32.whl", hash = "sha256:83aa99b1285bc8f038941ddf598501a86f1536789740991d7d8756e34f1e74d9"}, + {file = "pydantic_core-2.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:f481959862f57f29601ccced557cc2e817bce7533ab8e01a797a48b49c9692b3"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5c4aa4e82353f65e548c476b37e64189783aa5384903bfea4f41580f255fddfa"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d946c8bf0d5c24bf4fe333af284c59a19358aa3ec18cb3dc4370080da1e8ad29"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87b31b6846e361ef83fedb187bb5b4372d0da3f7e28d85415efa92d6125d6e6d"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa9d91b338f2df0508606f7009fde642391425189bba6d8c653afd80fd6bb64e"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2058a32994f1fde4ca0480ab9d1e75a0e8c87c22b53a3ae66554f9af78f2fe8c"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:0e03262ab796d986f978f79c943fc5f620381be7287148b8010b4097f79a39ec"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:1a8695a8d00c73e50bff9dfda4d540b7dee29ff9b8053e38380426a85ef10052"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fa754d1850735a0b0e03bcffd9d4b4343eb417e47196e4485d9cca326073a42c"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a11c8d26a50bfab49002947d3d237abe4d9e4b5bdc8846a63537b6488e197808"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:dd14041875d09cc0f9308e37a6f8b65f5585cf2598a53aa0123df8b129d481f8"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d87c561733f66531dced0da6e864f44ebf89a8fba55f31407b00c2f7f9449593"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f82865531efd18d6e07a04a17331af02cb7a651583c418df8266f17a63c6612"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bfb5112df54209d820d7bf9317c7a6c9025ea52e49f46b6a2060104bba37de7"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:64632ff9d614e5eecfb495796ad51b0ed98c453e447a76bcbeeb69615079fc7e"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f889f7a40498cc077332c7ab6b4608d296d852182211787d4f3ee377aaae66e8"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:de4b83bb311557e439b9e186f733f6c645b9417c84e2eb8203f3f820a4b988bf"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f68293f055f51b51ea42fafc74b6aad03e70e191799430b90c13d643059ebb"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:87acbfcf8e90ca885206e98359d7dca4bcbb35abdc0ff66672a293e1d7a19101"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:7f92c15cd1e97d4b12acd1cc9004fa092578acfa57b67ad5e43a197175d01a64"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3f26877a748dc4251cfcfda9dfb5f13fcb034f5308388066bcfe9031b63ae7d"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac89aea9af8cd672fa7b510e7b8c33b0bba9a43186680550ccf23020f32d535"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:970919794d126ba8645f3837ab6046fb4e72bbc057b3709144066204c19a455d"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3eb3fe62804e8f859c49ed20a8451342de53ed764150cb14ca71357c765dc2a6"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:3abcd9392a36025e3bd55f9bd38d908bd17962cc49bc6da8e7e96285336e2bca"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:3a1c81334778f9e3af2f8aeb7a960736e5cab1dfebfb26aabca09afd2906c039"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2807668ba86cb38c6817ad9bc66215ab8584d1d304030ce4f0887336f28a5e27"}, + {file = "pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc"}, +] + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" + [[package]] name = "pysocks" version = "1.7.1" description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information." optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["main"] files = [ {file = "PySocks-1.7.1-py27-none-any.whl", hash = "sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299"}, {file = "PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5"}, @@ -447,6 +835,7 @@ version = "8.3.5" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820"}, {file = "pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845"}, @@ -469,6 +858,7 @@ version = "4.1.0" description = "Pytest plugin for measuring coverage." optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"}, {file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"}, @@ -487,6 +877,7 @@ version = "1.7.0" description = "Adds the ability to retry flaky tests in CI environments" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "pytest_retry-1.7.0-py3-none-any.whl", hash = "sha256:a2dac85b79a4e2375943f1429479c65beb6c69553e7dae6b8332be47a60954f4"}, {file = "pytest_retry-1.7.0.tar.gz", hash = "sha256:f8d52339f01e949df47c11ba9ee8d5b362f5824dff580d3870ec9ae0057df80f"}, @@ -504,6 +895,7 @@ version = "1.0.0" description = "pytest-sugar is a plugin for pytest that changes the default look and feel of pytest (e.g. progressbar, show tests that fail instantly)." optional = false python-versions = "*" +groups = ["main"] files = [ {file = "pytest-sugar-1.0.0.tar.gz", hash = "sha256:6422e83258f5b0c04ce7c632176c7732cab5fdb909cb39cca5c9139f81276c0a"}, {file = "pytest_sugar-1.0.0-py3-none-any.whl", hash = "sha256:70ebcd8fc5795dc457ff8b69d266a4e2e8a74ae0c3edc749381c64b5246c8dfd"}, @@ -523,6 +915,7 @@ version = "2.32.3" description = "Python HTTP for Humans." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, @@ -544,6 +937,7 @@ version = "4.29.0" description = "Official Python bindings for Selenium WebDriver" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "selenium-4.29.0-py3-none-any.whl", hash = "sha256:ce5d26f1ddc1111641113653af33694c13947dd36c2df09cdd33f554351d372e"}, {file = "selenium-4.29.0.tar.gz", hash = "sha256:3a62f7ec33e669364a6c0562a701deb69745b569c50d55f1a912bf8eb33358ba"}, @@ -563,6 +957,7 @@ version = "1.3.1" description = "Sniff out which async library your code is running under" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, @@ -574,6 +969,7 @@ version = "2.4.0" description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"}, {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, @@ -585,6 +981,7 @@ version = "2.5.0" description = "ANSI color formatting for output in terminal" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "termcolor-2.5.0-py3-none-any.whl", hash = "sha256:37b17b5fc1e604945c2642c872a3764b5d547a48009871aea3edd3afa180afb8"}, {file = "termcolor-2.5.0.tar.gz", hash = "sha256:998d8d27da6d48442e8e1f016119076b690d962507531df4890fcd2db2ef8a6f"}, @@ -599,6 +996,7 @@ version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -633,6 +1031,29 @@ files = [ {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"}, {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, ] +markers = {main = "python_version == \"3.10\"", dev = "python_full_version <= \"3.11.0a6\""} + +[[package]] +name = "tqdm" +version = "4.67.1" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, + {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["nbval", "pytest (>=6)", "pytest-asyncio (>=0.24)", "pytest-cov", "pytest-timeout"] +discord = ["requests"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] [[package]] name = "trio" @@ -640,6 +1061,7 @@ version = "0.29.0" description = "A friendly Python library for async concurrency and I/O" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "trio-0.29.0-py3-none-any.whl", hash = "sha256:d8c463f1a9cc776ff63e331aba44c125f423a5a13c684307e828d930e625ba66"}, {file = "trio-0.29.0.tar.gz", hash = "sha256:ea0d3967159fc130acb6939a0be0e558e364fee26b5deeecc893a6b08c361bdf"}, @@ -660,6 +1082,7 @@ version = "0.12.2" description = "WebSocket library for Trio" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "trio_websocket-0.12.2-py3-none-any.whl", hash = "sha256:df605665f1db533f4a386c94525870851096a223adcb97f72a07e8b4beba45b6"}, {file = "trio_websocket-0.12.2.tar.gz", hash = "sha256:22c72c436f3d1e264d0910a3951934798dcc5b00ae56fc4ee079d46c7cf20fae"}, @@ -677,17 +1100,34 @@ version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] +[[package]] +name = "typing-inspection" +version = "0.4.1" +description = "Runtime typing introspection tools" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51"}, + {file = "typing_inspection-0.4.1.tar.gz", hash = "sha256:6ae134cc0203c33377d43188d4064e9b357dba58cff3185f22924610e70a9d28"}, +] + +[package.dependencies] +typing-extensions = ">=4.12.0" + [[package]] name = "urllib3" version = "2.3.0" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, @@ -697,7 +1137,7 @@ files = [ pysocks = {version = ">=1.5.6,<1.5.7 || >1.5.7,<2.0", optional = true, markers = "extra == \"socks\""} [package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] @@ -708,6 +1148,7 @@ version = "1.8.0" description = "WebSocket client for Python with low level API options" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526"}, {file = "websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da"}, @@ -724,6 +1165,7 @@ version = "1.2.0" description = "WebSockets state-machine based protocol implementation" optional = false python-versions = ">=3.7.0" +groups = ["main"] files = [ {file = "wsproto-1.2.0-py3-none-any.whl", hash = "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736"}, {file = "wsproto-1.2.0.tar.gz", hash = "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065"}, @@ -733,6 +1175,6 @@ files = [ h11 = ">=0.9.0,<1" [metadata] -lock-version = "2.0" +lock-version = "2.1" python-versions = "^3.10" -content-hash = "a40015b90325879e50f82cca6a26a730d763cad26589671df798832d41c42db3" +content-hash = "11665d36dc4efcfa5bebf8c7005808ff752bd3c62f6570c21d9cbce4b7f2e2ee" diff --git a/tests/e2e/pyproject.toml b/tests/e2e/pyproject.toml index f9a0defd..42ebbcb9 100644 --- a/tests/e2e/pyproject.toml +++ b/tests/e2e/pyproject.toml @@ -15,6 +15,8 @@ selenium = "^4.11.2" pytest-sugar = "^1.0.0" deepdiff = "^8.0.1" pytest-retry = "^1.6.3" +anthropic = "^0.66.0" +openai = "^1.0.0" [tool.poetry.dev-dependencies] pytest-cov = "^4.1.0" diff --git a/tests/e2e/run_e2e_tests.sh b/tests/e2e/run_e2e_tests.sh index 5e6b6dbc..c716a182 100644 --- a/tests/e2e/run_e2e_tests.sh +++ b/tests/e2e/run_e2e_tests.sh @@ -48,6 +48,9 @@ cd ../../ archgw build cd - +# Once we build archgw we have to install the dependencies again to a new virtual environment. +poetry install + log startup arch gateway with function calling demo cd ../../ tail -F ~/archgw_logs/modelserver.log & @@ -59,7 +62,6 @@ cd - log running e2e tests log ================= -poetry install poetry run pytest log shutting down the arch gateway service diff --git a/tests/e2e/test_prompt_gateway.py b/tests/e2e/test_prompt_gateway.py index e6a10f3a..362be227 100644 --- a/tests/e2e/test_prompt_gateway.py +++ b/tests/e2e/test_prompt_gateway.py @@ -3,9 +3,12 @@ import pytest import requests from deepdiff import DeepDiff import re +import anthropic +import openai from common import ( PROMPT_GATEWAY_ENDPOINT, + LLM_GATEWAY_ENDPOINT, PREFILL_LIST, get_arch_messages, get_data_chunks, @@ -352,3 +355,178 @@ def test_prompt_gateway_prompt_guard_jailbreak(stream): response_json.get("choices")[0]["message"]["content"] == "Looks like you're curious about my abilities, but I can only provide assistance for weather forecasting." ) + + +def test_claude_v1_messages_api(): + """Test Claude client using /v1/messages API through llm_gateway (port 12000)""" + # Get the base URL from the LLM gateway endpoint + base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "") + + client = anthropic.Anthropic( + api_key="test-key", base_url=base_url # Dummy key for testing + ) + + message = client.messages.create( + model="claude-sonnet-4-20250514", # Use working model from smoke test + max_tokens=50, + messages=[ + { + "role": "user", + "content": "Hello, please respond with exactly: Hello from Claude!", + } + ], + ) + + assert message.content[0].text == "Hello from Claude!" + + +def test_claude_v1_messages_api_streaming(): + base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "") + + client = anthropic.Anthropic(api_key="test-key", base_url=base_url) + + with client.messages.stream( + model="claude-sonnet-4-20250514", + max_tokens=50, + messages=[ + { + "role": "user", + "content": "Hello, please respond with exactly: Hello from Claude!", + } + ], + ) as stream: + # This yields only text deltas in order + pieces = [t for t in stream.text_stream] + full_text = "".join(pieces) + + # You can also get the fully-assembled Message object + final = stream.get_final_message() + # A safe way to reassemble text from the content blocks: + final_text = "".join(b.text for b in final.content if b.type == "text") + + assert full_text == "Hello from Claude!" + assert final_text == "Hello from Claude!" + + +def test_anthropic_client_with_openai_model_streaming(): + """Test Anthropic client using /v1/messages API with OpenAI model (gpt-4o-mini) + This tests the transformation: OpenAI upstream -> Anthropic client format with proper event lines + """ + base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "") + + client = anthropic.Anthropic(api_key="test-key", base_url=base_url) + + with client.messages.stream( + model="gpt-4o-mini", # OpenAI model via Anthropic client + max_tokens=50, + messages=[ + { + "role": "user", + "content": "Hello, please respond with exactly: Hello from GPT-4o-mini via Anthropic!", + } + ], + ) as stream: + # This yields only text deltas in order + pieces = [t for t in stream.text_stream] + full_text = "".join(pieces) + + # You can also get the fully-assembled Message object + final = stream.get_final_message() + # A safe way to reassemble text from the content blocks: + final_text = "".join(b.text for b in final.content if b.type == "text") + + assert full_text == "Hello from GPT-4o-mini via Anthropic!" + assert final_text == "Hello from GPT-4o-mini via Anthropic!" + + +def test_openai_gpt4o_mini_v1_messages_api(): + """Test OpenAI GPT-4o-mini using /v1/chat/completions API through llm_gateway (port 12000)""" + # Get the base URL from the LLM gateway endpoint + base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "") + + client = openai.OpenAI( + api_key="test-key", # Dummy key for testing + base_url=f"{base_url}/v1", # OpenAI needs /v1 suffix in base_url + ) + + completion = client.chat.completions.create( + model="gpt-4o-mini", + max_tokens=50, + messages=[ + { + "role": "user", + "content": "Hello, please respond with exactly: Hello from GPT-4o-mini!", + } + ], + ) + + assert completion.choices[0].message.content == "Hello from GPT-4o-mini!" + + +def test_openai_gpt4o_mini_v1_messages_api_streaming(): + """Test OpenAI GPT-4o-mini using /v1/chat/completions API with streaming through llm_gateway (port 12000)""" + # Get the base URL from the LLM gateway endpoint + base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "") + + client = openai.OpenAI( + api_key="test-key", # Dummy key for testing + base_url=f"{base_url}/v1", # OpenAI needs /v1 suffix in base_url + ) + + stream = client.chat.completions.create( + model="gpt-4o-mini", + max_tokens=50, + messages=[ + { + "role": "user", + "content": "Hello, please respond with exactly: Hello from GPT-4o-mini!", + } + ], + stream=True, + ) + + # Collect all the streaming chunks + content_chunks = [] + for chunk in stream: + if chunk.choices[0].delta.content: + content_chunks.append(chunk.choices[0].delta.content) + + # Reconstruct the full message + full_content = "".join(content_chunks) + assert full_content == "Hello from GPT-4o-mini!" + + +def test_openai_client_with_claude_model_streaming(): + """Test OpenAI client using /v1/chat/completions API with Claude model (claude-sonnet-4-20250514) + This tests the transformation: Anthropic upstream -> OpenAI client format with proper chunk handling + """ + # Get the base URL from the LLM gateway endpoint + base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "") + + client = openai.OpenAI( + api_key="test-key", # Dummy key for testing + base_url=f"{base_url}/v1", # OpenAI needs /v1 suffix in base_url + ) + + stream = client.chat.completions.create( + model="claude-sonnet-4-20250514", # Claude model via OpenAI client + max_tokens=50, + messages=[ + { + "role": "user", + "content": "Who are you? ALWAYS RESPOND WITH:I appreciate the request, but I should clarify that I'm Claude, made by Anthropic, not OpenAI. I don't want to create confusion about my origins.", + } + ], + stream=True, + temperature=0.1, + ) + + # Collect all the streaming chunks + content_chunks = [] + for chunk in stream: + if chunk.choices[0].delta.content: + content_chunks.append(chunk.choices[0].delta.content) + + # Reconstruct the full message + full_content = "".join(content_chunks) + assert full_content is not None