diff --git a/.github/workflows/e2e_archgw.yml b/.github/workflows/e2e_archgw.yml
index cf6cc702..b18735a6 100644
--- a/.github/workflows/e2e_archgw.yml
+++ b/.github/workflows/e2e_archgw.yml
@@ -37,6 +37,7 @@ jobs:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
           GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
         run: |
           docker compose up | tee &> archgw.logs &
 
@@ -63,5 +64,6 @@ jobs:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
           GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
         run: |
           docker compose down
diff --git a/.github/workflows/e2e_test_preference_based_routing.yml b/.github/workflows/e2e_test_preference_based_routing.yml
index 5faa9ee7..f0e2b106 100644
--- a/.github/workflows/e2e_test_preference_based_routing.yml
+++ b/.github/workflows/e2e_test_preference_based_routing.yml
@@ -53,6 +53,7 @@ jobs:
           MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
           GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
           ARCH_API_KEY: ${{ secrets.ARCH_API_KEY }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
         run: |
           source venv/bin/activate
           cd demos/shared/test_runner && sh run_demo_tests.sh use_cases/preference_based_routing
diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml
index 576a7fc3..df715e00 100644
--- a/.github/workflows/e2e_tests.yml
+++ b/.github/workflows/e2e_tests.yml
@@ -30,6 +30,7 @@ jobs:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
           GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
         run: |
           python -mvenv venv
           source venv/bin/activate && cd tests/e2e && bash run_e2e_tests.sh
diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml
index 53044770..5c2fd420 100644
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@@ -526,13 +526,13 @@ static_resources:
               tls_minimum_protocol_version: TLSv1_2
               tls_maximum_protocol_version: TLSv1_3
 
-    - name: claude
+    - name: anthropic
       connect_timeout: 0.5s
       type: LOGICAL_DNS
       dns_lookup_family: V4_ONLY
       lb_policy: ROUND_ROBIN
       load_assignment:
-        cluster_name: claude
+        cluster_name: anthropic
         endpoints:
           - lb_endpoints:
               - endpoint:
diff --git a/arch/supervisord.conf b/arch/supervisord.conf
index bec147cc..df25eea9 100644
--- a/arch/supervisord.conf
+++ b/arch/supervisord.conf
@@ -9,7 +9,7 @@ stdout_logfile_maxbytes=0
 stderr_logfile_maxbytes=0
 
 [program:envoy]
-command=/bin/sh -c "python /app/config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log//envoy.log"
+command=/bin/sh -c "python /app/config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug --log-format '[%%Y-%%m-%%d %%T.%%e][%%l] %%v' 2>&1 | tee /var/log//envoy.log"
 stdout_logfile=/dev/stdout
 redirect_stderr=true
 stdout_logfile_maxbytes=0
diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py
index 8346acc0..0f157ea1 100644
--- a/arch/tools/cli/config_generator.py
+++ b/arch/tools/cli/config_generator.py
@@ -8,12 +8,12 @@ from urllib.parse import urlparse
 
 SUPPORTED_PROVIDERS = [
     "arch",
-    "claude",
     "deepseek",
     "groq",
     "mistral",
     "openai",
     "gemini",
+    "anthropic",
 ]
 
 
diff --git a/crates/brightstaff/src/handlers/chat_completions.rs b/crates/brightstaff/src/handlers/chat_completions.rs
index d0e5910a..fff07c22 100644
--- a/crates/brightstaff/src/handlers/chat_completions.rs
+++ b/crates/brightstaff/src/handlers/chat_completions.rs
@@ -4,6 +4,8 @@ use bytes::Bytes;
 use common::configuration::ModelUsagePreference;
 use common::consts::ARCH_PROVIDER_HINT_HEADER;
 use hermesllm::apis::openai::ChatCompletionsRequest;
+use hermesllm::clients::SupportedAPIs;
+use hermesllm::{ProviderRequest, ProviderRequestType};
 use http_body_util::combinators::BoxBody;
 use http_body_util::{BodyExt, Full, StreamBody};
 use hyper::body::Frame;
@@ -22,66 +24,61 @@ fn full<T: Into<Bytes>>(chunk: T) -> BoxBody<Bytes, hyper::Error> {
         .boxed()
 }
 
-pub async fn chat_completions(
+pub async fn chat(
     request: Request<hyper::body::Incoming>,
     router_service: Arc<RouterService>,
-    llm_provider_endpoint: String,
+    full_qualified_llm_provider_url: String,
 ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
+
     let request_path = request.uri().path().to_string();
     let mut request_headers = request.headers().clone();
-
     let chat_request_bytes = request.collect().await?.to_bytes();
 
     debug!("Received request body (raw utf8): {}", String::from_utf8_lossy(&chat_request_bytes));
+    let mut client_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &SupportedAPIs::from_endpoint(request_path.as_str()).unwrap())) {
+        Ok(request) => request,
+        Err(err) => {
+            warn!("Failed to parse request as ProviderRequestType: {}", err);
+            let err_msg = format!("Failed to parse request: {}", err);
+            let mut bad_request = Response::new(full(err_msg));
+            *bad_request.status_mut() = StatusCode::BAD_REQUEST;
+            return Ok(bad_request);
+        }
+    };
 
-    let chat_request_parsed = serde_json::from_slice::<serde_json::Value>(&chat_request_bytes)
-        .inspect_err(|err| {
-            warn!(
-                "Failed to parse request body as JSON: err: {}, str: {}",
-                err,
-                String::from_utf8_lossy(&chat_request_bytes)
-            )
-        })
-        .unwrap_or_else(|_| {
-            warn!(
-                "Failed to parse request body as JSON: {}",
-                String::from_utf8_lossy(&chat_request_bytes)
-            );
-            serde_json::Value::Null
-        });
+    // Clone metadata for routing and remove archgw_preference_config from original
+    let routing_metadata = client_request.metadata().clone();
 
-    if chat_request_parsed == serde_json::Value::Null {
-        warn!("Request body is not valid JSON");
-        let err_msg = "Request body is not valid JSON".to_string();
-        let mut bad_request = Response::new(full(err_msg));
-        *bad_request.status_mut() = StatusCode::BAD_REQUEST;
-        return Ok(bad_request);
+    if client_request.remove_metadata_key("archgw_preference_config") {
+        debug!("Removed archgw_preference_config from metadata");
     }
 
-    let chat_completion_request: ChatCompletionsRequest =
-        serde_json::from_value(chat_request_parsed.clone()).unwrap();
+    let client_request_bytes_for_upstream = ProviderRequestType::to_bytes(&client_request).unwrap();
 
-    // remove metadata from the request
-    let mut chat_request_user_preferences_removed = chat_request_parsed;
-    if let Some(metadata) = chat_request_user_preferences_removed.get_mut("metadata") {
-        debug!("Removing metadata from request");
-        if let Some(m) = metadata.as_object_mut() {
-            m.remove("archgw_preference_config");
-            debug!("Removed archgw_preference_config from metadata");
-        }
-
-        // if metadata is empty, remove it
-        if metadata.as_object().map_or(false, |m| m.is_empty()) {
-            debug!("Removing empty metadata from request");
-            chat_request_user_preferences_removed
-                .as_object_mut()
-                .map(|m| m.remove("metadata"));
-        }
-    }
+    // Convert to ChatCompletionsRequest regardless of input type; this consumes client_request, whose upstream bytes were already serialized above
+    let chat_completions_request_for_arch_router: ChatCompletionsRequest =
+        match ProviderRequestType::try_from((client_request, &SupportedAPIs::OpenAIChatCompletions(hermesllm::apis::OpenAIApi::ChatCompletions))) {
+            Ok(ProviderRequestType::ChatCompletionsRequest(req)) => req,
+            Ok(ProviderRequestType::MessagesRequest(_)) => {
+                // This should not happen after conversion to OpenAI format
+                warn!("Unexpected: got MessagesRequest after converting to OpenAI format");
+                let err_msg = "Request conversion failed".to_string();
+                let mut bad_request = Response::new(full(err_msg));
+                *bad_request.status_mut() = StatusCode::BAD_REQUEST;
+                return Ok(bad_request);
+            },
+            Err(err) => {
+                warn!("Failed to convert request to ChatCompletionsRequest: {}", err);
+                let err_msg = format!("Failed to convert request: {}", err);
+                let mut bad_request = Response::new(full(err_msg));
+                *bad_request.status_mut() = StatusCode::BAD_REQUEST;
+                return Ok(bad_request);
+            }
+        };
 
     debug!(
-        "arch-router request received: {}",
-        &serde_json::to_string(&chat_completion_request).unwrap()
+        "[BRIGHTSTAFF -> ARCH_ROUTER] REQ: {}",
+        &serde_json::to_string(&chat_completions_request_for_arch_router).unwrap()
     );
 
     let trace_parent = request_headers
@@ -90,7 +87,7 @@ pub async fn chat_completions(
         .map(|(_, value)| value.to_str().unwrap_or_default().to_string());
 
     let usage_preferences_str: Option<String> =
-        chat_completion_request.metadata.and_then(|metadata| {
+        routing_metadata.as_ref().and_then(|metadata| {
             metadata
                 .get("archgw_preference_config")
                 .map(|value| value.to_string())
@@ -101,7 +98,7 @@ pub async fn chat_completions(
         .and_then(|s| serde_yaml::from_str(s).ok());
 
     let latest_message_for_log =
-        chat_completion_request
+        chat_completions_request_for_arch_router
             .messages
             .last()
             .map_or("None".to_string(), |msg| {
@@ -126,7 +123,7 @@ pub async fn chat_completions(
 
     let model_name = match router_service
         .determine_route(
-            &chat_completion_request.messages,
+            &chat_completions_request_for_arch_router.messages,
             trace_parent.clone(),
             usage_preferences,
         )
@@ -137,9 +134,9 @@ pub async fn chat_completions(
             None => {
                 debug!(
                     "No route determined, using default model from request: {}",
-                    chat_completion_request.model
+                    chat_completions_request_for_arch_router.model
                 );
-                chat_completion_request.model.clone()
+                chat_completions_request_for_arch_router.model.clone()
             }
         },
         Err(err) => {
@@ -151,8 +148,8 @@ pub async fn chat_completions(
     };
 
     debug!(
-        "sending request to llm provider: {}, with model hint: {}",
-        llm_provider_endpoint, model_name
+        "[BRIGHTSTAFF -> ARCH_ROUTER] URL: {}, Model Hint: {}",
+        full_qualified_llm_provider_url, model_name
     );
 
     request_headers.insert(
@@ -166,17 +163,13 @@ pub async fn chat_completions(
             header::HeaderValue::from_str(&trace_parent).unwrap(),
         );
     }
-
-    let chat_request_parsed_bytes =
-        serde_json::to_string(&chat_request_user_preferences_removed).unwrap();
-
     // remove content-length header if it exists
     request_headers.remove(header::CONTENT_LENGTH);
 
     let llm_response = match reqwest::Client::new()
-        .post(llm_provider_endpoint)
+        .post(full_qualified_llm_provider_url)
         .headers(request_headers)
-        .body(chat_request_parsed_bytes)
+        .body(client_request_bytes_for_upstream)
         .send()
         .await
     {
diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs
index 34fa3aa3..d3843125 100644
--- a/crates/brightstaff/src/main.rs
+++ b/crates/brightstaff/src/main.rs
@@ -1,9 +1,10 @@
-use brightstaff::handlers::chat_completions::chat_completions;
+use brightstaff::handlers::chat_completions::chat;
 use brightstaff::handlers::models::list_models;
 use brightstaff::router::llm_router::RouterService;
 use brightstaff::utils::tracing::init_tracer;
 use bytes::Bytes;
 use common::configuration::Configuration;
+use common::consts::{CHAT_COMPLETIONS_PATH, MESSAGES_PATH};
 use http_body_util::{combinators::BoxBody, BodyExt, Empty};
 use hyper::body::Incoming;
 use hyper::server::conn::http1;
@@ -67,10 +68,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
         &serde_json::to_string(arch_config.as_ref()).unwrap()
     );
 
-    let llm_provider_endpoint = env::var("LLM_PROVIDER_ENDPOINT")
-        .unwrap_or_else(|_| "http://localhost:12001/v1/chat/completions".to_string());
+    let llm_provider_url = env::var("LLM_PROVIDER_ENDPOINT")
+        .unwrap_or_else(|_| "http://localhost:12001".to_string());
 
-    info!("llm provider endpoint: {}", llm_provider_endpoint);
+    info!("llm provider url: {}", llm_provider_url);
     info!("listening on http://{}", bind_address);
     let listener = TcpListener::bind(bind_address).await?;
 
@@ -88,7 +89,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
 
     let router_service: Arc<RouterService> = Arc::new(RouterService::new(
         arch_config.llm_providers.clone(),
-        llm_provider_endpoint.clone(),
+        llm_provider_url.clone() + CHAT_COMPLETIONS_PATH,
         routing_model_name,
         routing_llm_provider,
     ));
@@ -99,19 +100,21 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
         let io = TokioIo::new(stream);
 
         let router_service: Arc<RouterService> = Arc::clone(&router_service);
-        let llm_provider_endpoint = llm_provider_endpoint.clone();
+        let llm_provider_url = llm_provider_url.clone();
 
         let llm_providers = llm_providers.clone();
         let service = service_fn(move |req| {
+
             let router_service = Arc::clone(&router_service);
             let parent_cx = extract_context_from_request(&req);
-            let llm_provider_endpoint = llm_provider_endpoint.clone();
+            let llm_provider_url = llm_provider_url.clone();
             let llm_providers = llm_providers.clone();
 
             async move {
                 match (req.method(), req.uri().path()) {
-                    (&Method::POST, "/v1/chat/completions") => {
-                        chat_completions(req, router_service, llm_provider_endpoint)
+                    (&Method::POST, CHAT_COMPLETIONS_PATH | MESSAGES_PATH) => {
+                        let fully_qualified_url = format!("{}{}", llm_provider_url, req.uri().path());
+                        chat(req, router_service, fully_qualified_url)
                             .with_context(parent_cx)
                             .await
                     }
diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs
index 20d2623b..93f4fd38 100644
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@@ -149,8 +149,8 @@ pub struct EmbeddingProviver {
 pub enum LlmProviderType {
     #[serde(rename = "arch")]
     Arch,
-    #[serde(rename = "claude")]
-    Claude,
+    #[serde(rename = "anthropic")]
+    Anthropic,
     #[serde(rename = "deepseek")]
     Deepseek,
     #[serde(rename = "groq")]
@@ -167,7 +167,7 @@ impl Display for LlmProviderType {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             LlmProviderType::Arch => write!(f, "arch"),
-            LlmProviderType::Claude => write!(f, "claude"),
+            LlmProviderType::Anthropic => write!(f, "anthropic"),
             LlmProviderType::Deepseek => write!(f, "deepseek"),
             LlmProviderType::Groq => write!(f, "groq"),
             LlmProviderType::Gemini => write!(f, "gemini"),
diff --git a/crates/common/src/consts.rs b/crates/common/src/consts.rs
index 3ff2ce5e..0eb5a036 100644
--- a/crates/common/src/consts.rs
+++ b/crates/common/src/consts.rs
@@ -12,6 +12,7 @@ pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
 pub const MESSAGES_KEY: &str = "messages";
 pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";
 pub const CHAT_COMPLETIONS_PATH: &str = "/v1/chat/completions";
+pub const MESSAGES_PATH: &str = "/v1/messages";
 pub const HEALTHZ_PATH: &str = "/healthz";
 pub const X_ARCH_STATE_HEADER: &str = "x-arch-state";
 pub const X_ARCH_API_RESPONSE: &str = "x-arch-api-response-message";
diff --git a/crates/common/src/routing.rs b/crates/common/src/routing.rs
index f4baf896..2e9bac09 100644
--- a/crates/common/src/routing.rs
+++ b/crates/common/src/routing.rs
@@ -33,6 +33,7 @@ pub fn get_llm_provider(
         return provider;
     }
 
+
     if llm_providers.default().is_some() {
         return llm_providers.default().unwrap();
     }
diff --git a/crates/common/src/tokenizer.rs b/crates/common/src/tokenizer.rs
index ded885d5..9b11f639 100644
--- a/crates/common/src/tokenizer.rs
+++ b/crates/common/src/tokenizer.rs
@@ -2,7 +2,7 @@ use log::debug;
 
 #[allow(dead_code)]
 pub fn token_count(model_name: &str, text: &str) -> Result<usize, String> {
-    debug!("getting token count model={}", model_name);
+    debug!("TOKENIZER: computing token count for model={}", model_name);
     //HACK: add support for tokenizing mistral and other models
     //filed issue https://github.com/katanemo/arch/issues/222
 
diff --git a/crates/hermesllm/src/apis/anthropic.rs b/crates/hermesllm/src/apis/anthropic.rs
index 0ffe4e8d..ae61e2fe 100644
--- a/crates/hermesllm/src/apis/anthropic.rs
+++ b/crates/hermesllm/src/apis/anthropic.rs
@@ -1,9 +1,14 @@
+use crate::providers::response::TokenUsage;
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 use serde_with::skip_serializing_none;
 use std::collections::HashMap;
 
 use super::ApiDefinition;
+use crate::providers::request::{ProviderRequest, ProviderRequestError};
+use crate::providers::response::{ProviderResponse, ProviderStreamResponse};
+use crate::clients::transformer::ExtractText;
+use crate::{MESSAGES_PATH};
 
 // Enum for all supported Anthropic APIs
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
@@ -17,13 +22,13 @@ pub enum AnthropicApi {
 impl ApiDefinition for AnthropicApi {
     fn endpoint(&self) -> &'static str {
         match self {
-            AnthropicApi::Messages => "/v1/messages",
+            AnthropicApi::Messages => MESSAGES_PATH,
         }
     }
 
     fn from_endpoint(endpoint: &str) -> Option<Self> {
         match endpoint {
-            "/v1/messages" => Some(AnthropicApi::Messages),
+            MESSAGES_PATH => Some(AnthropicApi::Messages),
             _ => None,
         }
     }
@@ -186,6 +191,19 @@ pub enum MessagesContentBlock {
     },
 }
 
+/// Newline-joined text of every `Text` block; all other block kinds are skipped.
+impl ExtractText for Vec<MessagesContentBlock> {
+    fn extract_text(&self) -> String {
+        // Pick out the text blocks lazily, then collect and join them.
+        let texts = self.iter().filter_map(|block| match block {
+            MessagesContentBlock::Text { text } => Some(text.as_str()),
+            _ => None,
+        });
+        texts.collect::<Vec<&str>>().join("\n")
+    }
+}
+
+
 #[derive(Serialize, Deserialize, Debug, Clone)]
 #[serde(rename_all = "snake_case")]
 pub enum MessagesImageSource {
@@ -220,6 +238,15 @@ pub enum MessagesMessageContent {
     Blocks(Vec<MessagesContentBlock>),
 }
 
+impl ExtractText for MessagesMessageContent {
+    fn extract_text(&self) -> String {
+        match self {
+            Self::Blocks(blocks) => blocks.extract_text(), // delegate to the block-list impl
+            Self::Single(plain) => plain.to_owned(),       // plain string is returned as-is
+        }
+    }
+}
+
 #[derive(Serialize, Deserialize, Debug, Clone)]
 #[serde(untagged)]
 pub enum MessagesSystemPrompt {
@@ -369,6 +396,121 @@ impl MessagesRequest {
     }
 }
 
+impl TryFrom<&[u8]> for MessagesRequest {
+    type Error = serde_json::Error;
+    /// Deserializes a `MessagesRequest` from raw JSON bytes.
+    fn try_from(raw_json: &[u8]) -> Result<Self, Self::Error> {
+        serde_json::from_slice::<MessagesRequest>(raw_json)
+    }
+}
+
+impl TokenUsage for MessagesResponse {
+    fn completion_tokens(&self) -> usize {
+        self.usage.output_tokens as usize
+    }
+    fn prompt_tokens(&self) -> usize {
+        self.usage.input_tokens as usize
+    }
+    fn total_tokens(&self) -> usize {
+        self.usage.input_tokens as usize + self.usage.output_tokens as usize // cast first, then sum
+    }
+}
+
+impl ProviderResponse for MessagesResponse {
+    fn usage(&self) -> Option<&dyn TokenUsage> { Some(self) }
+    /// Returns `(input, output, total)` token counts; the total is summed after casting to `usize`.
+    fn extract_usage_counts(&self) -> Option<(usize, usize, usize)> {
+        let (input, output) = (self.usage.input_tokens as usize, self.usage.output_tokens as usize);
+        Some((input, output, input + output))
+    }
+}
+
+/// `ProviderRequest` implementation for the Anthropic Messages request type.
+/// Text extraction reads plain strings and `Text` blocks only; other block kinds are skipped.
+impl ProviderRequest for MessagesRequest {
+    /// Model name carried by the request.
+    fn model(&self) -> &str {
+        &self.model
+    }
+
+    /// Replaces the model name carried by the request.
+    fn set_model(&mut self, model: String) {
+        self.model = model;
+    }
+
+    /// Whether streaming was requested; an absent `stream` flag counts as `false`.
+    fn is_streaming(&self) -> bool {
+        self.stream.unwrap_or(false)
+    }
+
+    /// Space-joined text of the system prompt (when present) followed by every message.
+    /// Each plain string and each `Text` block contributes one part to the join.
+    fn extract_messages_text(&self) -> String {
+        let mut text_parts: Vec<&str> = Vec::new();
+        // System prompt first, when present.
+        match &self.system {
+            Some(MessagesSystemPrompt::Single(prompt)) => text_parts.push(prompt.as_str()),
+            Some(MessagesSystemPrompt::Blocks(blocks)) => text_parts.extend(text_of_content_blocks(blocks)),
+            None => {}
+        }
+        // Then every message, in the order they appear in the request.
+        for message in &self.messages {
+            match &message.content {
+                MessagesMessageContent::Single(text) => text_parts.push(text.as_str()),
+                MessagesMessageContent::Blocks(blocks) => text_parts.extend(text_of_content_blocks(blocks)),
+            }
+        }
+        text_parts.join(" ")
+    }
+
+    /// Text of the most recent user message: the plain string, or its first `Text` block.
+    /// A user message without any `Text` block is passed over in favor of an earlier one.
+    /// Returns `None` when no user message yields any text.
+    fn get_recent_user_message(&self) -> Option<String> {
+        self.messages
+            .iter()
+            // Walk newest-to-oldest so the first hit is the most recent user message.
+            .rev()
+            .filter(|message| message.role == MessagesRole::User)
+            .find_map(|message| match &message.content {
+                MessagesMessageContent::Single(text) => Some(text.clone()),
+                MessagesMessageContent::Blocks(blocks) => text_of_content_blocks(blocks).next().map(str::to_owned),
+            })
+    }
+
+    /// Serializes the request to JSON bytes, wrapping any serde error in `ProviderRequestError`.
+    fn to_bytes(&self) -> Result<Vec<u8>, ProviderRequestError> {
+        serde_json::to_vec(self).map_err(|e| ProviderRequestError {
+            message: format!("Failed to serialize MessagesRequest: {}", e),
+            // The serde error is kept as the source alongside the formatted message.
+            source: Some(Box::new(e)),
+        })
+    }
+
+    /// Borrows the optional request metadata map.
+    fn metadata(&self) -> &Option<HashMap<String, Value>> {
+        &self.metadata
+    }
+
+    /// Removes `key` from the metadata; returns `true` only if the key was present.
+    fn remove_metadata_key(&mut self, key: &str) -> bool {
+        match self.metadata.as_mut() {
+            Some(metadata) => metadata.remove(key).is_some(),
+            // Without a metadata map there is nothing to remove.
+            None => false,
+        }
+    }
+}
+
+/// Yields the text of each `Text` block in `blocks`, in order.
+/// Every other block kind is skipped.
+fn text_of_content_blocks(blocks: &[MessagesContentBlock]) -> impl Iterator<Item = &str> + '_ {
+    blocks.iter().filter_map(|block| match block {
+        MessagesContentBlock::Text { text } => Some(text.as_str()),
+        _ => None,
+    })
+}
+
 impl MessagesResponse {
     pub fn api_type() -> AnthropicApi {
         AnthropicApi::Messages
@@ -381,6 +523,54 @@ impl MessagesStreamEvent {
     }
 }
 
+impl MessagesRole {
+    pub fn as_str(&self) -> &'static str { // lowercase role name of the variant
+        match *self {
+            Self::Assistant => "assistant",
+            Self::User => "user",
+        }
+    }
+}
+
+impl ProviderStreamResponse for MessagesStreamEvent {
+    /// Text of a `ContentBlockDelta` whose delta is a `TextDelta`; `None` for anything else.
+    fn content_delta(&self) -> Option<&str> {
+        match self {
+            MessagesStreamEvent::ContentBlockDelta { delta, .. } => match delta {
+                MessagesContentDelta::TextDelta { text } => Some(text),
+                _ => None,
+            },
+            _ => None,
+        }
+    }
+
+    /// `true` only for the `MessageStop` event.
+    fn is_final(&self) -> bool {
+        matches!(self, MessagesStreamEvent::MessageStop)
+    }
+
+    /// Role of the message carried by `MessageStart`; `None` for every other event.
+    fn role(&self) -> Option<&str> {
+        if let MessagesStreamEvent::MessageStart { message } = self {
+            return Some(message.role.as_str());
+        }
+        None
+    }
+
+    /// Snake-case name of the event variant; always `Some`.
+    fn event_type(&self) -> Option<&str> {
+        Some(match self {
+            MessagesStreamEvent::MessageStart { .. } => "message_start",
+            MessagesStreamEvent::ContentBlockStart { .. } => "content_block_start",
+            MessagesStreamEvent::ContentBlockDelta { .. } => "content_block_delta",
+            MessagesStreamEvent::ContentBlockStop { .. } => "content_block_stop",
+            MessagesStreamEvent::MessageDelta { .. } => "message_delta",
+            MessagesStreamEvent::MessageStop => "message_stop",
+            MessagesStreamEvent::Ping => "ping",
+        })
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -878,13 +1068,13 @@ mod tests {
         let api = AnthropicApi::Messages;
 
         // Test trait methods
-        assert_eq!(api.endpoint(), "/v1/messages");
+        assert_eq!(api.endpoint(), MESSAGES_PATH);
         assert!(api.supports_streaming());
         assert!(api.supports_tools());
         assert!(api.supports_vision());
 
         // Test from_endpoint trait method
-        let found_api = AnthropicApi::from_endpoint("/v1/messages");
+        let found_api = AnthropicApi::from_endpoint(MESSAGES_PATH);
         assert_eq!(found_api, Some(AnthropicApi::Messages));
 
         let not_found = AnthropicApi::from_endpoint("/v1/unknown");
diff --git a/crates/hermesllm/src/apis/mod.rs b/crates/hermesllm/src/apis/mod.rs
index 78b634d5..b175988c 100644
--- a/crates/hermesllm/src/apis/mod.rs
+++ b/crates/hermesllm/src/apis/mod.rs
@@ -1,110 +1,9 @@
 pub mod anthropic;
 pub mod openai;
-
-// Re-export all types for convenience
 pub use anthropic::*;
 pub use openai::*;
 
-/// Common trait that all API definitions must implement
-///
-/// This trait ensures consistency across different AI provider API definitions
-/// and makes it easy to add new providers like Gemini, Claude, etc.
-///
-/// Note: This is different from the `ApiProvider` enum in `clients::endpoints`
-/// which represents provider identification, while this trait defines API capabilities.
-///
-/// # Benefits
-///
-/// - **Consistency**: All API providers implement the same interface
-/// - **Extensibility**: Easy to add new providers without breaking existing code
-/// - **Type Safety**: Compile-time guarantees that all providers implement required methods
-/// - **Discoverability**: Clear documentation of what capabilities each API supports
-///
-/// # Example implementation for a new provider:
-///
-/// ```rust,ignore
-/// use serde::{Deserialize, Serialize};
-/// use super::ApiDefinition;
-///
-/// #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-/// pub enum GeminiApi {
-///     GenerateContent,
-///     ChatCompletions,
-/// }
-///
-/// impl GeminiApi {
-///     pub fn endpoint(&self) -> &'static str {
-///         match self {
-///             GeminiApi::GenerateContent => "/v1/models/gemini-pro:generateContent",
-///             GeminiApi::ChatCompletions => "/v1/models/gemini-pro:chat",
-///         }
-///     }
-///
-///     pub fn from_endpoint(endpoint: &str) -> Option<Self> {
-///         match endpoint {
-///             "/v1/models/gemini-pro:generateContent" => Some(GeminiApi::GenerateContent),
-///             "/v1/models/gemini-pro:chat" => Some(GeminiApi::ChatCompletions),
-///             _ => None,
-///         }
-///     }
-///
-///     pub fn supports_streaming(&self) -> bool {
-///         match self {
-///             GeminiApi::GenerateContent => true,
-///             GeminiApi::ChatCompletions => true,
-///         }
-///     }
-///
-///     pub fn supports_tools(&self) -> bool {
-///         match self {
-///             GeminiApi::GenerateContent => true,
-///             GeminiApi::ChatCompletions => false,
-///         }
-///     }
-///
-///     pub fn supports_vision(&self) -> bool {
-///         match self {
-///             GeminiApi::GenerateContent => true,
-///             GeminiApi::ChatCompletions => false,
-///         }
-///     }
-/// }
-///
-/// impl ApiDefinition for GeminiApi {
-///     fn endpoint(&self) -> &'static str {
-///         self.endpoint()
-///     }
-///
-///     fn from_endpoint(endpoint: &str) -> Option<Self> {
-///         Self::from_endpoint(endpoint)
-///     }
-///
-///     fn supports_streaming(&self) -> bool {
-///         self.supports_streaming()
-///     }
-///
-///     fn supports_tools(&self) -> bool {
-///         self.supports_tools()
-///     }
-///
-///     fn supports_vision(&self) -> bool {
-///         self.supports_vision()
-///     }
-/// }
-///
-/// // Now you can use generic code that works with any API:
-/// fn print_api_info<T: ApiDefinition>(api: &T) {
-///     println!("Endpoint: {}", api.endpoint());
-///     println!("Supports streaming: {}", api.supports_streaming());
-///     println!("Supports tools: {}", api.supports_tools());
-///     println!("Supports vision: {}", api.supports_vision());
-/// }
-///
-/// // Works with both OpenAI and Anthropic (and future Gemini)
-/// print_api_info(&OpenAIApi::ChatCompletions);
-/// print_api_info(&AnthropicApi::Messages);
-/// print_api_info(&GeminiApi::GenerateContent);
-/// ```
+
 pub trait ApiDefinition {
     /// Returns the endpoint path for this API
     fn endpoint(&self) -> &'static str;
@@ -132,6 +31,7 @@ pub trait ApiDefinition {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::{CHAT_COMPLETIONS_PATH, MESSAGES_PATH};
 
     #[test]
     fn test_generic_api_functionality() {
@@ -150,8 +50,8 @@ mod tests {
     fn test_api_detection_from_endpoints() {
         // Test that we can detect APIs from endpoints using the trait
         let endpoints = vec![
-            "/v1/chat/completions",
-            "/v1/messages",
+            CHAT_COMPLETIONS_PATH,
+            MESSAGES_PATH,
             "/v1/unknown"
         ];
 
diff --git a/crates/hermesllm/src/apis/openai.rs b/crates/hermesllm/src/apis/openai.rs
index 2471fc35..7e89acd2 100644
--- a/crates/hermesllm/src/apis/openai.rs
+++ b/crates/hermesllm/src/apis/openai.rs
@@ -5,11 +5,11 @@ use std::collections::HashMap;
 use std::fmt::Display;
 use thiserror::Error;
 
-
-
 use crate::providers::request::{ProviderRequest, ProviderRequestError};
-use crate::providers::response::{ProviderResponse, ProviderStreamResponse, TokenUsage, SseStreamIter};
+use crate::providers::response::{ProviderResponse, ProviderStreamResponse, TokenUsage};
 use super::ApiDefinition;
+use crate::clients::transformer::{ExtractText};
+use crate::{CHAT_COMPLETIONS_PATH};
 
 // ============================================================================
 // OPENAI API ENUMERATION
@@ -28,13 +28,13 @@ pub enum OpenAIApi {
 impl ApiDefinition for OpenAIApi {
     fn endpoint(&self) -> &'static str {
         match self {
-            OpenAIApi::ChatCompletions => "/v1/chat/completions",
+            OpenAIApi::ChatCompletions => CHAT_COMPLETIONS_PATH,
         }
     }
 
     fn from_endpoint(endpoint: &str) -> Option<Self> {
         match endpoint {
-            "/v1/chat/completions" => Some(OpenAIApi::ChatCompletions),
+            CHAT_COMPLETIONS_PATH => Some(OpenAIApi::ChatCompletions),
             _ => None,
         }
     }
@@ -81,7 +81,7 @@ pub struct ChatCompletionsRequest {
     // Maximum tokens in the response has been deprecated, but we keep it for compatibility
     pub max_tokens: Option<u32>,
     pub modalities: Option<Vec<String>>,
-    pub metadata: Option<HashMap<String, String>>,
+    pub metadata: Option<HashMap<String, Value>>,
     pub n: Option<u32>,
     pub presence_penalty: Option<f32>,
     pub parallel_tool_calls: Option<bool>,
@@ -174,6 +174,28 @@ pub enum MessageContent {
     Parts(Vec<ContentPart>),
 }
 
+// Content Extraction
+impl ExtractText for MessageContent {
+    fn extract_text(&self) -> String {
+        match self {
+            MessageContent::Text(text) => text.clone(),
+            MessageContent::Parts(parts) => parts.extract_text()
+        }
+    }
+}
+
+impl ExtractText for Vec<ContentPart> {
+    fn extract_text(&self) -> String {
+        self.iter()
+            .filter_map(|part| match part {
+                ContentPart::Text { text } => Some(text.as_str()),
+                _ => None,
+            })
+            .collect::<Vec<_>>()
+            .join("\n")
+    }
+}
+
 impl Display for MessageContent {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
@@ -328,6 +350,7 @@ pub struct ChatCompletionsResponse {
     pub choices: Vec<Choice>,
     pub usage: Usage,
     pub system_fingerprint: Option<String>,
+    pub service_tier: Option<String>,
 }
 
 /// Finish reason for completion
@@ -576,6 +599,18 @@ impl ProviderRequest for ChatCompletionsRequest {
             source: Some(Box::new(e)),
         })
     }
+
+    fn metadata(&self) -> &Option<HashMap<String, Value>> {
+        &self.metadata // borrowed as-is; `None` when the request carries no metadata
+    }
+
+    fn remove_metadata_key(&mut self, key: &str) -> bool {
+        // With no metadata map there is nothing to remove, so report false.
+        match self.metadata.as_mut() {
+            Some(entries) => entries.remove(key).is_some(),
+            None => false,
+        }
+    }
 }
 
 /// Implementation of ProviderResponse for ChatCompletionsResponse
@@ -593,68 +628,6 @@ impl ProviderResponse for ChatCompletionsResponse {
     }
 }
 
-// ============================================================================
-// OPENAI SSE STREAMING ITERATOR
-// ============================================================================
-
-/// OpenAI-specific SSE streaming iterator
-/// Handles OpenAI's specific SSE format and ChatCompletionsStreamResponse parsing
-pub struct OpenAISseIter<I>
-where
-    I: Iterator,
-    I::Item: AsRef<str>,
-{
-    sse_stream: SseStreamIter<I>,
-}
-
-impl<I> OpenAISseIter<I>
-where
-    I: Iterator,
-    I::Item: AsRef<str>,
-{
-    pub fn new(sse_stream: SseStreamIter<I>) -> Self {
-        Self { sse_stream }
-    }
-}
-
-impl<I> Iterator for OpenAISseIter<I>
-where
-    I: Iterator,
-    I::Item: AsRef<str>,
-{
-    type Item = Result<Box<dyn ProviderStreamResponse>, Box<dyn std::error::Error + Send + Sync>>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        for line in &mut self.sse_stream.lines {
-            let line = line.as_ref();
-            if line.is_empty() {
-                continue;
-            }
-
-            if line.starts_with("data: ") {
-                let data = &line[6..]; // Remove "data: " prefix
-                if data == "[DONE]" {
-                    return None;
-                }
-
-                // Skip ping messages (usually from other providers, but handle gracefully)
-                if data == r#"{"type": "ping"}"# {
-                    continue;
-                }
-
-                // OpenAI-specific parsing of ChatCompletionsStreamResponse
-                match serde_json::from_str::<ChatCompletionsStreamResponse>(data) {
-                    Ok(response) => return Some(Ok(Box::new(response))),
-                    Err(e) => return Some(Err(Box::new(
-                        OpenAIStreamError::InvalidStreamingData(format!("Error parsing OpenAI streaming data: {}, data: {}", e, data))
-                    ))),
-                }
-            }
-        }
-        None
-    }
-}
-
 // Direct implementation of ProviderStreamResponse trait on ChatCompletionsStreamResponse
 impl ProviderStreamResponse for ChatCompletionsStreamResponse {
     fn content_delta(&self) -> Option<&str> {
@@ -680,6 +653,10 @@ impl ProviderStreamResponse for ChatCompletionsStreamResponse {
                 Role::Tool => "tool",
             }))
     }
+
+    fn event_type(&self) -> Option<&str> {
+        None // OpenAI doesn't use event types in SSE
+    }
 }
 
 
@@ -982,13 +959,13 @@ mod tests {
         let api = OpenAIApi::ChatCompletions;
 
         // Test trait methods
-        assert_eq!(api.endpoint(), "/v1/chat/completions");
+        assert_eq!(api.endpoint(), CHAT_COMPLETIONS_PATH);
         assert!(api.supports_streaming());
         assert!(api.supports_tools());
         assert!(api.supports_vision());
 
         // Test from_endpoint
-        let found_api = OpenAIApi::from_endpoint("/v1/chat/completions");
+        let found_api = OpenAIApi::from_endpoint(CHAT_COMPLETIONS_PATH);
         assert_eq!(found_api, Some(OpenAIApi::ChatCompletions));
 
         let not_found = OpenAIApi::from_endpoint("/v1/unknown");
@@ -1139,4 +1116,84 @@ mod tests {
         let invalid_result: Result<ToolChoice, _> = serde_json::from_value(json!("invalid"));
         assert!(invalid_result.is_err());
     }
+
+    #[test]
+    fn test_chat_completions_response_with_service_tier() {
+        // Test that ChatCompletionsResponse can deserialize OpenAI responses with service_tier field
+        let json_response = r#"{
+            "id": "chatcmpl-CAJc2Df6QCc7Mv3RP0Cf2xlbDV1x2",
+            "object": "chat.completion",
+            "created": 1756574706,
+            "model": "gpt-4o-2024-08-06",
+            "choices": [{
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": "Test response content",
+                    "annotations": []
+                },
+                "finish_reason": "stop"
+            }],
+            "usage": {
+                "prompt_tokens": 65,
+                "completion_tokens": 184,
+                "total_tokens": 249,
+                "prompt_tokens_details": {
+                    "cached_tokens": 0,
+                    "audio_tokens": 0
+                },
+                "completion_tokens_details": {
+                    "reasoning_tokens": 0,
+                    "audio_tokens": 0,
+                    "accepted_prediction_tokens": 0,
+                    "rejected_prediction_tokens": 0
+                }
+            },
+            "service_tier": "default",
+            "system_fingerprint": "fp_f33640a400"
+        }"#;
+
+        let response: ChatCompletionsResponse = serde_json::from_str(json_response).unwrap();
+
+        assert_eq!(response.id, "chatcmpl-CAJc2Df6QCc7Mv3RP0Cf2xlbDV1x2");
+        assert_eq!(response.object, "chat.completion");
+        assert_eq!(response.created, 1756574706);
+        assert_eq!(response.model, "gpt-4o-2024-08-06");
+        assert_eq!(response.service_tier, Some("default".to_string()));
+        assert_eq!(response.system_fingerprint, Some("fp_f33640a400".to_string()));
+        assert_eq!(response.choices.len(), 1);
+        assert_eq!(response.usage.prompt_tokens, 65);
+        assert_eq!(response.usage.completion_tokens, 184);
+        assert_eq!(response.usage.total_tokens, 249);
+    }
+
+    #[test]
+    fn test_chat_completions_response_without_service_tier() {
+        // Test that ChatCompletionsResponse can deserialize responses without service_tier (backward compatibility)
+        let json_response = r#"{
+            "id": "chatcmpl-123",
+            "object": "chat.completion",
+            "created": 1234567890,
+            "model": "gpt-4",
+            "choices": [{
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": "Test response"
+                },
+                "finish_reason": "stop"
+            }],
+            "usage": {
+                "prompt_tokens": 10,
+                "completion_tokens": 20,
+                "total_tokens": 30
+            }
+        }"#;
+
+        let response: ChatCompletionsResponse = serde_json::from_str(json_response).unwrap();
+
+        assert_eq!(response.id, "chatcmpl-123");
+        assert_eq!(response.service_tier, None); // Should be None when not present
+        assert_eq!(response.system_fingerprint, None);
+    }
 }
diff --git a/crates/hermesllm/src/clients/endpoints.rs b/crates/hermesllm/src/clients/endpoints.rs
index bf0648a9..5af51fe0 100644
--- a/crates/hermesllm/src/clients/endpoints.rs
+++ b/crates/hermesllm/src/clients/endpoints.rs
@@ -6,12 +6,13 @@
 //! # Examples
 //!
 //! ```rust
-//! use hermesllm::clients::endpoints::{is_supported_endpoint, supported_endpoints};
+//! use hermesllm::clients::endpoints::supported_endpoints;
 //!
 //! // Check if we support an endpoint
-//! assert!(is_supported_endpoint("/v1/chat/completions"));
-//! assert!(is_supported_endpoint("/v1/messages"));
-//! assert!(!is_supported_endpoint("/v1/unknown"));
+//! use hermesllm::clients::endpoints::SupportedAPIs;
+//! assert!(SupportedAPIs::from_endpoint("/v1/chat/completions").is_some());
+//! assert!(SupportedAPIs::from_endpoint("/v1/messages").is_some());
+//! assert!(SupportedAPIs::from_endpoint("/v1/unknown").is_none());
 //!
 //! // Get all supported endpoints
 //! let endpoints = supported_endpoints();
@@ -20,23 +21,81 @@
 //! assert!(endpoints.contains(&"/v1/messages"));
 //! ```
 
-use crate::apis::{AnthropicApi, OpenAIApi, ApiDefinition};
+use crate::{apis::{AnthropicApi, ApiDefinition, OpenAIApi}, ProviderId};
+use std::fmt;
 
-/// Check if the given endpoint path is supported
-pub fn is_supported_endpoint(endpoint: &str) -> bool {
-    // Try OpenAI APIs
-    if OpenAIApi::from_endpoint(endpoint).is_some() {
-        return true;
-    }
-
-    // Try Anthropic APIs
-    if AnthropicApi::from_endpoint(endpoint).is_some() {
-        return true;
-    }
-
-    false
+/// Unified enum representing all supported API endpoints across providers
+#[derive(Debug, Clone, PartialEq)]
+pub enum SupportedAPIs {
+    OpenAIChatCompletions(OpenAIApi),
+    AnthropicMessagesAPI(AnthropicApi),
 }
 
+impl fmt::Display for SupportedAPIs {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            SupportedAPIs::OpenAIChatCompletions(api) => write!(f, "OpenAI API ({})", api.endpoint()),
+            SupportedAPIs::AnthropicMessagesAPI(api) => write!(f, "Anthropic API ({})", api.endpoint()),
+        }
+    }
+}
+
+impl SupportedAPIs {
+    /// Create a `SupportedAPIs` value from an endpoint path
+    pub fn from_endpoint(endpoint: &str) -> Option<Self> {
+        if let Some(openai_api) = OpenAIApi::from_endpoint(endpoint) {
+            return Some(SupportedAPIs::OpenAIChatCompletions(openai_api));
+        }
+
+        if let Some(anthropic_api) = AnthropicApi::from_endpoint(endpoint) {
+            return Some(SupportedAPIs::AnthropicMessagesAPI(anthropic_api));
+        }
+
+        None
+    }
+
+    /// Get the endpoint path for this API
+    pub fn endpoint(&self) -> &'static str {
+        match self {
+            SupportedAPIs::OpenAIChatCompletions(api) => api.endpoint(),
+            SupportedAPIs::AnthropicMessagesAPI(api) => api.endpoint(),
+        }
+    }
+
+    /// Resolve the upstream request path to use when forwarding to `provider_id`.
+    ///
+    /// * Anthropic Messages client API: the Anthropic provider keeps the messages
+    ///   path; every other provider is sent to the chat completions path.
+    /// * Any other client API: Groq gets `/openai` prepended to `request_path` and
+    ///   Gemini gets its `/v1beta/openai` chat completions path, but only when
+    ///   `request_path` starts with `/v1/`; otherwise the chat completions path.
+    pub fn target_endpoint_for_provider(&self, provider_id: &ProviderId, request_path: &str) -> String {
+        // Crate-level path constants replace the duplicated string literals so the
+        // paths here cannot drift from the ones `OpenAIApi::endpoint` reports.
+        let default_endpoint = crate::CHAT_COMPLETIONS_PATH.to_string();
+        match self {
+            SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages) => match provider_id {
+                ProviderId::Anthropic => crate::MESSAGES_PATH.to_string(),
+                _ => default_endpoint,
+            },
+            _ => match provider_id {
+                // Groq: the incoming path is kept and prefixed with `/openai`.
+                ProviderId::Groq if request_path.starts_with("/v1/") => {
+                    format!("/openai{}", request_path)
+                }
+                // Gemini: always one fixed path, whatever follows `/v1/`.
+                ProviderId::Gemini if request_path.starts_with("/v1/") => {
+                    "/v1beta/openai/chat/completions".to_string()
+                }
+                // Paths outside `/v1/` and all remaining providers use the default.
+                _ => default_endpoint,
+            },
+        }
+    }
+}
+
+
+
 /// Get all supported endpoint paths
 pub fn supported_endpoints() -> Vec<&'static str> {
     let mut endpoints = Vec::new();
@@ -74,15 +133,15 @@ mod tests {
     #[test]
     fn test_is_supported_endpoint() {
         // OpenAI endpoints
-        assert!(is_supported_endpoint("/v1/chat/completions"));
+        assert!(SupportedAPIs::from_endpoint("/v1/chat/completions").is_some());
 
         // Anthropic endpoints
-        assert!(is_supported_endpoint("/v1/messages"));
+        assert!(SupportedAPIs::from_endpoint("/v1/messages").is_some());
 
         // Unsupported endpoints
-        assert!(!is_supported_endpoint("/v1/unknown"));
-        assert!(!is_supported_endpoint("/v2/chat"));
-        assert!(!is_supported_endpoint(""));
+        assert!(SupportedAPIs::from_endpoint("/v1/unknown").is_none());
+        assert!(SupportedAPIs::from_endpoint("/v2/chat").is_none());
+        assert!(SupportedAPIs::from_endpoint("").is_none());
     }
 
     #[test]
diff --git a/crates/hermesllm/src/clients/mod.rs b/crates/hermesllm/src/clients/mod.rs
index eb3032ce..73972445 100644
--- a/crates/hermesllm/src/clients/mod.rs
+++ b/crates/hermesllm/src/clients/mod.rs
@@ -4,6 +4,6 @@ pub mod endpoints;
 
 // Re-export the main items for easier access
 pub use lib::*;
-pub use endpoints::{is_supported_endpoint, supported_endpoints, identify_provider};
+pub use endpoints::{SupportedAPIs, identify_provider};
 
 // Note: transformer module contains TryFrom trait implementations that are automatically available
diff --git a/crates/hermesllm/src/clients/transformer.rs b/crates/hermesllm/src/clients/transformer.rs
index 23ca26ee..8170a53d 100644
--- a/crates/hermesllm/src/clients/transformer.rs
+++ b/crates/hermesllm/src/clients/transformer.rs
@@ -44,8 +44,6 @@
 
 use serde_json::Value;
 use std::time::{SystemTime, UNIX_EPOCH};
-
-// Import centralized types
 use crate::apis::*;
 use super::TransformError;
 
@@ -61,7 +59,7 @@ const DEFAULT_MAX_TOKENS: u32 = 4096;
 // ============================================================================
 
 /// Trait for extracting text content from various types
-trait ExtractText {
+pub trait ExtractText {
     fn extract_text(&self) -> String;
 }
 
@@ -213,6 +211,7 @@ impl TryFrom<MessagesResponse> for ChatCompletionsResponse {
             choices: vec![choice],
             usage,
             system_fingerprint: None,
+            service_tier: None,
         })
     }
 }
@@ -541,40 +540,6 @@ impl Into<Role> for MessagesRole {
     }
 }
 
-// Content Extraction
-impl ExtractText for MessageContent {
-    fn extract_text(&self) -> String {
-        match self {
-            MessageContent::Text(text) => text.clone(),
-            MessageContent::Parts(parts) => parts.extract_text()
-        }
-    }
-}
-
-impl ExtractText for Vec<ContentPart> {
-    fn extract_text(&self) -> String {
-        self.iter()
-            .filter_map(|part| match part {
-                ContentPart::Text { text } => Some(text.as_str()),
-                _ => None,
-            })
-            .collect::<Vec<_>>()
-            .join("\n")
-    }
-}
-
-impl ExtractText for Vec<MessagesContentBlock> {
-    fn extract_text(&self) -> String {
-        self.iter()
-            .filter_map(|block| match block {
-                MessagesContentBlock::Text { text } => Some(text.as_str()),
-                _ => None,
-            })
-            .collect::<Vec<_>>()
-            .join("\n")
-    }
-}
-
 // Content Utilities
 impl ContentUtils<ToolCall> for Vec<MessagesContentBlock> {
     fn extract_tool_calls(&self) -> Result<Option<Vec<ToolCall>>, TransformError> {
diff --git a/crates/hermesllm/src/lib.rs b/crates/hermesllm/src/lib.rs
index b4ad9932..a9e8c48e 100644
--- a/crates/hermesllm/src/lib.rs
+++ b/crates/hermesllm/src/lib.rs
@@ -4,12 +4,16 @@
 pub mod providers;
 pub mod apis;
 pub mod clients;
-
 // Re-export important types and traits
 pub use providers::request::{ProviderRequestType, ProviderRequest, ProviderRequestError};
-pub use providers::response::{ProviderResponseType, ProviderResponse, ProviderStreamResponse, ProviderStreamResponseIter, ProviderResponseError, TokenUsage};
+pub use providers::response::{ProviderResponseType, ProviderStreamResponseType, ProviderResponse, ProviderStreamResponse, ProviderResponseError, TokenUsage, SseEvent, SseStreamIter};
 pub use providers::id::ProviderId;
-pub use providers::adapters::{has_compatible_api, supported_apis};
+
+
+//TODO: Refactor such that commons doesn't depend on Hermes. For now this will clean up strings
+pub const CHAT_COMPLETIONS_PATH: &str = "/v1/chat/completions";
+pub const MESSAGES_PATH: &str = "/v1/messages";
+
 
 #[cfg(test)]
 mod tests {
@@ -23,72 +27,51 @@ mod tests {
         assert_eq!(ProviderId::from("arch"), ProviderId::Arch);
     }
 
-    #[test]
-    fn test_provider_api_compatibility() {
-        assert!(has_compatible_api(&ProviderId::OpenAI, "/v1/chat/completions"));
-        assert!(!has_compatible_api(&ProviderId::OpenAI, "/v1/embeddings"));
-    }
-
-    #[test]
-    fn test_provider_supported_apis() {
-        let apis = supported_apis(&ProviderId::OpenAI);
-        assert!(apis.contains(&"/v1/chat/completions"));
-
-        // Test that provider supports the expected API endpoints
-        assert!(has_compatible_api(&ProviderId::OpenAI, "/v1/chat/completions"));
-    }
-
-    #[test]
-    fn test_provider_request_parsing() {
-        // Test with a sample JSON request
-        let json_request = r#"{
-            "model": "gpt-4",
-            "messages": [
-                {
-                    "role": "system",
-                    "content": "You are a helpful assistant"
-                },
-                {
-                    "role": "user",
-                    "content": "Hello!"
-                }
-            ]
-        }"#;
-
-        let result: Result<ProviderRequestType, std::io::Error> = ProviderRequestType::try_from(json_request.as_bytes());
-        assert!(result.is_ok());
-
-        let request = result.unwrap();
-        assert_eq!(request.model(), "gpt-4");
-        assert_eq!(request.get_recent_user_message(), Some("Hello!".to_string()));
-    }
-
     #[test]
     fn test_provider_streaming_response() {
         // Test streaming response parsing with sample SSE data
-        let sse_data = r#"data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}
+    let sse_data = r#"data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}
 
-data: [DONE]
-"#;
+    data: [DONE]
+    "#;
 
-        let result = ProviderStreamResponseIter::try_from((sse_data.as_bytes(), &ProviderId::OpenAI));
-        assert!(result.is_ok());
+    use crate::clients::endpoints::SupportedAPIs;
+    let client_api = SupportedAPIs::OpenAIChatCompletions(crate::apis::OpenAIApi::ChatCompletions);
+    let upstream_api =  SupportedAPIs::OpenAIChatCompletions(crate::apis::OpenAIApi::ChatCompletions);
 
-        let mut streaming_response = result.unwrap();
+    // Test the new simplified architecture - create SseStreamIter directly
+    let sse_iter = SseStreamIter::try_from(sse_data.as_bytes());
+    assert!(sse_iter.is_ok());
 
-        // Test that we can iterate over chunks - it's just an iterator now!
-        let first_chunk = streaming_response.next();
-        assert!(first_chunk.is_some());
+    let mut streaming_iter = sse_iter.unwrap();
 
-        let chunk_result = first_chunk.unwrap();
-        assert!(chunk_result.is_ok());
+    // Test that we can iterate over SseEvents
+    let first_event = streaming_iter.next();
+    assert!(first_event.is_some());
 
-        let chunk = chunk_result.unwrap();
-        assert_eq!(chunk.content_delta(), Some("Hello"));
-        assert!(!chunk.is_final());
+    let sse_event = first_event.unwrap();
 
-        // Test that stream ends properly
-        let final_chunk = streaming_response.next();
-        assert!(final_chunk.is_none());
+    // Test SseEvent properties
+    assert!(!sse_event.is_done());
+    assert!(sse_event.data.as_ref().unwrap().contains("Hello"));
+
+    // Test that we can parse the event into a provider stream response
+    let transformed_event = SseEvent::try_from((sse_event, &client_api, &upstream_api));
+    if let Err(e) = &transformed_event {
+        println!("Transform error: {:?}", e);
+    }
+    assert!(transformed_event.is_ok());
+
+    let transformed_event = transformed_event.unwrap();
+    let provider_response = transformed_event.provider_response();
+    assert!(provider_response.is_ok());
+
+    let stream_response = provider_response.unwrap();
+    assert_eq!(stream_response.content_delta(), Some("Hello"));
+    assert!(!stream_response.is_final());
+
+    // Test that stream ends properly with [DONE] (SseStreamIter should stop before [DONE])
+    let final_event = streaming_iter.next();
+    assert!(final_event.is_none()); // Should be None because iterator stops at [DONE]
     }
 }
diff --git a/crates/hermesllm/src/providers/adapters.rs b/crates/hermesllm/src/providers/adapters.rs
deleted file mode 100644
index a001cf09..00000000
--- a/crates/hermesllm/src/providers/adapters.rs
+++ /dev/null
@@ -1,39 +0,0 @@
-use crate::providers::id::ProviderId;
-
-#[derive(Debug, Clone)]
-pub enum AdapterType {
-    OpenAICompatible,
-    // Future: Claude, Gemini, etc.
-}
-
-/// Provider adapter configuration
-#[derive(Debug, Clone)]
-pub struct ProviderConfig {
-    pub supported_apis: &'static [&'static str],
-    pub adapter_type: AdapterType,
-}
-
-/// Check if provider has compatible API
-pub fn has_compatible_api(provider_id: &ProviderId, api_path: &str) -> bool {
-    let config = get_provider_config(provider_id);
-    config.supported_apis.iter().any(|&supported| supported == api_path)
-}
-
-/// Get supported APIs for provider
-pub fn supported_apis(provider_id: &ProviderId) -> Vec<&'static str> {
-    let config = get_provider_config(provider_id);
-    config.supported_apis.to_vec()
-}
-
-/// Get provider configuration
-pub fn get_provider_config(provider_id: &ProviderId) -> ProviderConfig {
-    match provider_id {
-        ProviderId::OpenAI | ProviderId::Groq | ProviderId::Mistral | ProviderId::Deepseek
-        | ProviderId::Arch | ProviderId::Gemini | ProviderId::Claude | ProviderId::GitHub => {
-            ProviderConfig {
-                supported_apis: &["/v1/chat/completions"],
-                adapter_type: AdapterType::OpenAICompatible,
-            }
-        }
-    }
-}
diff --git a/crates/hermesllm/src/providers/id.rs b/crates/hermesllm/src/providers/id.rs
index 2c0c494e..26933adc 100644
--- a/crates/hermesllm/src/providers/id.rs
+++ b/crates/hermesllm/src/providers/id.rs
@@ -1,4 +1,6 @@
 use std::fmt::Display;
+use crate::clients::endpoints::SupportedAPIs;
+use crate::apis::{OpenAIApi, AnthropicApi};
 
 /// Provider identifier enum - simple enum for identifying providers
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
@@ -8,7 +10,7 @@ pub enum ProviderId {
     Deepseek,
     Groq,
     Gemini,
-    Claude,
+    Anthropic,
     GitHub,
     Arch,
 }
@@ -21,7 +23,7 @@ impl From<&str> for ProviderId {
             "deepseek" => ProviderId::Deepseek,
             "groq" => ProviderId::Groq,
             "gemini" => ProviderId::Gemini,
-            "claude" => ProviderId::Claude,
+            "anthropic" => ProviderId::Anthropic,
             "github" => ProviderId::GitHub,
             "arch" => ProviderId::Arch,
             _ => panic!("Unknown provider: {}", value),
@@ -29,6 +31,21 @@ impl From<&str> for ProviderId {
     }
 }
 
+impl ProviderId {
+    /// Maps the API the client used to the API that is used upstream for this provider.
+    pub fn compatible_api_for_client(&self, client_api: &SupportedAPIs) -> SupportedAPIs {
+        match (self, client_api) {
+            // Anthropic keeps whichever API the client used: Messages stays Messages, chat completions stays chat completions
+            (ProviderId::Anthropic, SupportedAPIs::AnthropicMessagesAPI(_)) => SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages),
+            (ProviderId::Anthropic, SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions)) => SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions),
+
+            // Every other provider always maps to OpenAI chat completions, even for Messages clients
+            (ProviderId::OpenAI | ProviderId::Groq | ProviderId::Mistral | ProviderId::Deepseek | ProviderId::Arch | ProviderId::Gemini | ProviderId::GitHub, SupportedAPIs::AnthropicMessagesAPI(_)) => SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions),
+            (ProviderId::OpenAI | ProviderId::Groq | ProviderId::Mistral | ProviderId::Deepseek | ProviderId::Arch | ProviderId::Gemini | ProviderId::GitHub, SupportedAPIs::OpenAIChatCompletions(_)) => SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions),
+        }
+    }
+}
+
 impl Display for ProviderId {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
@@ -37,7 +54,7 @@ impl Display for ProviderId {
             ProviderId::Deepseek => write!(f, "Deepseek"),
             ProviderId::Groq => write!(f, "Groq"),
             ProviderId::Gemini => write!(f, "Gemini"),
-            ProviderId::Claude => write!(f, "Claude"),
+            ProviderId::Anthropic => write!(f, "Anthropic"),
             ProviderId::GitHub => write!(f, "GitHub"),
             ProviderId::Arch => write!(f, "Arch"),
         }
diff --git a/crates/hermesllm/src/providers/mod.rs b/crates/hermesllm/src/providers/mod.rs
index 4abccc0c..601af955 100644
--- a/crates/hermesllm/src/providers/mod.rs
+++ b/crates/hermesllm/src/providers/mod.rs
@@ -6,9 +6,7 @@
 pub mod id;
 pub mod request;
 pub mod response;
-pub mod adapters;
 
 pub use id::ProviderId;
 pub use request::{ProviderRequestType, ProviderRequest, ProviderRequestError} ;
-pub use response::{ProviderResponseType, ProviderStreamResponseIter, ProviderResponse, ProviderStreamResponse, TokenUsage };
-pub use adapters::*;
+pub use response::{ProviderResponseType, ProviderResponse, ProviderStreamResponse, TokenUsage };
diff --git a/crates/hermesllm/src/providers/request.rs b/crates/hermesllm/src/providers/request.rs
index 1eb39416..adde81f4 100644
--- a/crates/hermesllm/src/providers/request.rs
+++ b/crates/hermesllm/src/providers/request.rs
@@ -1,41 +1,17 @@
-
 use crate::apis::openai::ChatCompletionsRequest;
-use super::{ProviderId, get_provider_config, AdapterType};
+use crate::apis::anthropic::MessagesRequest;
+use crate::clients::endpoints::SupportedAPIs;
+
+use serde_json::Value;
 use std::error::Error;
 use std::fmt;
+use std::collections::HashMap;
+#[derive(Clone)]
 pub enum ProviderRequestType {
     ChatCompletionsRequest(ChatCompletionsRequest),
-    //MessagesRequest(MessagesRequest),
+    MessagesRequest(MessagesRequest),
     //add more request types here
 }
-
-impl TryFrom<&[u8]> for ProviderRequestType {
-    type Error = std::io::Error;
-
-    // if passing bytes without provider id we assume the request is in OpenAI format
-    fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> {
-        let chat_completion_request: ChatCompletionsRequest = ChatCompletionsRequest::try_from(bytes)
-            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
-        Ok(ProviderRequestType::ChatCompletionsRequest(chat_completion_request))
-    }
-}
-
-impl TryFrom<(&[u8], &ProviderId)> for ProviderRequestType {
-    type Error = std::io::Error;
-
-    fn try_from((bytes, provider_id): (&[u8], &ProviderId)) -> Result<Self, Self::Error> {
-        let config = get_provider_config(provider_id);
-        match config.adapter_type {
-            AdapterType::OpenAICompatible => {
-                let chat_completion_request: ChatCompletionsRequest = ChatCompletionsRequest::try_from(bytes)
-                    .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
-                Ok(ProviderRequestType::ChatCompletionsRequest(chat_completion_request))
-            }
-            // Future: handle other adapter types like Claude
-        }
-    }
-}
-
 pub trait ProviderRequest: Send + Sync {
     /// Extract the model name from the request
     fn model(&self) -> &str;
@@ -54,46 +30,129 @@ pub trait ProviderRequest: Send + Sync {
 
     /// Convert the request to bytes for transmission
     fn to_bytes(&self) -> Result<Vec<u8>, ProviderRequestError>;
+
+    fn metadata(&self) -> &Option<HashMap<String, Value>>;
+
+    /// Remove a metadata key from the request and return true if the key was present
+    fn remove_metadata_key(&mut self, key: &str) -> bool;
 }
 
 impl ProviderRequest for ProviderRequestType {
     fn model(&self) -> &str {
         match self {
             Self::ChatCompletionsRequest(r) => r.model(),
+            Self::MessagesRequest(r) => r.model(),
         }
     }
 
     fn set_model(&mut self, model: String) {
         match self {
             Self::ChatCompletionsRequest(r) => r.set_model(model),
+            Self::MessagesRequest(r) => r.set_model(model),
         }
     }
 
     fn is_streaming(&self) -> bool {
         match self {
             Self::ChatCompletionsRequest(r) => r.is_streaming(),
+            Self::MessagesRequest(r) => r.is_streaming(),
         }
     }
 
     fn extract_messages_text(&self) -> String {
         match self {
             Self::ChatCompletionsRequest(r) => r.extract_messages_text(),
+            Self::MessagesRequest(r) => r.extract_messages_text(),
         }
     }
 
     fn get_recent_user_message(&self) -> Option<String> {
         match self {
             Self::ChatCompletionsRequest(r) => r.get_recent_user_message(),
+            Self::MessagesRequest(r) => r.get_recent_user_message(),
         }
     }
 
     fn to_bytes(&self) -> Result<Vec<u8>, ProviderRequestError> {
         match self {
             Self::ChatCompletionsRequest(r) => r.to_bytes(),
+            Self::MessagesRequest(r) => r.to_bytes(),
+        }
+    }
+
+    fn metadata(&self) -> &Option<HashMap<String, Value>> {
+        match self {
+            Self::ChatCompletionsRequest(r) => r.metadata(),
+            Self::MessagesRequest(r) => r.metadata(),
+        }
+    }
+
+    fn remove_metadata_key(&mut self, key: &str) -> bool {
+        match self {
+            Self::ChatCompletionsRequest(r) => r.remove_metadata_key(key),
+            Self::MessagesRequest(r) => r.remove_metadata_key(key),
         }
     }
 }
 
+/// Parses raw request bytes into the request variant that matches the client API.
+///
+/// A payload that fails to parse is reported as `std::io::ErrorKind::InvalidData`.
+impl TryFrom<(&[u8], &SupportedAPIs)> for ProviderRequestType {
+    type Error = std::io::Error;
+
+    fn try_from((bytes, client_api): (&[u8], &SupportedAPIs)) -> Result<Self, Self::Error> {
+        // The client API selects which request format the payload is parsed as.
+        match client_api {
+            // OpenAI chat completions payload
+            SupportedAPIs::OpenAIChatCompletions(_) => ChatCompletionsRequest::try_from(bytes)
+                .map(ProviderRequestType::ChatCompletionsRequest)
+                .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e)),
+            // Anthropic messages payload
+            SupportedAPIs::AnthropicMessagesAPI(_) => MessagesRequest::try_from(bytes)
+                .map(ProviderRequestType::MessagesRequest)
+                .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e)),
+        }
+    }
+}
+
+/// Converts an owned request into the variant the upstream API (`SupportedAPIs`) expects.
+impl TryFrom<(ProviderRequestType, &SupportedAPIs)> for ProviderRequestType {
+    type Error = ProviderRequestError;
+
+    fn try_from((request, upstream_api): (ProviderRequestType, &SupportedAPIs)) -> Result<Self, Self::Error> {
+        match (request, upstream_api) {
+            // Same API - no conversion needed, the owned request is moved through unchanged
+            (ProviderRequestType::ChatCompletionsRequest(chat_req), SupportedAPIs::OpenAIChatCompletions(_)) => {
+                Ok(ProviderRequestType::ChatCompletionsRequest(chat_req))
+            }
+            (ProviderRequestType::MessagesRequest(messages_req), SupportedAPIs::AnthropicMessagesAPI(_)) => {
+                Ok(ProviderRequestType::MessagesRequest(messages_req))
+            }
+
+            // Cross-API conversion - the owned request is consumed by `TryFrom`; no clone is made
+            (ProviderRequestType::ChatCompletionsRequest(chat_req), SupportedAPIs::AnthropicMessagesAPI(_)) => {
+                let messages_req = MessagesRequest::try_from(chat_req)
+                    .map_err(|e| ProviderRequestError {
+                        message: format!("Failed to convert ChatCompletionsRequest to MessagesRequest: {}", e),
+                        source: Some(Box::new(e))
+                    })?;
+                Ok(ProviderRequestType::MessagesRequest(messages_req))
+            }
+
+            (ProviderRequestType::MessagesRequest(messages_req), SupportedAPIs::OpenAIChatCompletions(_)) => {
+                let chat_req = ChatCompletionsRequest::try_from(messages_req)
+                    .map_err(|e| ProviderRequestError {
+                        message: format!("Failed to convert MessagesRequest to ChatCompletionsRequest: {}", e),
+                        source: Some(Box::new(e))
+                    })?;
+                Ok(ProviderRequestType::ChatCompletionsRequest(chat_req))
+            }
+        }
+    }
+}
+
+
 
 /// Error types for provider operations
 #[derive(Debug)]
@@ -113,3 +172,194 @@ impl Error for ProviderRequestError {
         self.source.as_ref().map(|e| e.as_ref() as &(dyn Error + 'static))
     }
 }
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::clients::endpoints::SupportedAPIs;
+    use crate::apis::anthropic::AnthropicApi::Messages;
+    use crate::apis::openai::OpenAIApi::ChatCompletions;
+    use crate::apis::anthropic::MessagesRequest as AnthropicMessagesRequest;
+    use crate::apis::openai::{ChatCompletionsRequest};
+    use crate::clients::transformer::ExtractText;
+    use serde_json::json;
+
+    #[test]
+    fn test_openai_request_from_bytes() {
+        let req = json!({
+            "model": "gpt-4",
+            "messages": [
+                {"role": "system", "content": "You are a helpful assistant"},
+                {"role": "user", "content": "Hello!"}
+            ]
+        });
+        let bytes = serde_json::to_vec(&req).unwrap();
+        let api = SupportedAPIs::OpenAIChatCompletions(ChatCompletions);
+        let result = ProviderRequestType::try_from((bytes.as_slice(), &api));
+        assert!(result.is_ok());
+        match result.unwrap() {
+            ProviderRequestType::ChatCompletionsRequest(r) => {
+                assert_eq!(r.model, "gpt-4");
+                assert_eq!(r.messages.len(), 2);
+            },
+            _ => panic!("Expected ChatCompletionsRequest variant"),
+        }
+    }
+
+    #[test]
+    fn test_anthropic_request_from_bytes_with_endpoint() {
+        let req = json!({
+            "model": "claude-3-sonnet",
+            "system": "You are a helpful assistant",
+            "max_tokens": 100,
+            "messages": [
+                {"role": "user", "content": "Hello!"}
+            ]
+        });
+        let bytes = serde_json::to_vec(&req).unwrap();
+        let endpoint = SupportedAPIs::AnthropicMessagesAPI(Messages);
+        let result = ProviderRequestType::try_from((bytes.as_slice(), &endpoint));
+        assert!(result.is_ok());
+        match result.unwrap() {
+            ProviderRequestType::MessagesRequest(r) => {
+                assert_eq!(r.model, "claude-3-sonnet");
+                assert_eq!(r.messages.len(), 1);
+            },
+            _ => panic!("Expected MessagesRequest variant"),
+        }
+    }
+
+    #[test]
+    fn test_openai_request_from_bytes_with_endpoint() {
+        let req = json!({
+            "model": "gpt-4",
+            "messages": [
+                {"role": "system", "content": "You are a helpful assistant"},
+                {"role": "user", "content": "Hello!"}
+            ]
+        });
+        let bytes = serde_json::to_vec(&req).unwrap();
+        let endpoint = SupportedAPIs::OpenAIChatCompletions(ChatCompletions);
+        let result = ProviderRequestType::try_from((bytes.as_slice(), &endpoint));
+        assert!(result.is_ok());
+        match result.unwrap() {
+            ProviderRequestType::ChatCompletionsRequest(r) => {
+                assert_eq!(r.model, "gpt-4");
+                assert_eq!(r.messages.len(), 2);
+            },
+            _ => panic!("Expected ChatCompletionsRequest variant"),
+        }
+    }
+
+    #[test]
+    fn test_anthropic_request_from_bytes_wrong_endpoint() {
+        let req = json!({
+            "model": "claude-3-sonnet",
+            "system": "You are a helpful assistant",
+            "messages": [
+                {"role": "user", "content": "Hello!"}
+            ]
+        });
+        let bytes = serde_json::to_vec(&req).unwrap();
+        // Intentionally use OpenAI endpoint for Anthropic payload
+        let endpoint = SupportedAPIs::OpenAIChatCompletions(ChatCompletions);
+        let result = ProviderRequestType::try_from((bytes.as_slice(), &endpoint));
+        // Should parse as ChatCompletionsRequest, not error
+        assert!(result.is_ok());
+        match result.unwrap() {
+            ProviderRequestType::ChatCompletionsRequest(r) => {
+                assert_eq!(r.model, "claude-3-sonnet");
+                assert_eq!(r.messages.len(), 1);
+            },
+            _ => panic!("Expected ChatCompletionsRequest variant"),
+        }
+    }
+
+    #[test]
+    fn test_v1_messages_to_v1_chat_completions_roundtrip() {
+        let anthropic_req = AnthropicMessagesRequest {
+            model: "claude-3-sonnet".to_string(),
+            system: Some(crate::apis::anthropic::MessagesSystemPrompt::Single("You are a helpful assistant".to_string())),
+            messages: vec![
+                crate::apis::anthropic::MessagesMessage {
+                    role: crate::apis::anthropic::MessagesRole::User,
+                    content: crate::apis::anthropic::MessagesMessageContent::Single("Hello!".to_string()),
+                }
+            ],
+            max_tokens: 128,
+            container: None,
+            mcp_servers: None,
+            service_tier: None,
+            thinking: None,
+            temperature: Some(0.7),
+            top_p: Some(1.0),
+            top_k: None,
+            stream: Some(false),
+            stop_sequences: Some(vec!["\n".to_string()]),
+            tools: None,
+            tool_choice: None,
+            metadata: None,
+        };
+
+        let openai_req = ChatCompletionsRequest::try_from(anthropic_req.clone()).expect("Anthropic->OpenAI conversion failed");
+        let anthropic_req2 = AnthropicMessagesRequest::try_from(openai_req).expect("OpenAI->Anthropic conversion failed");
+
+        assert_eq!(anthropic_req.model, anthropic_req2.model);
+        // Compare system prompt text if present
+        assert_eq!(
+            anthropic_req.system.as_ref().and_then(|s| match s { crate::apis::anthropic::MessagesSystemPrompt::Single(t) => Some(t), _ => None }),
+            anthropic_req2.system.as_ref().and_then(|s| match s { crate::apis::anthropic::MessagesSystemPrompt::Single(t) => Some(t), _ => None })
+        );
+        assert_eq!(anthropic_req.messages[0].role, anthropic_req2.messages[0].role);
+        // Compare message content text if present
+        assert_eq!(
+            anthropic_req.messages[0].content.extract_text(),
+            anthropic_req2.messages[0].content.extract_text()
+        );
+        assert_eq!(anthropic_req.max_tokens, anthropic_req2.max_tokens);
+    }
+
+    #[test]
+    fn test_v1_chat_completions_to_v1_messages_roundtrip() {
+        // The request types are already imported at module scope; only the message parts are needed here.
+        use crate::apis::openai::{Message, MessageContent, Role};
+
+        let openai_req = ChatCompletionsRequest {
+            model: "gpt-4".to_string(),
+            messages: vec![
+                Message {
+                    role: Role::System,
+                    content: MessageContent::Text("You are a helpful assistant".to_string()),
+                    name: None,
+                    tool_calls: None,
+                    tool_call_id: None,
+                },
+                Message {
+                    role: Role::User,
+                    content: MessageContent::Text("Hello!".to_string()),
+                    name: None,
+                    tool_calls: None,
+                    tool_call_id: None,
+                }
+            ],
+            temperature: Some(0.7),
+            top_p: Some(1.0),
+            max_tokens: Some(128),
+            stream: Some(false),
+            stop: Some(vec!["\n".to_string()]),
+            tools: None,
+            tool_choice: None,
+            parallel_tool_calls: None,
+            ..Default::default()
+        };
+
+        let anthropic_req = AnthropicMessagesRequest::try_from(openai_req.clone()).expect("OpenAI->Anthropic conversion failed");
+        let openai_req2 = ChatCompletionsRequest::try_from(anthropic_req).expect("Anthropic->OpenAI conversion failed");
+
+        assert_eq!(openai_req.model, openai_req2.model);
+        assert_eq!(openai_req.messages[0].role, openai_req2.messages[0].role);
+        assert_eq!(openai_req.messages[0].content.extract_text(), openai_req2.messages[0].content.extract_text());
+        assert_eq!(openai_req.max_tokens, openai_req2.max_tokens);
+    }
+}
diff --git a/crates/hermesllm/src/providers/response.rs b/crates/hermesllm/src/providers/response.rs
index faca303f..13cad0cd 100644
--- a/crates/hermesllm/src/providers/response.rs
+++ b/crates/hermesllm/src/providers/response.rs
@@ -1,76 +1,37 @@
+use crate::providers::id::ProviderId;
+use serde::{Serialize, Deserialize};
 use std::error::Error;
 use std::fmt;
+use std::convert::TryFrom;
+use std::str::FromStr;
 
 use crate::apis::openai::ChatCompletionsResponse;
-use crate::apis::OpenAISseIter;
-use crate::providers::id::ProviderId;
-use crate::providers::adapters::{get_provider_config, AdapterType};
+use crate::apis::openai::ChatCompletionsStreamResponse;
+use crate::apis::anthropic::MessagesStreamEvent;
+use crate::clients::endpoints::SupportedAPIs;
+use crate::apis::anthropic::MessagesResponse;
 
+/// Trait for token usage information
+pub trait TokenUsage {
+    fn completion_tokens(&self) -> usize;
+    fn prompt_tokens(&self) -> usize;
+    fn total_tokens(&self) -> usize;
+}
+
+#[derive(Serialize, Debug, Clone)]
+#[serde(untagged)]
 pub enum ProviderResponseType {
     ChatCompletionsResponse(ChatCompletionsResponse),
-    //MessagesResponse(MessagesResponse),
+    MessagesResponse(MessagesResponse),
 }
 
-pub enum ProviderStreamResponseIter {
-    ChatCompletionsStream(OpenAISseIter<std::vec::IntoIter<String>>),
-    //MessagesStream(AnthropicSseIter<std::vec::IntoIter<String>>),
+#[derive(Serialize, Debug, Clone)]
+#[serde(untagged)]
+pub enum ProviderStreamResponseType {
+    ChatCompletionsStreamResponse(ChatCompletionsStreamResponse),
+    MessagesStreamEvent(MessagesStreamEvent),
 }
 
-impl TryFrom<(&[u8], ProviderId)> for ProviderResponseType {
-    type Error = std::io::Error;
-
-    fn try_from((bytes, provider_id): (&[u8], ProviderId)) -> Result<Self, Self::Error> {
-        let config = get_provider_config(&provider_id);
-        match config.adapter_type {
-            AdapterType::OpenAICompatible => {
-                let chat_completions_response: ChatCompletionsResponse = ChatCompletionsResponse::try_from(bytes)
-                    .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
-                Ok(ProviderResponseType::ChatCompletionsResponse(chat_completions_response))
-            }
-            // Future: handle other adapter types like Claude
-        }
-    }
-}
-
-impl TryFrom<(&[u8], &ProviderId)> for ProviderStreamResponseIter {
-    type Error = Box<dyn std::error::Error + Send + Sync>;
-
-    fn try_from((bytes, provider_id): (&[u8], &ProviderId)) -> Result<Self, Self::Error> {
-        let config = get_provider_config(provider_id);
-
-        // Parse SSE (Server-Sent Events) streaming data - protocol layer
-        let s = std::str::from_utf8(bytes)?;
-        let lines: Vec<String> = s.lines().map(|line| line.to_string()).collect();
-
-        match config.adapter_type {
-            AdapterType::OpenAICompatible => {
-                // Delegate to OpenAI-specific iterator implementation
-                let sse_container = SseStreamIter::new(lines.into_iter());
-                let iter = crate::apis::openai::OpenAISseIter::new(sse_container);
-                Ok(ProviderStreamResponseIter::ChatCompletionsStream(iter))
-            }
-            // Future: AdapterType::Claude => {
-            //     let sse_container = SseStreamIter::new(lines.into_iter());
-            //     let iter = crate::apis::anthropic::AnthropicSseIter::new(sse_container);
-            //     Ok(ProviderStreamResponseIter::MessagesStream(iter))
-            // }
-        }
-    }
-}
-
-
-impl Iterator for ProviderStreamResponseIter {
-    type Item = Result<Box<dyn ProviderStreamResponse>, Box<dyn std::error::Error + Send + Sync>>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        match self {
-            ProviderStreamResponseIter::ChatCompletionsStream(iter) => iter.next(),
-            // Future: ProviderStreamResponseIter::MessagesStream(iter) => iter.next(),
-        }
-    }
-}
-
-
 pub trait ProviderResponse: Send + Sync {
     /// Get usage information if available - returns dynamic trait object
     fn usage(&self) -> Option<&dyn TokenUsage>;
@@ -81,6 +42,22 @@ pub trait ProviderResponse: Send + Sync {
     }
 }
 
+impl ProviderResponse for ProviderResponseType {
+    fn usage(&self) -> Option<&dyn TokenUsage> {
+        match self {
+            ProviderResponseType::ChatCompletionsResponse(resp) => resp.usage(),
+            ProviderResponseType::MessagesResponse(resp) => resp.usage(),
+        }
+    }
+
+    fn extract_usage_counts(&self) -> Option<(usize, usize, usize)> {
+        match self {
+            ProviderResponseType::ChatCompletionsResponse(resp) => resp.extract_usage_counts(),
+            ProviderResponseType::MessagesResponse(resp) => resp.extract_usage_counts(),
+        }
+    }
+}
+
 pub trait ProviderStreamResponse: Send + Sync {
     /// Get the content delta for this chunk
     fn content_delta(&self) -> Option<&str>;
@@ -90,16 +67,313 @@ pub trait ProviderStreamResponse: Send + Sync {
 
     /// Get role information if available
     fn role(&self) -> Option<&str>;
+
+    /// Get event type for SSE streaming (used by Anthropic)
+    fn event_type(&self) -> Option<&str>;
+}
+
+impl ProviderStreamResponse for ProviderStreamResponseType {
+    fn content_delta(&self) -> Option<&str> {
+        match self {
+            ProviderStreamResponseType::ChatCompletionsStreamResponse(resp) => resp.content_delta(),
+            ProviderStreamResponseType::MessagesStreamEvent(resp) => resp.content_delta(),
+        }
+    }
+
+    fn is_final(&self) -> bool {
+        match self {
+            ProviderStreamResponseType::ChatCompletionsStreamResponse(resp) => resp.is_final(),
+            ProviderStreamResponseType::MessagesStreamEvent(resp) => resp.is_final(),
+        }
+    }
+
+    fn role(&self) -> Option<&str> {
+        match self {
+            ProviderStreamResponseType::ChatCompletionsStreamResponse(resp) => resp.role(),
+            ProviderStreamResponseType::MessagesStreamEvent(resp) => resp.role(),
+        }
+    }
+
+    fn event_type(&self) -> Option<&str> {
+        match self {
+            ProviderStreamResponseType::ChatCompletionsStreamResponse(_resp) => None, // OpenAI doesn't use event types
+            ProviderStreamResponseType::MessagesStreamEvent(resp) => resp.event_type(),
+        }
+    }
+}
+
+// ============================================================================
+// SSE EVENT CONTAINER
+// ============================================================================
+
+/// A single parsed Server-Sent Events line (`data: ` or `event: `) plus the text to forward downstream
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SseEvent {
+    #[serde(rename = "data")]
+    pub data: Option<String>,  // The payload after "data: "; None when the line was an "event: " line
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub event: Option<String>,  // Optional event type (e.g., "message_start", "content_block_delta")
+
+    #[serde(skip_serializing, skip_deserializing)]
+    pub raw_line: String,  // The line exactly as it was handed to the parser (prefix included)
+
+     #[serde(skip_serializing, skip_deserializing)]
+    pub sse_transform_buffer: String,  // Text emitted when the event is formatted or turned into bytes; starts as the parsed line and may be rewritten by API transformation
+
+    #[serde(skip_serializing, skip_deserializing)]
+    pub provider_stream_response: Option<ProviderStreamResponseType>,  // Parsed provider stream response object
+}
+
+impl SseEvent {
+    /// Check if this event represents the end of the stream (payload is exactly `[DONE]`)
+    pub fn is_done(&self) -> bool {
+        self.data.as_deref() == Some("[DONE]")
+    }
+
+    /// Check if this event should be skipped during processing
+    /// Currently only the exact ping payload `{"type": "ping"}` is matched
+    pub fn should_skip(&self) -> bool {
+        // Skip ping messages (commonly used by providers for connection keep-alive)
+        self.data.as_deref() == Some(r#"{"type": "ping"}"#)
+    }
+
+    /// Check if this is an event-only SSE event (no data payload)
+    pub fn is_event_only(&self) -> bool {
+        self.event.is_some() && self.data.is_none()
+    }
+
+    /// Get the parsed provider response, or a `NotFound` I/O error when none has been attached
+    pub fn provider_response(&self) -> Result<&dyn ProviderStreamResponse, std::io::Error> {
+        self.provider_stream_response.as_ref()
+            .map(|resp| resp as &dyn ProviderStreamResponse)
+            .ok_or_else(|| {
+                std::io::Error::new(std::io::ErrorKind::NotFound, "Provider response not found")
+            })
+    }
+
+}
+
+impl FromStr for SseEvent {
+    type Err = SseParseError;
+
+    /// Parses one SSE line of the form `data: <payload>` or `event: <type>`.
+    /// Returns `SseParseError` for an empty payload/type or for any other line shape.
+    fn from_str(line: &str) -> Result<Self, Self::Err> {
+        if let Some(data) = line.strip_prefix("data: ") {
+            if data.is_empty() {
+                return Err(SseParseError {
+                    message: "Empty data field is not a valid SSE event".to_string(),
+                });
+            }
+            Ok(SseEvent {
+                data: Some(data.to_string()),
+                event: None,
+                raw_line: line.to_string(),
+                sse_transform_buffer: line.to_string(),
+                provider_stream_response: None,
+            })
+        } else if let Some(event_type) = line.strip_prefix("event: ") { //used by Anthropic
+            if event_type.is_empty() {
+                return Err(SseParseError {
+                    message: "Empty event field is not a valid SSE event".to_string(),
+                });
+            }
+            Ok(SseEvent {
+                data: None,
+                event: Some(event_type.to_string()),
+                raw_line: line.to_string(),
+                sse_transform_buffer: line.to_string(),
+                provider_stream_response: None,
+            })
+        } else {
+            Err(SseParseError {
+                message: format!("Line does not start with 'data: ' or 'event: ': {}", line),
+            })
+        }
+    }
+}
+
+impl fmt::Display for SseEvent {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.sse_transform_buffer)
+    }
+}
+
+// From implementation (which also provides Into) to convert SseEvent to bytes for the response buffer
+impl From<SseEvent> for Vec<u8> {
+    fn from(event: SseEvent) -> Self {
+        format!("{}\n\n", event.sse_transform_buffer).into_bytes()
+    }
 }
 
 
+// Parses an upstream response body and converts it to the client-facing API format when the two differ
+impl TryFrom<(&[u8], &SupportedAPIs, &ProviderId)> for ProviderResponseType {
+    type Error = std::io::Error;
+
+    fn try_from((bytes, client_api, provider_id): (&[u8], &SupportedAPIs, &ProviderId)) -> Result<Self, Self::Error> {
+        let upstream_api = provider_id.compatible_api_for_client(client_api);
+        match (&upstream_api, client_api) {
+            (SupportedAPIs::OpenAIChatCompletions(_), SupportedAPIs::OpenAIChatCompletions(_)) => {
+                let resp: ChatCompletionsResponse = ChatCompletionsResponse::try_from(bytes)
+                    .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
+                Ok(ProviderResponseType::ChatCompletionsResponse(resp))
+            }
+            (SupportedAPIs::AnthropicMessagesAPI(_), SupportedAPIs::AnthropicMessagesAPI(_)) => {
+                let resp: MessagesResponse = serde_json::from_slice(bytes)
+                    .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
+                Ok(ProviderResponseType::MessagesResponse(resp))
+            }
+            (SupportedAPIs::AnthropicMessagesAPI(_), SupportedAPIs::OpenAIChatCompletions(_)) => {
+                let anthropic_resp: MessagesResponse = serde_json::from_slice(bytes)
+                    .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
+
+                // Upstream is Anthropic Messages, client is OpenAI ChatCompletions: convert via TryInto
+                let chat_resp: ChatCompletionsResponse = anthropic_resp.try_into()
+                    .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, format!("Transformation error: {}", e)))?;
+                Ok(ProviderResponseType::ChatCompletionsResponse(chat_resp))
+            }
+            (SupportedAPIs::OpenAIChatCompletions(_), SupportedAPIs::AnthropicMessagesAPI(_)) => {
+                let openai_resp: ChatCompletionsResponse = ChatCompletionsResponse::try_from(bytes)
+                    .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
+
+                // Upstream is OpenAI ChatCompletions, client is Anthropic Messages: convert via TryInto
+                let messages_resp: MessagesResponse = openai_resp.try_into()
+                    .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, format!("Transformation error: {}", e)))?;
+                Ok(ProviderResponseType::MessagesResponse(messages_resp))
+            }
+        }
+    }
+}
+
+// Streaming counterpart: parse one upstream chunk and reshape it for the client API when the two differ
+impl TryFrom<(&[u8], &SupportedAPIs, &SupportedAPIs)> for ProviderStreamResponseType {
+    type Error = Box<dyn std::error::Error + Send + Sync>;
+
+    fn try_from((bytes, client_api, upstream_api): (&[u8], &SupportedAPIs, &SupportedAPIs)) -> Result<Self, Self::Error> {
+        match (upstream_api, client_api) {
+            (SupportedAPIs::OpenAIChatCompletions(_), SupportedAPIs::OpenAIChatCompletions(_)) => {
+                let chunk: ChatCompletionsStreamResponse = serde_json::from_slice(bytes)?;
+                Ok(ProviderStreamResponseType::ChatCompletionsStreamResponse(chunk))
+            }
+            (SupportedAPIs::AnthropicMessagesAPI(_), SupportedAPIs::AnthropicMessagesAPI(_)) => {
+                let event: MessagesStreamEvent = serde_json::from_slice(bytes)?;
+                Ok(ProviderStreamResponseType::MessagesStreamEvent(event))
+            }
+            (SupportedAPIs::AnthropicMessagesAPI(_), SupportedAPIs::OpenAIChatCompletions(_)) => {
+                let upstream_event: MessagesStreamEvent = serde_json::from_slice(bytes)?;
+
+                // Anthropic upstream, OpenAI client: reshape the event into a ChatCompletions chunk
+                let client_chunk: ChatCompletionsStreamResponse = upstream_event.try_into()?;
+                Ok(ProviderStreamResponseType::ChatCompletionsStreamResponse(client_chunk))
+            }
+            (SupportedAPIs::OpenAIChatCompletions(_), SupportedAPIs::AnthropicMessagesAPI(_)) => {
+                let upstream_chunk: ChatCompletionsStreamResponse = serde_json::from_slice(bytes)?;
+
+                // OpenAI upstream, Anthropic client: reshape the chunk into a Messages stream event
+                let client_event: MessagesStreamEvent = upstream_chunk.try_into()?;
+                Ok(ProviderStreamResponseType::MessagesStreamEvent(client_event))
+            }
+        }
+    }
+}
+
+// TryFrom implementation that parses an SseEvent's data and rewrites its output buffer for the client API
+impl TryFrom<(SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent {
+    type Error = Box<dyn std::error::Error + Send + Sync>;
+
+    fn try_from((sse_event, client_api, upstream_api): (SseEvent, &SupportedAPIs, &SupportedAPIs)) -> Result<Self, Self::Error> {
+        // Take ownership of the incoming event and transform it in place
+        let mut transformed_event = sse_event;
+
+        // If not [DONE] and has data, parse the data as a provider stream response (business logic layer)
+        if !transformed_event.is_done() {
+            if let Some(data_str) = transformed_event.data.as_deref() {
+                let transformed_response = ProviderStreamResponseType::try_from((data_str.as_bytes(), client_api, upstream_api))?;
+                let transformed_json = serde_json::to_string(&transformed_response)?;
+                transformed_event.sse_transform_buffer = format!("data: {}\n\n", transformed_json);
+                transformed_event.provider_stream_response = Some(transformed_response);
+            }
+        }
+
+        match (client_api, upstream_api) {
+            (SupportedAPIs::OpenAIChatCompletions(_), SupportedAPIs::OpenAIChatCompletions(_)) => {
+                // No transformation needed
+            }
+            (SupportedAPIs::AnthropicMessagesAPI(_), SupportedAPIs::AnthropicMessagesAPI(_)) => {
+                // No transformation needed
+            }
+            (SupportedAPIs::AnthropicMessagesAPI(_), SupportedAPIs::OpenAIChatCompletions(_)) => {
+                if let Some(provider_response) = &transformed_event.provider_stream_response {
+                    if let Some(event_type) = provider_response.event_type() {
+                        // This ensures the required Anthropic sequence: MessageStart → ContentBlockStart → ContentBlockDelta(s)
+                        if event_type == "message_start" {
+                            let content_block_start_json = serde_json::json!({
+                                "type": "content_block_start",
+                                "index": 0,
+                                "content_block": {
+                                    "type": "text",
+                                    "text": ""
+                                }
+                            });
+                            // Format as proper SSE: MessageStart first, then ContentBlockStart
+                            transformed_event.sse_transform_buffer = format!(
+                                "event: {}\n{}\nevent: content_block_start\ndata: {}\n\n",
+                                event_type,
+                                transformed_event.sse_transform_buffer,
+                                content_block_start_json,
+                            );
+                        } else if event_type == "message_delta" {
+                            let content_block_stop_json = serde_json::json!({
+                                "type": "content_block_stop",
+                                "index": 0
+                            });
+                            // Format as proper SSE: ContentBlockStop first, then MessageDelta
+                            transformed_event.sse_transform_buffer = format!(
+                                "event: content_block_stop\ndata: {}\n\nevent: {}\n{}",
+                                content_block_stop_json,
+                                event_type,
+                                transformed_event.sse_transform_buffer
+                            );
+                        } else {
+                            transformed_event.sse_transform_buffer = format!("event: {}\n{}", event_type, transformed_event.sse_transform_buffer);
+                        }
+                    }
+                    // If event_type is None, we just keep the data line as-is without an event line
+                    // This handles cases where the transformation might not produce a valid event type
+                }
+            }
+            (SupportedAPIs::OpenAIChatCompletions(_), SupportedAPIs::AnthropicMessagesAPI(_)) => {
+                if transformed_event.is_event_only() {
+                    transformed_event.sse_transform_buffer = String::from("\n"); // replace the upstream `event:` line with a bare newline for OpenAI clients
+                }
+            }
+        }
+
+        Ok(transformed_event)
+    }
+}
+
+#[derive(Debug)]
+pub struct SseParseError {
+    pub message: String,
+}
+
+impl fmt::Display for SseParseError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "SSE parse error: {}", self.message)
+    }
+}
+
+impl Error for SseParseError {}
 
 // ============================================================================
 // GENERIC SSE STREAMING ITERATOR (Container Only)
 // ============================================================================
 
 /// Generic SSE (Server-Sent Events) streaming iterator container
-/// This is just a simple wrapper - actual Iterator implementation is delegated to provider-specific modules
+/// Parses raw SSE lines into SseEvent objects
 pub struct SseStreamIter<I>
 where
     I: Iterator,
@@ -118,35 +392,45 @@ where
     }
 }
 
+// TryFrom implementation to parse bytes into SseStreamIter
+impl TryFrom<&[u8]> for SseStreamIter<std::vec::IntoIter<String>> {
+    type Error = Box<dyn std::error::Error + Send + Sync>;
 
-impl ProviderResponse for ProviderResponseType {
-    fn usage(&self) -> Option<&dyn TokenUsage> {
-        match self {
-            ProviderResponseType::ChatCompletionsResponse(resp) => resp.usage(),
-            // Future: ProviderResponseType::MessagesResponse(resp) => resp.usage(),
-        }
-    }
-
-    fn extract_usage_counts(&self) -> Option<(usize, usize, usize)> {
-        match self {
-            ProviderResponseType::ChatCompletionsResponse(resp) => resp.extract_usage_counts(),
-            // Future: ProviderResponseType::MessagesResponse(resp) => resp.extract_usage_counts(),
-        }
+    fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> {
+        let s = std::str::from_utf8(bytes)?;
+        let lines: Vec<String> = s.lines().map(|line| line.to_string()).collect();
+        Ok(SseStreamIter::new(lines.into_iter()))
     }
 }
 
-// Implement Send + Sync for the enum to match the original trait requirements
-unsafe impl Send for ProviderStreamResponseIter {}
-unsafe impl Sync for ProviderStreamResponseIter {}
+impl<I> Iterator for SseStreamIter<I>
+where
+    I: Iterator,
+    I::Item: AsRef<str>,
+{
+    type Item = SseEvent;
 
-/// Trait for token usage information
-pub trait TokenUsage {
-    fn completion_tokens(&self) -> usize;
-    fn prompt_tokens(&self) -> usize;
-    fn total_tokens(&self) -> usize;
+    fn next(&mut self) -> Option<Self::Item> {
+        for line in &mut self.lines {
+            let line_str = line.as_ref();
+
+            // Try to parse as either data: or event: line
+            if let Ok(event) = line_str.parse::<SseEvent>() {
+                // For data: lines, check if this is the [DONE] marker - if so, end the stream
+                if event.data.is_some() && event.is_done() {
+                    return None;
+                }
+                // For data: lines, skip events that should be filtered at the transport layer
+                if event.data.is_some() && event.should_skip() {
+                    continue;
+                }
+                return Some(event);
+            }
+        }
+        None
+    }
 }
 
-
 #[derive(Debug)]
 pub struct ProviderResponseError {
     pub message: String,
@@ -165,3 +449,331 @@ impl Error for ProviderResponseError {
         self.source.as_ref().map(|e| e.as_ref() as &(dyn Error + 'static))
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::clients::endpoints::SupportedAPIs;
+    use crate::providers::id::ProviderId;
+    use crate::apis::openai::OpenAIApi;
+    use crate::apis::anthropic::AnthropicApi;
+    use serde_json::json;
+
+    #[test]
+    fn test_openai_response_from_bytes() { // OpenAI client API paired with the OpenAI provider
+        let resp = json!({
+            "id": "chatcmpl-123",
+            "object": "chat.completion",
+            "created": 1234567890,
+            "model": "gpt-4",
+            "choices": [
+                {
+                    "index": 0,
+                    "message": { "role": "assistant", "content": "Hello!" },
+                    "finish_reason": "stop"
+                }
+            ],
+            "usage": { "prompt_tokens": 5, "completion_tokens": 7, "total_tokens": 12 },
+            "system_fingerprint": null
+        });
+        let bytes = serde_json::to_vec(&resp).unwrap();
+        let result = ProviderResponseType::try_from((bytes.as_slice(), &SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), &ProviderId::OpenAI));
+        assert!(result.is_ok());
+        match result.unwrap() {
+            ProviderResponseType::ChatCompletionsResponse(r) => {
+                assert_eq!(r.model, "gpt-4"); // model is carried through from the fixture
+                assert_eq!(r.choices.len(), 1);
+            },
+            _ => panic!("Expected ChatCompletionsResponse variant"),
+        }
+    }
+
+    #[test]
+    fn test_anthropic_response_from_bytes() { // Anthropic client API paired with the Anthropic provider
+        let resp = json!({
+            "id": "msg_01ABC123",
+            "type": "message",
+            "role": "assistant",
+            "content": [
+                { "type": "text", "text": "Hello! How can I help you today?" }
+            ],
+            "model": "claude-3-sonnet-20240229",
+            "stop_reason": "end_turn",
+            "usage": { "input_tokens": 10, "output_tokens": 25, "cache_creation_input_tokens": 5, "cache_read_input_tokens": 3 }
+        });
+        let bytes = serde_json::to_vec(&resp).unwrap();
+        let result = ProviderResponseType::try_from((bytes.as_slice(), &SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages), &ProviderId::Anthropic));
+        assert!(result.is_ok());
+        match result.unwrap() {
+            ProviderResponseType::MessagesResponse(r) => {
+                assert_eq!(r.model, "claude-3-sonnet-20240229");
+                assert_eq!(r.content.len(), 1); // the single text block from the fixture
+            },
+            _ => panic!("Expected MessagesResponse variant"),
+        }
+    }
+
+    #[test]
+    fn test_anthropic_response_from_bytes_with_openai_provider() {
+        // OpenAI provider receives OpenAI response but client expects Anthropic format
+        // Upstream API = OpenAI, Client API = Anthropic -> parse OpenAI, convert to Anthropic
+        let resp = json!({
+            "id": "chatcmpl-123",
+            "object": "chat.completion",
+            "created": 1234567890,
+            "model": "gpt-4",
+            "choices": [
+                {
+                    "index": 0,
+                    "message": { "role": "assistant", "content": "Hello! How can I help you today?" },
+                    "finish_reason": "stop"
+                }
+            ],
+            "usage": { "prompt_tokens": 10, "completion_tokens": 25, "total_tokens": 35 }
+        });
+        let bytes = serde_json::to_vec(&resp).unwrap();
+        let result = ProviderResponseType::try_from((bytes.as_slice(), &SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages), &ProviderId::OpenAI));
+        assert!(result.is_ok());
+        match result.unwrap() {
+            ProviderResponseType::MessagesResponse(r) => {
+                assert_eq!(r.model, "gpt-4");
+                assert_eq!(r.usage.input_tokens, 10); // equals the fixture's prompt_tokens
+                assert_eq!(r.usage.output_tokens, 25); // equals the fixture's completion_tokens
+            },
+            _ => panic!("Expected MessagesResponse variant"),
+        }
+    }
+
+    #[test]
+    fn test_openai_response_from_bytes_with_claude_provider() {
+        // Claude provider using OpenAI-compatible API returns OpenAI format response
+        // Client API = OpenAI, Provider = Anthropic -> Anthropic returns OpenAI format via their compatible API
+        let resp = json!({
+            "id": "chatcmpl-01ABC123",
+            "object": "chat.completion",
+            "created": 1677652288,
+            "model": "claude-3-sonnet-20240229",
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": "Hello! How can I help you today?"
+                    },
+                    "finish_reason": "stop"
+                }
+            ],
+            "usage": {
+                "prompt_tokens": 10,
+                "completion_tokens": 25,
+                "total_tokens": 35
+            }
+        });
+        let bytes = serde_json::to_vec(&resp).unwrap();
+        let result = ProviderResponseType::try_from((bytes.as_slice(), &SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), &ProviderId::Anthropic));
+        assert!(result.is_ok());
+        match result.unwrap() {
+            ProviderResponseType::ChatCompletionsResponse(r) => {
+                assert_eq!(r.model, "claude-3-sonnet-20240229");
+                assert_eq!(r.usage.prompt_tokens, 10); // usage values match the fixture unchanged
+                assert_eq!(r.usage.completion_tokens, 25);
+            },
+            _ => panic!("Expected ChatCompletionsResponse variant"),
+        }
+    }
+
+    #[test]
+    fn test_sse_event_parsing() {
+        // Test valid SSE data line (the assertion below expects `data` to keep the trailing "\n\n")
+        let line = "data: {\"id\":\"test\",\"object\":\"chat.completion.chunk\"}\n\n";
+        let event: Result<SseEvent, _> = line.parse();
+        assert!(event.is_ok());
+        let event = event.unwrap();
+        assert_eq!(event.data, Some("{\"id\":\"test\",\"object\":\"chat.completion.chunk\"}\n\n".to_string()));
+
+        // Test conversion back to line using Display trait (expected to equal the input line)
+        let wire_format = event.to_string();
+        assert_eq!(wire_format, "data: {\"id\":\"test\",\"object\":\"chat.completion.chunk\"}\n\n");
+
+        // Test [DONE] marker - should be valid SSE event
+        let done_line = "data: [DONE]";
+        let done_result: Result<SseEvent, _> = done_line.parse();
+        assert!(done_result.is_ok());
+        let done_event = done_result.unwrap();
+        assert_eq!(done_event.data, Some("[DONE]".to_string()));
+        assert!(done_event.is_done()); // Test the helper method
+
+        // Test non-DONE event (reuses the event parsed at the top of this test)
+        assert!(!event.is_done());
+
+        // Test empty data - should return error
+        let empty_line = "data: ";
+        let empty_result: Result<SseEvent, _> = empty_line.parse();
+        assert!(empty_result.is_err());
+
+        // Test non-data line - should return error
+        let comment_line = ": this is a comment";
+        let comment_result: Result<SseEvent, _> = comment_line.parse();
+        assert!(comment_result.is_err());
+    }
+
+    #[test]
+    fn test_sse_event_serde() {
+        // Test serialization and deserialization with serde
+        let event = SseEvent {
+            data: Some(r#"{"id":"test","object":"chat.completion.chunk"}"#.to_string()),
+            event: None,
+            raw_line: r#"data: {"id":"test","object":"chat.completion.chunk"}
+
+        "#.to_string(), // raw literal spans three lines, including the indentation before the closing quote
+            sse_transform_buffer: r#"data: {"id":"test","object":"chat.completion.chunk"}
+
+        "#.to_string(),
+            provider_stream_response: None,
+        };
+
+        // Test JSON serialization - raw_line should be skipped
+        let json = serde_json::to_string(&event).unwrap();
+        assert!(json.contains("test"));
+        assert!(json.contains("chat.completion.chunk"));
+        assert!(!json.contains("raw_line")); // Should be excluded from serialization
+
+        // Test JSON deserialization
+        let deserialized: SseEvent = serde_json::from_str(&json).unwrap();
+        assert_eq!(deserialized.data, event.data);
+        assert_eq!(deserialized.raw_line, ""); // Should be empty since it's skipped
+
+        // Test round trip for data field only (same comparison as above, operands swapped)
+        assert_eq!(event.data, deserialized.data);
+    }
+
+    #[test]
+    fn test_sse_event_should_skip() { // builds SseEvent values by hand rather than parsing lines
+        // Test ping message should be skipped
+        let ping_event = SseEvent {
+            data: Some(r#"{"type": "ping"}"#.to_string()),
+            event: None,
+            raw_line: r#"data: {"type": "ping"}"#.to_string(),
+            sse_transform_buffer: r#"data: {"type": "ping"}"#.to_string(),
+            provider_stream_response: None,
+        };
+        assert!(ping_event.should_skip());
+        assert!(!ping_event.is_done());
+
+        // Test normal event should not be skipped
+        let normal_event = SseEvent {
+            data: Some(r#"{"id": "test", "object": "chat.completion.chunk"}"#.to_string()),
+            event: Some("content_block_delta".to_string()),
+            raw_line: r#"data: {"id": "test", "object": "chat.completion.chunk"}"#.to_string(),
+            sse_transform_buffer: r#"data: {"id": "test", "object": "chat.completion.chunk"}"#.to_string(),
+            provider_stream_response: None,
+        };
+        assert!(!normal_event.should_skip());
+        assert!(!normal_event.is_done());
+
+        // Test [DONE] event should not be skipped (but is handled separately)
+        let done_event = SseEvent {
+            data: Some("[DONE]".to_string()),
+            event: None,
+            raw_line: "data: [DONE]".to_string(),
+            sse_transform_buffer: "data: [DONE]".to_string(),
+            provider_stream_response: None,
+        };
+        assert!(!done_event.should_skip());
+        assert!(done_event.is_done());
+    }
+
+    #[test]
+    fn test_sse_stream_iter_filters_ping_messages() {
+        // Create test data with ping messages mixed in
+        let test_lines = vec![
+            "data: {\"id\": \"msg1\", \"object\": \"chat.completion.chunk\"}".to_string(),
+            "data: {\"type\": \"ping\"}".to_string(), // This should be filtered out
+            "data: {\"id\": \"msg2\", \"object\": \"chat.completion.chunk\"}".to_string(),
+            "data: {\"type\": \"ping\"}".to_string(), // This should be filtered out
+            "data: [DONE]".to_string(), // This should end the stream
+        ];
+
+        let mut iter = SseStreamIter::new(test_lines.into_iter());
+
+        // First event should be msg1 (it precedes the first ping in the input)
+        let event1 = iter.next().unwrap();
+        assert!(event1.data.as_ref().unwrap().contains("msg1"));
+        assert!(!event1.should_skip());
+
+        // Second event should be msg2 (ping filtered out)
+        let event2 = iter.next().unwrap();
+        assert!(event2.data.as_ref().unwrap().contains("msg2"));
+        assert!(!event2.should_skip());
+
+        // Iterator should end at [DONE] (the second ping is skipped first, then [DONE] yields None)
+        assert!(iter.next().is_none());
+    }
+
+    #[test]
+    fn test_sse_stream_iter_handles_anthropic_events() {
+        // Create test data with Anthropic-style event/data pairs
+        let test_lines = vec![
+            "event: message_start".to_string(),
+            "data: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_123\"}}".to_string(),
+            "event: content_block_delta".to_string(),
+            "data: {\"type\":\"content_block_delta\",\"delta\":{\"text\":\"Hello\"}}".to_string(),
+            "data: [DONE]".to_string(),
+        ];
+
+        let mut iter = SseStreamIter::new(test_lines.into_iter());
+
+        // First event should be the event: line (each input line yields its own SseEvent)
+        let event1 = iter.next().unwrap();
+        assert!(event1.is_event_only());
+        assert_eq!(event1.event, Some("message_start".to_string()));
+        assert_eq!(event1.data, None);
+
+        // Second event should be the data: line (its `event` field is None, not "message_start")
+        let event2 = iter.next().unwrap();
+        assert!(!event2.is_event_only());
+        assert_eq!(event2.event, None);
+        assert!(event2.data.as_ref().unwrap().contains("message_start"));
+
+        // Third event should be another event: line
+        let event3 = iter.next().unwrap();
+        assert!(event3.is_event_only());
+        assert_eq!(event3.event, Some("content_block_delta".to_string()));
+
+        // Fourth event should be the content delta data
+        let event4 = iter.next().unwrap();
+        assert!(!event4.is_event_only());
+        assert!(event4.data.as_ref().unwrap().contains("Hello"));
+
+        // Iterator should end at [DONE]
+        assert!(iter.next().is_none());
+    }
+
+    #[test]
+    fn test_provider_stream_response_event_type() { // checks `event_type()` for one variant of each API
+        use crate::apis::anthropic::{MessagesStreamEvent, MessagesContentDelta};
+        use crate::apis::openai::ChatCompletionsStreamResponse;
+
+        // Test Anthropic event type
+        let anthropic_event = MessagesStreamEvent::ContentBlockDelta {
+            index: 0,
+            delta: MessagesContentDelta::TextDelta { text: "Hello".to_string() },
+        };
+        let provider_type = ProviderStreamResponseType::MessagesStreamEvent(anthropic_event);
+        assert_eq!(provider_type.event_type(), Some("content_block_delta"));
+
+        // Test OpenAI event type (should be None)
+        let openai_event = ChatCompletionsStreamResponse {
+            id: "test".to_string(),
+            object: "chat.completion.chunk".to_string(),
+            created: 123456789,
+            model: "gpt-4".to_string(),
+            choices: vec![],
+            usage: None,
+            system_fingerprint: None,
+            service_tier: None,
+        };
+        let provider_type = ProviderStreamResponseType::ChatCompletionsStreamResponse(openai_event);
+        assert_eq!(provider_type.event_type(), None);
+    }
+}
diff --git a/crates/llm_gateway/src/filter_context.rs b/crates/llm_gateway/src/filter_context.rs
index fc31355a..258a1a1c 100644
--- a/crates/llm_gateway/src/filter_context.rs
+++ b/crates/llm_gateway/src/filter_context.rs
@@ -89,7 +89,6 @@ impl RootContext for FilterContext {
         );
 
         Some(Box::new(StreamContext::new(
-            context_id,
             Rc::clone(&self.metrics),
             Rc::clone(
                 self.llm_providers
diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index 6b2c5f15..da86296d 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -1,19 +1,3 @@
-use crate::metrics::Metrics;
-use common::configuration::{LlmProvider, LlmProviderType, Overrides};
-use common::consts::{
-    ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, CHAT_COMPLETIONS_PATH, HEALTHZ_PATH,
-    RATELIMIT_SELECTOR_HEADER_KEY, REQUEST_ID_HEADER, TRACE_PARENT_HEADER,
-};
-use common::errors::ServerError;
-use common::llm_providers::LlmProviders;
-use common::ratelimit::Header;
-use common::stats::{IncrementingMetric, RecordingMetric};
-use common::tracing::{Event, Span, TraceData, Traceparent};
-use common::{ratelimit, routing, tokenizer};
-use hermesllm::providers::response::ProviderStreamResponseIter;
-use hermesllm::{
-    ProviderId, ProviderRequest, ProviderRequestType, ProviderResponse, ProviderResponseType,
-};
 use http::StatusCode;
 use log::{debug, info, warn};
 use proxy_wasm::hostcalls::get_current_time;
@@ -25,13 +9,31 @@ use std::rc::Rc;
 use std::sync::{Arc, Mutex};
 use std::time::{Duration, SystemTime, UNIX_EPOCH};
 
+use crate::metrics::Metrics;
+use common::configuration::{LlmProvider, LlmProviderType, Overrides};
+use common::consts::{
+    ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, HEALTHZ_PATH, RATELIMIT_SELECTOR_HEADER_KEY,
+    REQUEST_ID_HEADER, TRACE_PARENT_HEADER,
+};
+use common::errors::ServerError;
+use common::llm_providers::LlmProviders;
+use common::ratelimit::Header;
+use common::stats::{IncrementingMetric, RecordingMetric};
+use common::tracing::{Event, Span, TraceData, Traceparent};
+use common::{ratelimit, routing, tokenizer};
+use hermesllm::clients::endpoints::SupportedAPIs;
+use hermesllm::providers::response::{ProviderResponse, SseEvent, SseStreamIter};
+use hermesllm::{ProviderId, ProviderRequest, ProviderRequestType, ProviderResponseType};
+
 pub struct StreamContext {
-    context_id: u32,
     metrics: Rc<Metrics>,
     ratelimit_selector: Option<Header>,
     streaming_response: bool,
     response_tokens: usize,
-    is_chat_completions_request: bool,
+    /// The API that is requested by the client (before compatibility mapping)
+    client_api: Option<SupportedAPIs>,
+    /// The API that should be used for the upstream provider (after compatibility mapping)
+    resolved_api: Option<SupportedAPIs>,
     llm_providers: Rc<LlmProviders>,
     llm_provider: Option<Rc<LlmProvider>>,
     request_id: Option<String>,
@@ -47,20 +49,19 @@ pub struct StreamContext {
 
 impl StreamContext {
     pub fn new(
-        context_id: u32,
         metrics: Rc<Metrics>,
         llm_providers: Rc<LlmProviders>,
         traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
         overrides: Rc<Option<Overrides>>,
     ) -> Self {
         StreamContext {
-            context_id,
             metrics,
             overrides,
             ratelimit_selector: None,
             streaming_response: false,
             response_tokens: 0,
-            is_chat_completions_request: false,
+            client_api: None,
+            resolved_api: None,
             llm_providers,
             llm_provider: None,
             request_id: None,
@@ -73,6 +74,16 @@ impl StreamContext {
             user_message: None,
         }
     }
+
+    /// Request identifier used as the `ARCHGW_REQ_ID` tag in log lines.
+    /// Yields a copy of `request_id` when it is set and non-empty, and the
+    /// literal `NO_REQUEST_ID` otherwise.
+    fn request_identifier(&self) -> String {
+        match self.request_id.as_deref() {
+            Some(id) if !id.is_empty() => id.to_owned(),
+            _ => "NO_REQUEST_ID".to_string(),
+        }
+    }
     fn llm_provider(&self) -> &LlmProvider {
         self.llm_provider
             .as_ref()
@@ -83,6 +94,18 @@ impl StreamContext {
         self.llm_provider().to_provider_id()
     }
 
+    // Points `:path` at the provider's endpoint; assumes that the provider has been set.
+    fn update_upstream_path(&mut self, request_path: &str) {
+        let provider_id = self.llm_provider().to_provider_id();
+        let client_api = self.client_api.as_ref();
+        let rewritten = client_api
+            .map(|api| api.target_endpoint_for_provider(&provider_id, request_path))
+            .filter(|endpoint| *endpoint != request_path);
+        if let Some(endpoint) = rewritten {
+            self.set_http_request_header(":path", Some(&endpoint));
+        }
+    }
+
     fn select_llm_provider(&mut self) {
         let provider_hint = self
             .get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
@@ -93,32 +116,11 @@ impl StreamContext {
             provider_hint,
         ));
 
-        match self.llm_provider.as_ref().unwrap().provider_interface {
-            LlmProviderType::Groq => {
-                if let Some(path) = self.get_http_request_header(":path") {
-                    if path.starts_with("/v1/") {
-                        let new_path = format!("/openai{}", path);
-                        self.set_http_request_header(":path", Some(new_path.as_str()));
-                    }
-                }
-            }
-            LlmProviderType::Gemini => {
-                if let Some(path) = self.get_http_request_header(":path") {
-                    if path == "/v1/chat/completions" {
-                        self.set_http_request_header(
-                            ":path",
-                            Some("/v1beta/openai/chat/completions"),
-                        );
-                    }
-                }
-            }
-            _ => {}
-        }
-
-        debug!(
-            "request received: llm provider hint: {}, selected provider: {}",
+        info!(
+            "[ARCHGW_REQ_ID:{}] PROVIDER_SELECTION: Hint='{}' -> Selected='{}'",
+            self.request_identifier(),
             self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
-                .unwrap_or_default(),
+                .unwrap_or("none".to_string()),
             self.llm_provider.as_ref().unwrap().name
         );
     }
@@ -135,9 +137,23 @@ impl StreamContext {
                     ),
                 })?;
 
-        let authorization_header_value = format!("Bearer {}", llm_provider_api_key_value);
-
-        self.set_http_request_header("Authorization", Some(&authorization_header_value));
+        // Set API-specific headers based on the resolved upstream API
+        match self.resolved_api.as_ref() {
+            Some(SupportedAPIs::AnthropicMessagesAPI(_)) => {
+                // Anthropic API requires x-api-key and anthropic-version headers
+                // Remove any existing Authorization header since Anthropic doesn't use it
+                self.remove_http_request_header("Authorization");
+                self.set_http_request_header("x-api-key", Some(llm_provider_api_key_value));
+                self.set_http_request_header("anthropic-version", Some("2023-06-01"));
+            }
+            Some(SupportedAPIs::OpenAIChatCompletions(_)) | None => {
+                // OpenAI and default: use Authorization Bearer token
+                // Remove any existing x-api-key header since OpenAI doesn't use it
+                self.remove_http_request_header("x-api-key");
+                let authorization_header_value = format!("Bearer {}", llm_provider_api_key_value);
+                self.set_http_request_header("Authorization", Some(&authorization_header_value));
+            }
+        }
 
         Ok(())
     }
@@ -179,7 +195,13 @@ impl StreamContext {
         // Tokenize and record token count.
         let token_count = tokenizer::token_count(model, json_string).unwrap_or(0);
 
-        debug!("Recorded input token count: {}", token_count);
+        info!(
+            "[ARCHGW_REQ_ID:{}] TOKEN_COUNT: model='{}' input_tokens={}",
+            self.request_identifier(),
+            model,
+            token_count
+        );
+
         // Record the token count to metrics.
         self.metrics
             .input_sequence_length
@@ -187,18 +209,361 @@ impl StreamContext {
 
         // Check if rate limiting needs to be applied.
         if let Some(selector) = self.ratelimit_selector.take() {
-            log::debug!("Applying ratelimit for model: {}", model);
+            info!(
+                "[ARCHGW_REQ_ID:{}] RATELIMIT_CHECK: model='{}' selector='{}:{}'",
+                self.request_identifier(),
+                model,
+                selector.key,
+                selector.value
+            );
             ratelimit::ratelimits(None).read().unwrap().check_limit(
                 model.to_owned(),
                 selector,
                 NonZero::new(token_count as u32).unwrap(),
             )?;
         } else {
-            debug!("No rate limit applied for model: {}", model);
+            debug!(
+                "[ARCHGW_REQ_ID:{}] RATELIMIT_SKIP: model='{}' (no selector)",
+                self.request_identifier(),
+                model
+            );
         }
 
         Ok(())
     }
+
+    // === Helper methods extracted from on_http_response_body (no behavior change) ===
+    #[inline]
+    fn record_ttft_if_needed(&mut self) {
+        // No-op once a time-to-first-token duration has been stored.
+        if self.ttft_duration.is_some() {
+            return;
+        }
+        let now = get_current_time().unwrap();
+        self.ttft_time = Some(current_time_ns());
+        // A failed measurement is only logged; `ttft_duration` stays unset in that case.
+        match now.duration_since(self.start_time) {
+            Err(e) => {
+                let req_id = self.request_identifier();
+                warn!("[ARCHGW_REQ_ID:{}] TIME_MEASUREMENT_ERROR: {:?}", req_id, e);
+            }
+            Ok(elapsed) => {
+                let elapsed_ms = elapsed.as_millis();
+                info!(
+                    "[ARCHGW_REQ_ID:{}] TIME_TO_FIRST_TOKEN: {}ms",
+                    self.request_identifier(),
+                    elapsed_ms
+                );
+                self.ttft_duration = Some(elapsed);
+                self.metrics.time_to_first_token.record(elapsed_ms as u64);
+            }
+        }
+    }
+    /// Records end-of-request latency, throughput and output-length metrics, then queues
+    /// trace data when a `traceparent` is set. Elapsed time is `current_time - start_time`.
+    fn handle_end_of_stream_metrics_and_traces(&mut self, current_time: SystemTime) {
+        // All streaming responses end with bytes=0 and end_stream=true
+        match current_time.duration_since(self.start_time) {
+            Ok(duration) => {
+                let duration_ms = duration.as_millis();
+                info!(
+                    "[ARCHGW_REQ_ID:{}] REQUEST_COMPLETE: latency={}ms tokens={}",
+                    self.request_identifier(),
+                    duration_ms,
+                    self.response_tokens
+                );
+                self.metrics.request_latency.record(duration_ms as u64);
+
+                if self.response_tokens > 0 {
+                    // Integer division truncates: `tpot` is 0 whenever the request averaged
+                    // under 1ms per token, so clamp the divisor to keep `1000 / tpot` from
+                    // panicking; the reported rate then tops out at 1000 tokens/s.
+                    let tpot = duration_ms as u64 / self.response_tokens as u64;
+                    let tokens_per_second = 1000 / tpot.max(1);
+                    self.metrics.time_per_output_token.record(tpot);
+                    info!(
+                        "[ARCHGW_REQ_ID:{}] TOKEN_THROUGHPUT: time_per_token={}ms tokens_per_second={}",
+                        self.request_identifier(),
+                        tpot,
+                        tokens_per_second
+                    );
+                    self.metrics.tokens_per_second.record(tokens_per_second);
+                }
+            }
+            Err(e) => {
+                warn!("SystemTime error: {:?}", e);
+            }
+        }
+        // Record the output sequence length
+        self.metrics
+            .output_sequence_length
+            .record(self.response_tokens as u64);
+
+        if let Some(traceparent) = self.traceparent.as_ref() {
+            let current_time_ns = current_time_ns();
+
+            match Traceparent::try_from(traceparent.to_string()) {
+                Err(e) => {
+                    warn!("traceparent header is invalid: {}", e);
+                }
+                Ok(traceparent) => {
+                    let mut trace_data = common::tracing::TraceData::new();
+                    let mut llm_span = Span::new(
+                        "egress_traffic".to_string(),
+                        Some(traceparent.trace_id),
+                        Some(traceparent.parent_id),
+                        self.request_body_sent_time.unwrap(),
+                        current_time_ns,
+                    );
+                    llm_span
+                        .add_attribute("model".to_string(), self.llm_provider().name.to_string());
+
+                    if let Some(user_message) = &self.user_message {
+                        llm_span.add_attribute("user_message".to_string(), user_message.clone());
+                    }
+
+                    // NOTE(review): the span is attached only when a TTFT timestamp exists;
+                    // otherwise an empty TraceData is queued — confirm this is intended.
+                    if self.ttft_time.is_some() {
+                        llm_span.add_event(Event::new(
+                            "time_to_first_token".to_string(),
+                            self.ttft_time.unwrap(),
+                        ));
+                        trace_data.add_span(llm_span);
+                    }
+
+                    self.traces_queue.lock().unwrap().push_back(trace_data);
+                }
+            };
+        }
+    }
+
+    /// Fetches `body_size` bytes of the upstream response body, starting at offset 0.
+    ///
+    /// Yields `Err(Action::Continue)` when the host returns no body, and also when a
+    /// non-streaming response reports `body_size == 0`. A streaming chunk whose length
+    /// differs from `body_size` is logged as a mismatch and still returned.
+    fn read_raw_response_body(&mut self, body_size: usize) -> Result<Vec<u8>, Action> {
+        if !self.streaming_response {
+            // Non-streaming with an empty body: nothing to read.
+            if body_size == 0 {
+                return Err(Action::Continue);
+            }
+            debug!(
+                "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_COMPLETE: streaming=false body_size={}",
+                self.request_identifier(),
+                body_size
+            );
+            return self.get_http_response_body(0, body_size).ok_or_else(|| {
+                warn!("non streaming response body empty");
+                Action::Continue
+            });
+        }
+
+        // Streaming: `body_size` is the size of the current chunk.
+        debug!(
+            "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_CHUNK: streaming=true chunk_size={}",
+            self.request_identifier(),
+            body_size
+        );
+        let chunk = self.get_http_response_body(0, body_size).ok_or_else(|| {
+            warn!(
+                "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_ERROR: empty chunk, size={}",
+                self.request_identifier(),
+                body_size
+            );
+            Action::Continue
+        })?;
+        // A length mismatch is reported but not treated as an error.
+        if chunk.len() != body_size {
+            warn!(
+                "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_MISMATCH: expected={} actual={}",
+                self.request_identifier(),
+                body_size,
+                chunk.len()
+            );
+        }
+        Ok(chunk)
+    }
+
+    fn debug_log_body(&self, body: &[u8]) { // debug-level dump of the raw upstream body
+        debug!(
+            "[ARCHGW_REQ_ID:{}] UPSTREAM_RAW_RESPONSE: body_size={} content={}",
+            self.request_identifier(),
+            body.len(),
+            String::from_utf8_lossy(body) // lossy: invalid UTF-8 becomes U+FFFD instead of failing
+        );
+    }
+
+    /// Converts one chunk of a streaming (SSE) upstream response into the client's API format.
+    ///
+    /// Every event parsed from `body` is transformed from the provider's upstream API to the API
+    /// the client called, used to update TTFT and the estimated `response_tokens`, and appended
+    /// to the returned buffer. Returns `Err(Action::Continue)` when `client_api` is unset or an
+    /// event cannot be parsed, transformed, or decoded.
+    fn handle_streaming_response(
+        &mut self,
+        body: &[u8],
+        provider_id: ProviderId,
+    ) -> Result<Vec<u8>, Action> {
+        debug!(
+            "[ARCHGW_REQ_ID:{}] STREAMING_PROCESS: provider_id={:?} chunk_size={}",
+            self.request_identifier(),
+            provider_id,
+            body.len()
+        );
+        // Owned copy: `self` is mutated in the loop below, so no borrow of it may stay alive.
+        let client_api = match self.client_api.as_ref() {
+            Some(client_api) => client_api.clone(),
+            None => {
+                warn!("Missing client_api for streaming response");
+                return Err(Action::Continue);
+            }
+        };
+        let upstream_api = provider_id.compatible_api_for_client(&client_api);
+
+        // Parse body into SSE iterator using TryFrom
+        let sse_iter: SseStreamIter<std::vec::IntoIter<String>> =
+            match SseStreamIter::try_from(body) {
+                Ok(iter) => iter,
+                Err(e) => {
+                    warn!("Failed to parse body into SSE iterator: {}", e);
+                    return Err(Action::Continue);
+                }
+            };
+        let mut response_buffer = Vec::new();
+        for sse_event in sse_iter {
+            // Transform event if upstream API != client API
+            let transformed_event: SseEvent =
+                match SseEvent::try_from((sse_event, &client_api, &upstream_api)) {
+                    Ok(event) => event,
+                    Err(e) => {
+                        warn!("Failed to transform SSE event: {}", e);
+                        return Err(Action::Continue);
+                    }
+                };
+
+            // Extract ProviderStreamResponse for processing (token counting, etc.)
+            if !transformed_event.is_done() {
+                match transformed_event.provider_response() {
+                    Ok(provider_response) => {
+                        self.record_ttft_if_needed();
+                        if provider_response.is_final() {
+                            debug!(
+                                "[ARCHGW_REQ_ID:{}] STREAMING_FINAL_CHUNK: total_tokens={}",
+                                self.request_identifier(),
+                                self.response_tokens
+                            );
+                        }
+                        if let Some(content) = provider_response.content_delta() {
+                            // Rough estimate: `len() / 4`, floored at one token per delta.
+                            let estimated_tokens = (content.len() / 4).max(1);
+                            self.response_tokens += estimated_tokens;
+                            debug!(
+                                "[ARCHGW_REQ_ID:{}] STREAMING_TOKEN_UPDATE: delta_chars={} estimated_tokens={} total_tokens={}",
+                                self.request_identifier(),
+                                content.len(),
+                                estimated_tokens,
+                                self.response_tokens
+                            );
+                        }
+                    }
+                    Err(e) => {
+                        warn!(
+                            "[ARCHGW_REQ_ID:{}] STREAMING_CHUNK_ERROR: {}",
+                            self.request_identifier(),
+                            e
+                        );
+                        return Err(Action::Continue);
+                    }
+                }
+            }
+
+            // Add transformed event to response buffer
+            let bytes: Vec<u8> = transformed_event.into();
+            response_buffer.extend_from_slice(&bytes);
+        }
+        Ok(response_buffer)
+    }
+
+    /// Normalizes a complete (non-streaming) upstream response for the client.
+    ///
+    /// Parses `body` via `ProviderResponseType::try_from` using the client API and `provider_id`,
+    /// stores the reported completion tokens in `response_tokens`, and returns the response
+    /// re-serialized as JSON. Failures are logged and surface as `Err(Action::Continue)`.
+    fn handle_non_streaming_response(
+        &mut self,
+        body: &[u8],
+        provider_id: ProviderId,
+    ) -> Result<Vec<u8>, Action> {
+        info!(
+            "[ARCHGW_REQ_ID:{}] NON_STREAMING_PROCESS: provider_id={:?} body_size={}",
+            self.request_identifier(),
+            provider_id,
+            body.len()
+        );
+        let client_api = match self.client_api.as_ref() {
+            Some(api) => api,
+            None => {
+                warn!(
+                    "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_ERROR: missing client_api",
+                    self.request_identifier()
+                );
+                return Err(Action::Continue);
+            }
+        };
+        let parsed = ProviderResponseType::try_from((body, client_api, &provider_id));
+        let normalized: ProviderResponseType = match parsed {
+            Ok(parsed_response) => parsed_response,
+            Err(parse_err) => {
+                warn!(
+                    "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_PARSE_ERROR: {} | body: {}",
+                    self.request_identifier(),
+                    parse_err,
+                    String::from_utf8_lossy(body)
+                );
+                self.send_server_error(
+                    ServerError::LogicError(format!("Response parsing error: {}", parse_err)),
+                    Some(StatusCode::BAD_REQUEST),
+                );
+                return Err(Action::Continue);
+            }
+        };
+        // Record completion tokens when the provider reported usage.
+        match normalized.extract_usage_counts() {
+            Some((prompt_tokens, completion_tokens, total_tokens)) => {
+                info!(
+                    "[ARCHGW_REQ_ID:{}] RESPONSE_USAGE: prompt_tokens={} completion_tokens={} total_tokens={}",
+                    self.request_identifier(),
+                    prompt_tokens,
+                    completion_tokens,
+                    total_tokens
+                );
+                self.response_tokens = completion_tokens;
+            }
+            None => warn!(
+                "[ARCHGW_REQ_ID:{}] RESPONSE_USAGE: no usage information found",
+                self.request_identifier()
+            ),
+        }
+        let client_bytes = match serde_json::to_vec(&normalized) {
+            Ok(bytes) => bytes,
+            Err(ser_err) => {
+                warn!("Failed to serialize normalized response: {}", ser_err);
+                self.send_server_error(
+                    ServerError::LogicError(format!("Response serialization error: {}", ser_err)),
+                    Some(StatusCode::INTERNAL_SERVER_ERROR),
+                );
+                return Err(Action::Continue);
+            }
+        };
+        debug!(
+            "[ARCHGW_REQ_ID:{}] CLIENT_RESPONSE_PAYLOAD: {}",
+            self.request_identifier(),
+            String::from_utf8_lossy(&client_bytes)
+        );
+        Ok(client_bytes)
+    }
 }
 
 // HttpContext is the trait that allows the Rust code to interact with HTTP objects.
@@ -212,8 +577,6 @@ impl HttpContext for StreamContext {
             return Action::Continue;
         }
 
-        self.is_chat_completions_request = CHAT_COMPLETIONS_PATH == request_path;
-
         let use_agent_orchestrator = match self.overrides.as_ref() {
             Some(overrides) => overrides.use_agent_orchestrator.unwrap_or_default(),
             None => false,
@@ -227,10 +590,35 @@ impl HttpContext for StreamContext {
             self.llm_provider = Some(Rc::new(LlmProvider {
                 name: routing_header_value.to_string(),
                 provider_interface: LlmProviderType::OpenAI,
-                ..Default::default()
+                ..Default::default() //TODO: THiS IS BROKEN. WHY ARE WE ASSUMING OPENAI FOR UPSTREAM?
             }));
         } else {
+            //TODO: Fix this brittle code path. We need to return values and have compile-time checks.
             self.select_llm_provider();
+
+            // Check if this is a supported API endpoint
+            if SupportedAPIs::from_endpoint(&request_path).is_none() {
+                self.send_http_response(404, vec![], Some(b"Unsupported endpoint"));
+                return Action::Continue;
+            }
+
+            // Get the SupportedApi for routing decisions
+            let supported_api: Option<SupportedAPIs> = SupportedAPIs::from_endpoint(&request_path);
+            self.client_api = supported_api;
+
+            // Resolve the upstream API compatible with the client API for the selected provider
+            if let (Some(api), Some(provider)) =
+                (self.client_api.as_ref(), self.llm_provider.as_ref())
+            {
+                let provider_id = provider.to_provider_id();
+                self.resolved_api = Some(provider_id.compatible_api_for_client(api));
+            } else {
+                self.resolved_api = None;
+            }
+
+            //We need to update the upstream path if there is a variation for a provider like Gemini/Groq, etc.
+            self.update_upstream_path(&request_path);
+
             if self.llm_provider().endpoint.is_some() {
                 self.add_http_request_header(
                     ARCH_ROUTING_HEADER,
@@ -265,8 +653,10 @@ impl HttpContext for StreamContext {
 
     fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
         debug!(
-            "on_http_request_body [S={}] bytes={} end_stream={}",
-            self.context_id, body_size, end_of_stream
+            "[ARCHGW_REQ_ID:{}] REQUEST_BODY_CHUNK: bytes={} end_stream={}",
+            self.request_identifier(),
+            body_size,
+            end_of_stream
         );
 
         // Let the client send the gateway all the data before sending to the LLM_provider.
@@ -298,23 +688,47 @@ impl HttpContext for StreamContext {
             }
         };
 
-        let provider_id = self.get_provider_id();
+        // Deserialize the request body using the client API (derived from the request endpoint)
+        let mut deserialized_client_request: ProviderRequestType = match self.client_api.as_ref() {
+            Some(the_client_api) => {
+                info!(
+                    "[ARCHGW_REQ_ID:{}] CLIENT_REQUEST_RECEIVED: api={:?} body_size={}",
+                    self.request_identifier(),
+                    the_client_api,
+                    body_bytes.len()
+                );
 
-        let mut deserialized_body =
-            match ProviderRequestType::try_from((&body_bytes[..], &provider_id)) {
-                Ok(deserialized) => deserialized,
-                Err(e) => {
-                    debug!(
-                        "on_http_request_body: request body: {}",
-                        String::from_utf8_lossy(&body_bytes)
-                    );
-                    self.send_server_error(
-                        ServerError::LogicError(format!("Request parsing error: {}", e)),
-                        Some(StatusCode::BAD_REQUEST),
-                    );
-                    return Action::Pause;
+                debug!(
+                    "[ARCHGW_REQ_ID:{}] CLIENT_REQUEST_PAYLOAD: {}",
+                    self.request_identifier(),
+                    String::from_utf8_lossy(&body_bytes)
+                );
+
+                match ProviderRequestType::try_from((&body_bytes[..], the_client_api)) {
+                    Ok(deserialized) => deserialized,
+                    Err(e) => {
+                        warn!(
+                            "[ARCHGW_REQ_ID:{}] CLIENT_REQUEST_PARSE_ERROR: {} | body: {}",
+                            self.request_identifier(),
+                            e,
+                            String::from_utf8_lossy(&body_bytes)
+                        );
+                        self.send_server_error(
+                            ServerError::LogicError(format!("Request parsing error: {}", e)),
+                            Some(StatusCode::BAD_REQUEST),
+                        );
+                        return Action::Pause;
+                    }
                 }
-            };
+            }
+            None => {
+                self.send_server_error(
+                    ServerError::LogicError("No resolved API for provider".to_string()),
+                    Some(StatusCode::BAD_REQUEST),
+                );
+                return Action::Pause;
+            }
+        };
 
         let model_name = match self.llm_provider.as_ref() {
             Some(llm_provider) => llm_provider.model.as_ref(),
@@ -327,7 +741,7 @@ impl HttpContext for StreamContext {
         };
 
         // Store the original model for logging
-        let model_requested = deserialized_body.model().to_string();
+        let model_requested = deserialized_client_request.model().to_string();
 
         // Apply model name resolution logic using the trait method
         let resolved_model = match model_name {
@@ -336,6 +750,13 @@ impl HttpContext for StreamContext {
                 if use_agent_orchestrator {
                     "agent_orchestrator".to_string()
                 } else {
+                    warn!(
+                        "[ARCHGW_REQ_ID:{}] MODEL_RESOLUTION_ERROR: no model specified | req_model='{}' provider='{}' config_model={:?}",
+                        self.request_identifier(),
+                        model_requested,
+                        self.llm_provider().name,
+                        self.llm_provider().model
+                    );
                     self.send_server_error(
                         ServerError::BadRequest {
                             why: format!(
@@ -353,23 +774,25 @@ impl HttpContext for StreamContext {
         };
 
         // Set the resolved model using the trait method
-        deserialized_body.set_model(resolved_model.clone());
+        deserialized_client_request.set_model(resolved_model.clone());
 
         // Extract user message for tracing
-        self.user_message = deserialized_body.get_recent_user_message();
+        self.user_message = deserialized_client_request.get_recent_user_message();
 
         info!(
-            "on_http_request_body: provider: {}, model requested (in body): {}, model selected: {}",
-            self.llm_provider().name,
+            "[ARCHGW_REQ_ID:{}] MODEL_RESOLUTION: req_model='{}' -> resolved_model='{}' provider='{}' streaming={}",
+            self.request_identifier(),
             model_requested,
-            model_name.unwrap_or(&"None".to_string()),
+            resolved_model,
+            self.llm_provider().name,
+            deserialized_client_request.is_streaming()
         );
 
         // Use provider interface for streaming detection and setup
-        self.streaming_response = deserialized_body.is_streaming();
+        self.streaming_response = deserialized_client_request.is_streaming();
 
         // Use provider interface for text extraction (after potential mutation)
-        let input_tokens_str = deserialized_body.extract_messages_text();
+        let input_tokens_str = deserialized_client_request.extract_messages_text();
         // enforce ratelimits on ingress
         if let Err(e) = self.enforce_ratelimits(&resolved_model, input_tokens_str.as_str()) {
             self.send_server_error(
@@ -381,28 +804,64 @@ impl HttpContext for StreamContext {
         }
 
         // Convert chat completion request to llm provider specific request using provider interface
-        let deserialized_body_bytes = match deserialized_body.to_bytes() {
-            Ok(bytes) => bytes,
-            Err(e) => {
-                warn!("Failed to serialize request body: {}", e);
-                self.send_server_error(
-                    ServerError::LogicError(format!("Request serialization error: {}", e)),
-                    Some(StatusCode::BAD_REQUEST),
+        let serialized_body_bytes_upstream =
+            match self.resolved_api.as_ref() {
+                Some(upstream) => {
+                    info!(
+                    "[ARCHGW_REQ_ID:{}] UPSTREAM_TRANSFORM: client_api={:?} -> upstream_api={:?}",
+                    self.request_identifier(), self.client_api, upstream
                 );
-                return Action::Pause;
-            }
-        };
 
-        self.set_http_request_body(0, body_size, &deserialized_body_bytes);
+                    match ProviderRequestType::try_from((deserialized_client_request, upstream)) {
+                        Ok(request) => {
+                            debug!(
+                                "[ARCHGW_REQ_ID:{}] UPSTREAM_REQUEST_PAYLOAD: {}",
+                                self.request_identifier(),
+                                String::from_utf8_lossy(&request.to_bytes().unwrap_or_default())
+                            );
 
+                            match request.to_bytes() {
+                                Ok(bytes) => bytes,
+                                Err(e) => {
+                                    warn!("Failed to serialize request body: {}", e);
+                                    self.send_server_error(
+                                        ServerError::LogicError(format!(
+                                            "Request serialization error: {}",
+                                            e
+                                        )),
+                                        Some(StatusCode::BAD_REQUEST),
+                                    );
+                                    return Action::Pause;
+                                }
+                            }
+                        }
+                        Err(e) => {
+                            warn!("Failed to create provider request: {}", e);
+                            self.send_server_error(
+                                ServerError::LogicError(format!("Provider request error: {}", e)),
+                                Some(StatusCode::BAD_REQUEST),
+                            );
+                            return Action::Pause;
+                        }
+                    }
+                }
+                None => {
+                    warn!("No upstream API resolved");
+                    self.send_server_error(
+                        ServerError::LogicError("No upstream API resolved".into()),
+                        Some(StatusCode::BAD_REQUEST),
+                    );
+                    return Action::Pause;
+                }
+            };
+
+        self.set_http_request_body(0, body_size, &serialized_body_bytes_upstream);
         Action::Continue
     }
 
-    fn on_http_response_headers(&mut self, _num_headers: usize, end_of_stream: bool) -> Action {
-        debug!(
-            "on_http_response_headers [S={}] end_stream={}",
-            self.context_id, end_of_stream
-        );
+    fn on_http_response_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
+        self.remove_http_response_header("content-length");
+        self.remove_http_response_header("content-encoding");
 
         self.set_property(
             vec!["metadata", "filter_metadata", "llm_filter", "user_prompt"],
@@ -413,248 +872,57 @@ impl HttpContext for StreamContext {
     }
 
     fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
-        debug!(
-            "on_http_response_body [S={}] bytes={} end_stream={}",
-            self.context_id, body_size, end_of_stream
-        );
-
         if self.request_body_sent_time.is_none() {
             debug!("on_http_response_body: request body not sent, not doing any processing in llm filter");
             return Action::Continue;
         }
 
-        if !self.is_chat_completions_request {
-            info!("on_http_response_body: non-chatcompletion request");
-            return Action::Continue;
+        match self.client_api {
+            Some(SupportedAPIs::OpenAIChatCompletions(_)) => {}
+            Some(SupportedAPIs::AnthropicMessagesAPI(_)) => {}
+            _ => {
+                let api_info = match &self.client_api {
+                    Some(api) => format!("{}", api),
+                    None => "None".to_string(),
+                };
+                info!(
+                    "[ARCHGW_REQ_ID:{}], UNSUPPORTED API: {}",
+                    self.request_identifier(),
+                    api_info
+                );
+                return Action::Continue;
+            }
         }
 
         let current_time = get_current_time().unwrap();
         if end_of_stream && body_size == 0 {
-            // All streaming responses end with bytes=0 and end_stream=true
-            // Record the latency for the request
-            match current_time.duration_since(self.start_time) {
-                Ok(duration) => {
-                    // Convert the duration to milliseconds
-                    let duration_ms = duration.as_millis();
-                    info!("on_http_response_body: request latency: {}ms", duration_ms);
-                    // Record the latency to the latency histogram
-                    self.metrics.request_latency.record(duration_ms as u64);
-
-                    if self.response_tokens > 0 {
-                        // Compute the time per output token
-                        let tpot = duration_ms as u64 / self.response_tokens as u64;
-
-                        // Record the time per output token
-                        self.metrics.time_per_output_token.record(tpot);
-
-                        debug!(
-                            "time per token: {}ms, tokens per second: {}",
-                            tpot,
-                            1000 / tpot
-                        );
-                        // Record the tokens per second
-                        self.metrics.tokens_per_second.record(1000 / tpot);
-                    }
-                }
-                Err(e) => {
-                    warn!("SystemTime error: {:?}", e);
-                }
-            }
-            // Record the output sequence length
-            self.metrics
-                .output_sequence_length
-                .record(self.response_tokens as u64);
-
-            if let Some(traceparent) = self.traceparent.as_ref() {
-                let current_time_ns = current_time_ns();
-
-                match Traceparent::try_from(traceparent.to_string()) {
-                    Err(e) => {
-                        warn!("traceparent header is invalid: {}", e);
-                    }
-                    Ok(traceparent) => {
-                        let mut trace_data = common::tracing::TraceData::new();
-                        let mut llm_span = Span::new(
-                            "egress_traffic".to_string(),
-                            Some(traceparent.trace_id),
-                            Some(traceparent.parent_id),
-                            self.request_body_sent_time.unwrap(),
-                            current_time_ns,
-                        );
-                        llm_span.add_attribute(
-                            "model".to_string(),
-                            self.llm_provider().name.to_string(),
-                        );
-
-                        if let Some(user_message) = &self.user_message {
-                            llm_span
-                                .add_attribute("user_message".to_string(), user_message.clone());
-                        }
-
-                        if self.ttft_time.is_some() {
-                            llm_span.add_event(Event::new(
-                                "time_to_first_token".to_string(),
-                                self.ttft_time.unwrap(),
-                            ));
-                            trace_data.add_span(llm_span);
-                        }
-
-                        self.traces_queue.lock().unwrap().push_back(trace_data);
-                    }
-                };
-            }
-
+            self.handle_end_of_stream_metrics_and_traces(current_time);
             return Action::Continue;
         }
 
-        let body = if self.streaming_response {
-            let chunk_start = 0;
-            let chunk_size = body_size;
-            debug!(
-                "on_http_response_body: streaming response reading, {}..{}",
-                chunk_start, chunk_size
-            );
-            let streaming_chunk = match self.get_http_response_body(0, chunk_size) {
-                Some(chunk) => chunk,
-                None => {
-                    warn!(
-                        "response body empty, chunk_start: {}, chunk_size: {}",
-                        chunk_start, chunk_size
-                    );
-                    return Action::Continue;
-                }
-            };
-
-            if streaming_chunk.len() != chunk_size {
-                warn!(
-                    "chunk size mismatch: read: {} != requested: {}",
-                    streaming_chunk.len(),
-                    chunk_size
-                );
-            }
-            streaming_chunk
-        } else {
-            if body_size == 0 {
-                return Action::Continue;
-            }
-            debug!("non streaming response bytes read: 0:{}", body_size);
-            match self.get_http_response_body(0, body_size) {
-                Some(body) => body,
-                None => {
-                    warn!("non streaming response body empty");
-                    return Action::Continue;
-                }
-            }
+        let body = match self.read_raw_response_body(body_size) {
+            Ok(bytes) => bytes,
+            Err(action) => return action,
         };
 
-        if log::log_enabled!(log::Level::Debug) {
-            debug!(
-                "response data (converted to utf8): {}",
-                String::from_utf8_lossy(&body)
-            );
-        }
+        self.debug_log_body(&body);
 
+        let provider_id = self.get_provider_id();
         if self.streaming_response {
-            debug!("processing streaming response");
-            match ProviderStreamResponseIter::try_from((&body[..], &self.get_provider_id())) {
-                Ok(mut streaming_response) => {
-                    // Process each streaming chunk
-                    while let Some(chunk_result) = streaming_response.next() {
-                        match chunk_result {
-                            Ok(chunk) => {
-                                // Compute TTFT on first chunk
-                                if self.ttft_duration.is_none() {
-                                    let current_time = get_current_time().unwrap();
-                                    self.ttft_time = Some(current_time_ns());
-                                    match current_time.duration_since(self.start_time) {
-                                        Ok(duration) => {
-                                            let duration_ms = duration.as_millis();
-                                            info!(
-                                                "on_http_response_body: time to first token: {}ms",
-                                                duration_ms
-                                            );
-                                            self.ttft_duration = Some(duration);
-                                            self.metrics
-                                                .time_to_first_token
-                                                .record(duration_ms as u64);
-                                        }
-                                        Err(e) => {
-                                            warn!("SystemTime error: {:?}", e);
-                                        }
-                                    }
-                                }
-
-                                // For streaming responses, we handle token counting differently
-                                // The ProviderStreamResponse trait provides content_delta, is_final, and role
-                                // Token counting for streaming responses typically happens with final usage chunk
-                                if chunk.is_final() {
-                                    // For now, we'll implement basic token estimation
-                                    // In a complete implementation, the final chunk would contain usage information
-                                    debug!("Received final streaming chunk");
-                                }
-
-                                // For now, estimate tokens from content delta
-                                if let Some(content) = chunk.content_delta() {
-                                    // Rough estimation: ~4 characters per token
-                                    let estimated_tokens = content.len() / 4;
-                                    self.response_tokens += estimated_tokens.max(1);
-                                }
-                            }
-                            Err(e) => {
-                                warn!("Error processing streaming chunk: {}", e);
-                                return Action::Continue;
-                            }
-                        }
-                    }
-                }
-                Err(e) => {
-                    warn!("Failed to parse streaming response: {}", e);
+            match self.handle_streaming_response(&body, provider_id) {
+                Ok(serialized_body) => {
+                    self.set_http_response_body(0, body_size, &serialized_body);
                 }
+                Err(action) => return action,
             }
         } else {
-            debug!("non streaming response");
-            let provider_id = self.get_provider_id();
-            let response: ProviderResponseType =
-                match ProviderResponseType::try_from((&body[..], provider_id)) {
-                    Ok(response) => response,
-                    Err(e) => {
-                        warn!(
-                            "could not parse response: {}, body str: {}",
-                            e,
-                            String::from_utf8_lossy(&body)
-                        );
-                        debug!(
-                            "on_http_response_body: S[{}], response body: {}",
-                            self.context_id,
-                            String::from_utf8_lossy(&body)
-                        );
-                        self.send_server_error(
-                            ServerError::LogicError(format!("Response parsing error: {}", e)),
-                            Some(StatusCode::BAD_REQUEST),
-                        );
-                        return Action::Continue;
-                    }
-                };
-
-            // Use provider interface to extract usage information
-            if let Some((prompt_tokens, completion_tokens, total_tokens)) =
-                response.extract_usage_counts()
-            {
-                debug!(
-                    "Response usage: prompt={}, completion={}, total={}",
-                    prompt_tokens, completion_tokens, total_tokens
-                );
-                self.response_tokens = completion_tokens;
-            } else {
-                warn!("No usage information found in response");
+            match self.handle_non_streaming_response(&body, provider_id) {
+                Ok(serialized_body) => {
+                    self.set_http_response_body(0, body_size, &serialized_body);
+                }
+                Err(action) => return action,
             }
         }
-
-        debug!(
-            "recv [S={}] total_tokens={} end_stream={}",
-            self.context_id, self.response_tokens, end_of_stream
-        );
-
         Action::Continue
     }
 }
diff --git a/crates/llm_gateway/tests/integration.rs b/crates/llm_gateway/tests/integration.rs
index 82ae8322..48d4bc86 100644
--- a/crates/llm_gateway/tests/integration.rs
+++ b/crates/llm_gateway/tests/integration.rs
@@ -31,14 +31,15 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
         )
         .returning(None)
         .expect_log(
-            Some(LogLevel::Debug),
-            Some("request received: llm provider hint: default, selected provider: open-ai-gpt-4"),
+            Some(LogLevel::Info),
+            None, // Dynamic request ID - could be context_id or x-request-id
         )
         .expect_add_header_map_value(
             Some(MapType::HttpRequestHeaders),
             Some("x-arch-llm-provider"),
             Some("openai"),
         )
+        .expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-api-key"))
         .expect_replace_header_map_value(
             Some(MapType::HttpRequestHeaders),
             Some("Authorization"),
@@ -193,10 +194,7 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
 
     module
         .call_proxy_on_context_create(http_context, filter_context)
-        .expect_log(
-            Some(LogLevel::Trace),
-            Some("||| create_http_context called with context_id: 2 |||"),
-        )
+        .expect_log(Some(LogLevel::Trace), None)
         .execute_and_expect(ReturnType::None)
         .unwrap();
 
@@ -211,15 +209,19 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
             chat_completions_request_body.len() as i32,
             true,
         )
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - REQUEST_BODY_CHUNK
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
-        .expect_log(Some(LogLevel::Info), None)
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - CLIENT_REQUEST_RECEIVED
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - CLIENT_REQUEST_PAYLOAD
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - MODEL_RESOLUTION
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - TOKEN_COUNT
         .expect_metric_record("input_sequence_length", 21)
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=21"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - UPSTREAM_TRANSFORM
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - UPSTREAM_REQUEST_PAYLOAD
         .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();
@@ -263,15 +265,19 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
             incomplete_chat_completions_request_body.len() as i32,
             true,
         )
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - REQUEST_BODY_CHUNK
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(incomplete_chat_completions_request_body))
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 13"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - CLIENT_REQUEST_RECEIVED
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - CLIENT_REQUEST_PAYLOAD
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - MODEL_RESOLUTION
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - TOKEN_COUNT
         .expect_metric_record("input_sequence_length", 13)
-        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=13"#))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=13"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - UPSTREAM_TRANSFORM
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
         .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();
@@ -322,16 +328,18 @@ fn llm_gateway_request_ratelimited() {
             chat_completions_request_body.len() as i32,
             true,
         )
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
         // The actual call is not important in this test, we just need to grab the token_id
-        .expect_log(Some(LogLevel::Info), None)
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"messages\": [{\"role\": \"system\",\"content\": \"You are a helpful poetic assistant!, skilled in explaining complex programming concepts with creative flair. Be sure to be concise and to the point.\"},{\"role\": \"user\",\"content\": \"Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it.\"}],\"model\": \"gpt-4\"}"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
         .expect_metric_record("input_sequence_length", 107)
-        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=107"))
         .expect_log(Some(LogLevel::Warn), Some(r#"server error occurred: exceeded limit provider=gpt-4, selector=Header { key: "selector-key", value: "selector-value" }, tokens_used=107"#))
         .expect_send_local_response(
             Some(StatusCode::TOO_MANY_REQUESTS.as_u16().into()),
@@ -376,16 +384,21 @@ fn llm_gateway_request_not_ratelimited() {
             chat_completions_request_body.len() as i32,
             true,
         )
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
         // The actual call is not important in this test, we just need to grab the token_id
         .expect_log(Some(LogLevel::Info), None)
-        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
+        // Dynamic request ID (applies to the Info log expectation above)
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
         .expect_metric_record("input_sequence_length", 29)
-        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
         .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();
@@ -423,16 +436,20 @@ fn llm_gateway_override_model_name() {
             chat_completions_request_body.len() as i32,
             true,
         )
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
         // The actual call is not important in this test, we just need to grab the token_id
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
         .expect_metric_record("input_sequence_length", 29)
-        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
         .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();
@@ -470,19 +487,23 @@ fn llm_gateway_override_use_default_model() {
             chat_completions_request_body.len() as i32,
             true,
         )
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
         // The actual call is not important in this test, we just need to grab the token_id
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
         .expect_log(
             Some(LogLevel::Info),
-            Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"),
+            None // Dynamic request ID
         )
-        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
         .expect_metric_record("input_sequence_length", 29)
-        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
         .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();
@@ -520,16 +541,21 @@ fn llm_gateway_override_use_model_name_none() {
             chat_completions_request_body.len() as i32,
             true,
         )
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
         // The actual call is not important in this test, we just need to grab the token_id
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): none, model selected: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
+        .expect_log(Some(LogLevel::Info), None)
+        // Dynamic request ID (applies to the Info log expectation above)
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"none\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
         .expect_metric_record("input_sequence_length", 29)
-        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
         .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();
diff --git a/demos/samples_java/weather_forcecast_service/src/main/java/weather/controller/WeatherController.java b/demos/samples_java/weather_forcecast_service/src/main/java/weather/controller/WeatherController.java
index bb41227a..d7caeaf9 100644
--- a/demos/samples_java/weather_forcecast_service/src/main/java/weather/controller/WeatherController.java
+++ b/demos/samples_java/weather_forcecast_service/src/main/java/weather/controller/WeatherController.java
@@ -7,7 +7,6 @@ import org.springframework.web.bind.annotation.PostMapping;
 import org.springframework.web.bind.annotation.RequestBody;
 import org.springframework.web.bind.annotation.RestController;
 
-import java.time.Instant;
 import java.time.LocalDate;
 import java.util.ArrayList;
 import java.util.List;
diff --git a/demos/samples_python/weather_forecast/arch_config.yaml b/demos/samples_python/weather_forecast/arch_config.yaml
index afc0ef04..6d33a5c5 100644
--- a/demos/samples_python/weather_forecast/arch_config.yaml
+++ b/demos/samples_python/weather_forecast/arch_config.yaml
@@ -7,6 +7,12 @@ listeners:
     message_format: openai
     timeout: 30s
 
+  egress_traffic:
+    address: 0.0.0.0
+    port: 12000
+    message_format: openai
+    timeout: 30s
+
 endpoints:
   weather_forecast_service:
     endpoint: host.docker.internal:18083
@@ -24,6 +30,12 @@ llm_providers:
     model: openai/gpt-4o
     default: true
 
+  - access_key: $OPENAI_API_KEY
+    model: openai/gpt-4o-mini
+
+  - access_key: $ANTHROPIC_API_KEY
+    model: anthropic/claude-sonnet-4-20250514
+
 system_prompt: |
   You are a helpful assistant.
 
diff --git a/demos/shared/chatbot_ui/requirements.txt b/demos/shared/chatbot_ui/requirements.txt
index b8e20cba..da4ac00b 100644
--- a/demos/shared/chatbot_ui/requirements.txt
+++ b/demos/shared/chatbot_ui/requirements.txt
@@ -5,4 +5,4 @@ asyncio==3.4.3
 httpx==0.27.0
 python-dotenv==1.0.1
 pydantic==2.8.2
-openai==1.51.0
+openai==1.54.0
diff --git a/demos/use_cases/llm_routing/arch_config.yaml b/demos/use_cases/llm_routing/arch_config.yaml
index addaae66..f90643ff 100644
--- a/demos/use_cases/llm_routing/arch_config.yaml
+++ b/demos/use_cases/llm_routing/arch_config.yaml
@@ -23,10 +23,10 @@ llm_providers:
     model: mistral/ministral-3b-latest
 
   - access_key: $ANTHROPIC_API_KEY
-    model: claude/claude-3-7-sonnet-latest
+    model: anthropic/claude-3-7-sonnet-latest
 
   - access_key: $ANTHROPIC_API_KEY
-    model: claude/claude-sonnet-4-0
+    model: anthropic/claude-sonnet-4-0
 
   - access_key: $DEEPSEEK_API_KEY
     model: deepseek/deepseek-reasoner
diff --git a/demos/use_cases/preference_based_routing/arch_config.yaml b/demos/use_cases/preference_based_routing/arch_config.yaml
index 33136325..cb9f685a 100644
--- a/demos/use_cases/preference_based_routing/arch_config.yaml
+++ b/demos/use_cases/preference_based_routing/arch_config.yaml
@@ -19,8 +19,8 @@ llm_providers:
       - name: code understanding
         description: understand and explain existing code snippets, functions, or libraries
 
-  - model: openai/gpt-4.1
-    access_key: $OPENAI_API_KEY
+  - model: anthropic/claude-sonnet-4-20250514
+    access_key: $ANTHROPIC_API_KEY
     routing_preferences:
       - name: code generation
         description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
diff --git a/demos/use_cases/preference_based_routing/hurl_tests/simple.hurl b/demos/use_cases/preference_based_routing/hurl_tests/simple.hurl
index d9b243e7..1aa56271 100644
--- a/demos/use_cases/preference_based_routing/hurl_tests/simple.hurl
+++ b/demos/use_cases/preference_based_routing/hurl_tests/simple.hurl
@@ -2,7 +2,7 @@ POST http://localhost:12000/v1/chat/completions
 Content-Type: application/json
 
 {
-  "model": "openai/gpt-4.1",
+  "model": "openai/gpt-4o-mini",
   "messages": [
     {
       "role": "user",
@@ -13,7 +13,7 @@ Content-Type: application/json
 HTTP 200
 [Asserts]
 header "content-type" == "application/json"
-jsonpath "$.model" matches /^gpt-4.1/
+jsonpath "$.model" matches /^gpt-4o-mini/
 jsonpath "$.usage" != null
 jsonpath "$.choices[0].message.content" != null
 jsonpath "$.choices[0].message.role" == "assistant"
diff --git a/tests/e2e/docker-compose.yaml b/tests/e2e/docker-compose.yaml
index 53b4338d..a78c5632 100644
--- a/tests/e2e/docker-compose.yaml
+++ b/tests/e2e/docker-compose.yaml
@@ -16,5 +16,6 @@ services:
     environment:
       - OPENAI_API_KEY=${OPENAI_API_KEY:?error}
       - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
+      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:?error}
       - OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
       - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-51000}
diff --git a/tests/e2e/poetry.lock b/tests/e2e/poetry.lock
index 7e296f88..35f9156b 100644
--- a/tests/e2e/poetry.lock
+++ b/tests/e2e/poetry.lock
@@ -1,4 +1,63 @@
-# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand.
+
+[[package]]
+name = "annotated-types"
+version = "0.7.0"
+description = "Reusable constraint types to use with typing.Annotated"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"},
+    {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"},
+]
+
+[[package]]
+name = "anthropic"
+version = "0.66.0"
+description = "The official Python library for the anthropic API"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "anthropic-0.66.0-py3-none-any.whl", hash = "sha256:67b8cd4486f3cdd09211598dc5325cc8e4e349c106a03041231d551603551c06"},
+    {file = "anthropic-0.66.0.tar.gz", hash = "sha256:5aa8b18da57dc27d83fc1d82c9fb860977e5adfae3e0c215d7ab2ebd70afb9cb"},
+]
+
+[package.dependencies]
+anyio = ">=3.5.0,<5"
+distro = ">=1.7.0,<2"
+httpx = ">=0.25.0,<1"
+jiter = ">=0.4.0,<1"
+pydantic = ">=1.9.0,<3"
+sniffio = "*"
+typing-extensions = ">=4.10,<5"
+
+[package.extras]
+aiohttp = ["aiohttp", "httpx-aiohttp (>=0.1.8)"]
+bedrock = ["boto3 (>=1.28.57)", "botocore (>=1.31.57)"]
+vertex = ["google-auth[requests] (>=2,<3)"]
+
+[[package]]
+name = "anyio"
+version = "4.10.0"
+description = "High-level concurrency and networking framework on top of asyncio or Trio"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "anyio-4.10.0-py3-none-any.whl", hash = "sha256:60e474ac86736bbfd6f210f7a61218939c318f43f9972497381f1c5e930ed3d1"},
+    {file = "anyio-4.10.0.tar.gz", hash = "sha256:3f3fae35c96039744587aa5b8371e7e8e603c0702999535961dd336026973ba6"},
+]
+
+[package.dependencies]
+exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
+idna = ">=2.8"
+sniffio = ">=1.1"
+typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""}
+
+[package.extras]
+trio = ["trio (>=0.26.1)"]
 
 [[package]]
 name = "attrs"
@@ -6,18 +65,19 @@ version = "25.1.0"
 description = "Classes Without Boilerplate"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "attrs-25.1.0-py3-none-any.whl", hash = "sha256:c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a"},
     {file = "attrs-25.1.0.tar.gz", hash = "sha256:1c97078a80c814273a76b2a298a932eb681c87415c11dee0a6921de7f1b02c3e"},
 ]
 
 [package.extras]
-benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
-cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
-dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
+benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"]
+cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"]
+dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"]
 docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"]
-tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
-tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]
+tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"]
+tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\""]
 
 [[package]]
 name = "certifi"
@@ -25,6 +85,7 @@ version = "2025.1.31"
 description = "Python package for providing Mozilla's CA Bundle."
 optional = false
 python-versions = ">=3.6"
+groups = ["main"]
 files = [
     {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"},
     {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"},
@@ -36,6 +97,8 @@ version = "1.17.1"
 description = "Foreign Function Interface for Python calling C code."
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
+markers = "os_name == \"nt\" and implementation_name != \"pypy\""
 files = [
     {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"},
     {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"},
@@ -115,6 +178,7 @@ version = "3.4.1"
 description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"},
     {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"},
@@ -216,10 +280,12 @@ version = "0.4.6"
 description = "Cross-platform colored terminal text."
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
+groups = ["main", "dev"]
 files = [
     {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
     {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
 ]
+markers = {main = "sys_platform == \"win32\" or platform_system == \"Windows\"", dev = "sys_platform == \"win32\""}
 
 [[package]]
 name = "coverage"
@@ -227,6 +293,7 @@ version = "7.6.12"
 description = "Code coverage measurement for Python"
 optional = false
 python-versions = ">=3.9"
+groups = ["dev"]
 files = [
     {file = "coverage-7.6.12-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:704c8c8c6ce6569286ae9622e534b4f5b9759b6f2cd643f1c1a61f666d534fe8"},
     {file = "coverage-7.6.12-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ad7525bf0241e5502168ae9c643a2f6c219fa0a283001cee4cf23a9b7da75879"},
@@ -297,7 +364,7 @@ files = [
 tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""}
 
 [package.extras]
-toml = ["tomli"]
+toml = ["tomli ; python_full_version <= \"3.11.0a6\""]
 
 [[package]]
 name = "deepdiff"
@@ -305,6 +372,7 @@ version = "8.2.0"
 description = "Deep Difference and Search of any Python object/data. Recreate objects by adding adding deltas to each other."
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "deepdiff-8.2.0-py3-none-any.whl", hash = "sha256:5091f2cdfd372b1b9f6bfd8065ba323ae31118dc4e42594371b38c8bea3fd0a4"},
     {file = "deepdiff-8.2.0.tar.gz", hash = "sha256:6ec78f65031485735545ffbe7a61e716c3c2d12ca6416886d5e9291fc76c46c3"},
@@ -317,12 +385,26 @@ orderly-set = ">=5.3.0,<6"
 cli = ["click (==8.1.8)", "pyyaml (==6.0.2)"]
 optimize = ["orjson"]
 
+[[package]]
+name = "distro"
+version = "1.9.0"
+description = "Distro - an OS platform information API"
+optional = false
+python-versions = ">=3.6"
+groups = ["main"]
+files = [
+    {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
+    {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
+]
+
 [[package]]
 name = "exceptiongroup"
 version = "1.2.2"
 description = "Backport of PEP 654 (exception groups)"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "dev"]
+markers = "python_version == \"3.10\""
 files = [
     {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
     {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
@@ -337,17 +419,66 @@ version = "0.14.0"
 description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
     {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
 ]
 
+[[package]]
+name = "httpcore"
+version = "1.0.8"
+description = "A minimal low-level HTTP client."
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "httpcore-1.0.8-py3-none-any.whl", hash = "sha256:5254cf149bcb5f75e9d1b2b9f729ea4a4b883d1ad7379fc632b727cec23674be"},
+    {file = "httpcore-1.0.8.tar.gz", hash = "sha256:86e94505ed24ea06514883fd44d2bc02d90e77e7979c8eb71b90f41d364a1bad"},
+]
+
+[package.dependencies]
+certifi = "*"
+h11 = ">=0.13,<0.15"
+
+[package.extras]
+asyncio = ["anyio (>=4.0,<5.0)"]
+http2 = ["h2 (>=3,<5)"]
+socks = ["socksio (==1.*)"]
+trio = ["trio (>=0.22.0,<1.0)"]
+
+[[package]]
+name = "httpx"
+version = "0.28.1"
+description = "The next generation HTTP client."
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"},
+    {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"},
+]
+
+[package.dependencies]
+anyio = "*"
+certifi = "*"
+httpcore = "==1.*"
+idna = "*"
+
+[package.extras]
+brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""]
+cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
+http2 = ["h2 (>=3,<5)"]
+socks = ["socksio (==1.*)"]
+zstd = ["zstandard (>=0.18.0)"]
+
 [[package]]
 name = "idna"
 version = "3.10"
 description = "Internationalized Domain Names in Applications (IDNA)"
 optional = false
 python-versions = ">=3.6"
+groups = ["main"]
 files = [
     {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"},
     {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"},
@@ -362,17 +493,134 @@ version = "2.0.0"
 description = "brain-dead simple config-ini parsing"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "dev"]
 files = [
     {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
     {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
 ]
 
+[[package]]
+name = "jiter"
+version = "0.10.0"
+description = "Fast iterable JSON parser."
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "jiter-0.10.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:cd2fb72b02478f06a900a5782de2ef47e0396b3e1f7d5aba30daeb1fce66f303"},
+    {file = "jiter-0.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:32bb468e3af278f095d3fa5b90314728a6916d89ba3d0ffb726dd9bf7367285e"},
+    {file = "jiter-0.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa8b3e0068c26ddedc7abc6fac37da2d0af16b921e288a5a613f4b86f050354f"},
+    {file = "jiter-0.10.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:286299b74cc49e25cd42eea19b72aa82c515d2f2ee12d11392c56d8701f52224"},
+    {file = "jiter-0.10.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6ed5649ceeaeffc28d87fb012d25a4cd356dcd53eff5acff1f0466b831dda2a7"},
+    {file = "jiter-0.10.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2ab0051160cb758a70716448908ef14ad476c3774bd03ddce075f3c1f90a3d6"},
+    {file = "jiter-0.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03997d2f37f6b67d2f5c475da4412be584e1cec273c1cfc03d642c46db43f8cf"},
+    {file = "jiter-0.10.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c404a99352d839fed80d6afd6c1d66071f3bacaaa5c4268983fc10f769112e90"},
+    {file = "jiter-0.10.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:66e989410b6666d3ddb27a74c7e50d0829704ede652fd4c858e91f8d64b403d0"},
+    {file = "jiter-0.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b532d3af9ef4f6374609a3bcb5e05a1951d3bf6190dc6b176fdb277c9bbf15ee"},
+    {file = "jiter-0.10.0-cp310-cp310-win32.whl", hash = "sha256:da9be20b333970e28b72edc4dff63d4fec3398e05770fb3205f7fb460eb48dd4"},
+    {file = "jiter-0.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:f59e533afed0c5b0ac3eba20d2548c4a550336d8282ee69eb07b37ea526ee4e5"},
+    {file = "jiter-0.10.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:3bebe0c558e19902c96e99217e0b8e8b17d570906e72ed8a87170bc290b1e978"},
+    {file = "jiter-0.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:558cc7e44fd8e507a236bee6a02fa17199ba752874400a0ca6cd6e2196cdb7dc"},
+    {file = "jiter-0.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d613e4b379a07d7c8453c5712ce7014e86c6ac93d990a0b8e7377e18505e98d"},
+    {file = "jiter-0.10.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f62cf8ba0618eda841b9bf61797f21c5ebd15a7a1e19daab76e4e4b498d515b2"},
+    {file = "jiter-0.10.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:919d139cdfa8ae8945112398511cb7fca58a77382617d279556b344867a37e61"},
+    {file = "jiter-0.10.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13ddbc6ae311175a3b03bd8994881bc4635c923754932918e18da841632349db"},
+    {file = "jiter-0.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c440ea003ad10927a30521a9062ce10b5479592e8a70da27f21eeb457b4a9c5"},
+    {file = "jiter-0.10.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dc347c87944983481e138dea467c0551080c86b9d21de6ea9306efb12ca8f606"},
+    {file = "jiter-0.10.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:13252b58c1f4d8c5b63ab103c03d909e8e1e7842d302473f482915d95fefd605"},
+    {file = "jiter-0.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7d1bbf3c465de4a24ab12fb7766a0003f6f9bce48b8b6a886158c4d569452dc5"},
+    {file = "jiter-0.10.0-cp311-cp311-win32.whl", hash = "sha256:db16e4848b7e826edca4ccdd5b145939758dadf0dc06e7007ad0e9cfb5928ae7"},
+    {file = "jiter-0.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:9c9c1d5f10e18909e993f9641f12fe1c77b3e9b533ee94ffa970acc14ded3812"},
+    {file = "jiter-0.10.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:1e274728e4a5345a6dde2d343c8da018b9d4bd4350f5a472fa91f66fda44911b"},
+    {file = "jiter-0.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7202ae396446c988cb2a5feb33a543ab2165b786ac97f53b59aafb803fef0744"},
+    {file = "jiter-0.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23ba7722d6748b6920ed02a8f1726fb4b33e0fd2f3f621816a8b486c66410ab2"},
+    {file = "jiter-0.10.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:371eab43c0a288537d30e1f0b193bc4eca90439fc08a022dd83e5e07500ed026"},
+    {file = "jiter-0.10.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c675736059020365cebc845a820214765162728b51ab1e03a1b7b3abb70f74c"},
+    {file = "jiter-0.10.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0c5867d40ab716e4684858e4887489685968a47e3ba222e44cde6e4a2154f959"},
+    {file = "jiter-0.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:395bb9a26111b60141757d874d27fdea01b17e8fac958b91c20128ba8f4acc8a"},
+    {file = "jiter-0.10.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6842184aed5cdb07e0c7e20e5bdcfafe33515ee1741a6835353bb45fe5d1bd95"},
+    {file = "jiter-0.10.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:62755d1bcea9876770d4df713d82606c8c1a3dca88ff39046b85a048566d56ea"},
+    {file = "jiter-0.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:533efbce2cacec78d5ba73a41756beff8431dfa1694b6346ce7af3a12c42202b"},
+    {file = "jiter-0.10.0-cp312-cp312-win32.whl", hash = "sha256:8be921f0cadd245e981b964dfbcd6fd4bc4e254cdc069490416dd7a2632ecc01"},
+    {file = "jiter-0.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:a7c7d785ae9dda68c2678532a5a1581347e9c15362ae9f6e68f3fdbfb64f2e49"},
+    {file = "jiter-0.10.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e0588107ec8e11b6f5ef0e0d656fb2803ac6cf94a96b2b9fc675c0e3ab5e8644"},
+    {file = "jiter-0.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cafc4628b616dc32530c20ee53d71589816cf385dd9449633e910d596b1f5c8a"},
+    {file = "jiter-0.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:520ef6d981172693786a49ff5b09eda72a42e539f14788124a07530f785c3ad6"},
+    {file = "jiter-0.10.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:554dedfd05937f8fc45d17ebdf298fe7e0c77458232bcb73d9fbbf4c6455f5b3"},
+    {file = "jiter-0.10.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5bc299da7789deacf95f64052d97f75c16d4fc8c4c214a22bf8d859a4288a1c2"},
+    {file = "jiter-0.10.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5161e201172de298a8a1baad95eb85db4fb90e902353b1f6a41d64ea64644e25"},
+    {file = "jiter-0.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e2227db6ba93cb3e2bf67c87e594adde0609f146344e8207e8730364db27041"},
+    {file = "jiter-0.10.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:15acb267ea5e2c64515574b06a8bf393fbfee6a50eb1673614aa45f4613c0cca"},
+    {file = "jiter-0.10.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:901b92f2e2947dc6dfcb52fd624453862e16665ea909a08398dde19c0731b7f4"},
+    {file = "jiter-0.10.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d0cb9a125d5a3ec971a094a845eadde2db0de85b33c9f13eb94a0c63d463879e"},
+    {file = "jiter-0.10.0-cp313-cp313-win32.whl", hash = "sha256:48a403277ad1ee208fb930bdf91745e4d2d6e47253eedc96e2559d1e6527006d"},
+    {file = "jiter-0.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:75f9eb72ecb640619c29bf714e78c9c46c9c4eaafd644bf78577ede459f330d4"},
+    {file = "jiter-0.10.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:28ed2a4c05a1f32ef0e1d24c2611330219fed727dae01789f4a335617634b1ca"},
+    {file = "jiter-0.10.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14a4c418b1ec86a195f1ca69da8b23e8926c752b685af665ce30777233dfe070"},
+    {file = "jiter-0.10.0-cp313-cp313t-win_amd64.whl", hash = "sha256:d7bfed2fe1fe0e4dda6ef682cee888ba444b21e7a6553e03252e4feb6cf0adca"},
+    {file = "jiter-0.10.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:5e9251a5e83fab8d87799d3e1a46cb4b7f2919b895c6f4483629ed2446f66522"},
+    {file = "jiter-0.10.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:023aa0204126fe5b87ccbcd75c8a0d0261b9abdbbf46d55e7ae9f8e22424eeb8"},
+    {file = "jiter-0.10.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c189c4f1779c05f75fc17c0c1267594ed918996a231593a21a5ca5438445216"},
+    {file = "jiter-0.10.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:15720084d90d1098ca0229352607cd68256c76991f6b374af96f36920eae13c4"},
+    {file = "jiter-0.10.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e4f2fb68e5f1cfee30e2b2a09549a00683e0fde4c6a2ab88c94072fc33cb7426"},
+    {file = "jiter-0.10.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ce541693355fc6da424c08b7edf39a2895f58d6ea17d92cc2b168d20907dee12"},
+    {file = "jiter-0.10.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31c50c40272e189d50006ad5c73883caabb73d4e9748a688b216e85a9a9ca3b9"},
+    {file = "jiter-0.10.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fa3402a2ff9815960e0372a47b75c76979d74402448509ccd49a275fa983ef8a"},
+    {file = "jiter-0.10.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:1956f934dca32d7bb647ea21d06d93ca40868b505c228556d3373cbd255ce853"},
+    {file = "jiter-0.10.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:fcedb049bdfc555e261d6f65a6abe1d5ad68825b7202ccb9692636c70fcced86"},
+    {file = "jiter-0.10.0-cp314-cp314-win32.whl", hash = "sha256:ac509f7eccca54b2a29daeb516fb95b6f0bd0d0d8084efaf8ed5dfc7b9f0b357"},
+    {file = "jiter-0.10.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5ed975b83a2b8639356151cef5c0d597c68376fc4922b45d0eb384ac058cfa00"},
+    {file = "jiter-0.10.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa96f2abba33dc77f79b4cf791840230375f9534e5fac927ccceb58c5e604a5"},
+    {file = "jiter-0.10.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:bd6292a43c0fc09ce7c154ec0fa646a536b877d1e8f2f96c19707f65355b5a4d"},
+    {file = "jiter-0.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:39de429dcaeb6808d75ffe9effefe96a4903c6a4b376b2f6d08d77c1aaee2f18"},
+    {file = "jiter-0.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52ce124f13a7a616fad3bb723f2bfb537d78239d1f7f219566dc52b6f2a9e48d"},
+    {file = "jiter-0.10.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:166f3606f11920f9a1746b2eea84fa2c0a5d50fd313c38bdea4edc072000b0af"},
+    {file = "jiter-0.10.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:28dcecbb4ba402916034fc14eba7709f250c4d24b0c43fc94d187ee0580af181"},
+    {file = "jiter-0.10.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:86c5aa6910f9bebcc7bc4f8bc461aff68504388b43bfe5e5c0bd21efa33b52f4"},
+    {file = "jiter-0.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ceeb52d242b315d7f1f74b441b6a167f78cea801ad7c11c36da77ff2d42e8a28"},
+    {file = "jiter-0.10.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ff76d8887c8c8ee1e772274fcf8cc1071c2c58590d13e33bd12d02dc9a560397"},
+    {file = "jiter-0.10.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a9be4d0fa2b79f7222a88aa488bd89e2ae0a0a5b189462a12def6ece2faa45f1"},
+    {file = "jiter-0.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9ab7fd8738094139b6c1ab1822d6f2000ebe41515c537235fd45dabe13ec9324"},
+    {file = "jiter-0.10.0-cp39-cp39-win32.whl", hash = "sha256:5f51e048540dd27f204ff4a87f5d79294ea0aa3aa552aca34934588cf27023cf"},
+    {file = "jiter-0.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:1b28302349dc65703a9e4ead16f163b1c339efffbe1049c30a44b001a2a4fff9"},
+    {file = "jiter-0.10.0.tar.gz", hash = "sha256:07a7142c38aacc85194391108dc91b5b57093c978a9932bd86a36862759d9500"},
+]
+
+[[package]]
+name = "openai"
+version = "1.106.1"
+description = "The official Python library for the openai API"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "openai-1.106.1-py3-none-any.whl", hash = "sha256:bfdef37c949f80396c59f2c17e0eda35414979bc07ef3379596a93c9ed044f3a"},
+    {file = "openai-1.106.1.tar.gz", hash = "sha256:5f575967e3a05555825c43829cdcd50be6e49ab6a3e5262f0937a3f791f917f1"},
+]
+
+[package.dependencies]
+anyio = ">=3.5.0,<5"
+distro = ">=1.7.0,<2"
+httpx = ">=0.23.0,<1"
+jiter = ">=0.4.0,<1"
+pydantic = ">=1.9.0,<3"
+sniffio = "*"
+tqdm = ">4"
+typing-extensions = ">=4.11,<5"
+
+[package.extras]
+aiohttp = ["aiohttp", "httpx-aiohttp (>=0.1.8)"]
+datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
+realtime = ["websockets (>=13,<16)"]
+voice-helpers = ["numpy (>=2.0.2)", "sounddevice (>=0.5.1)"]
+
 [[package]]
 name = "orderly-set"
 version = "5.3.0"
 description = "Orderly set"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "orderly_set-5.3.0-py3-none-any.whl", hash = "sha256:c2c0bfe604f5d3d9b24e8262a06feb612594f37aa3845650548befd7772945d1"},
     {file = "orderly_set-5.3.0.tar.gz", hash = "sha256:80b3d8fdd3d39004d9aad389eaa0eab02c71f0a0511ba3a6d54a935a6c6a0acc"},
@@ -384,6 +632,7 @@ version = "1.3.0.post0"
 description = "Capture the outcome of Python function calls."
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "outcome-1.3.0.post0-py2.py3-none-any.whl", hash = "sha256:e771c5ce06d1415e356078d3bdd68523f284b4ce5419828922b6871e65eda82b"},
     {file = "outcome-1.3.0.post0.tar.gz", hash = "sha256:9dcf02e65f2971b80047b377468e72a268e15c0af3cf1238e6ff14f7f91143b8"},
@@ -398,6 +647,7 @@ version = "24.2"
 description = "Core utilities for Python packages"
 optional = false
 python-versions = ">=3.8"
+groups = ["main", "dev"]
 files = [
     {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"},
     {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"},
@@ -409,6 +659,7 @@ version = "1.5.0"
 description = "plugin and hook calling mechanisms for python"
 optional = false
 python-versions = ">=3.8"
+groups = ["main", "dev"]
 files = [
     {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"},
     {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"},
@@ -424,17 +675,154 @@ version = "2.22"
 description = "C parser in Python"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
+markers = "os_name == \"nt\" and implementation_name != \"pypy\""
 files = [
     {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"},
     {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"},
 ]
 
+[[package]]
+name = "pydantic"
+version = "2.11.7"
+description = "Data validation using Python type hints"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "pydantic-2.11.7-py3-none-any.whl", hash = "sha256:dde5df002701f6de26248661f6835bbe296a47bf73990135c7d07ce741b9623b"},
+    {file = "pydantic-2.11.7.tar.gz", hash = "sha256:d989c3c6cb79469287b1569f7447a17848c998458d49ebe294e975b9baf0f0db"},
+]
+
+[package.dependencies]
+annotated-types = ">=0.6.0"
+pydantic-core = "2.33.2"
+typing-extensions = ">=4.12.2"
+typing-inspection = ">=0.4.0"
+
+[package.extras]
+email = ["email-validator (>=2.0.0)"]
+timezone = ["tzdata ; python_version >= \"3.9\" and platform_system == \"Windows\""]
+
+[[package]]
+name = "pydantic-core"
+version = "2.33.2"
+description = "Core functionality for Pydantic validation and serialization"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "pydantic_core-2.33.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2b3d326aaef0c0399d9afffeb6367d5e26ddc24d351dbc9c636840ac355dc5d8"},
+    {file = "pydantic_core-2.33.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e5b2671f05ba48b94cb90ce55d8bdcaaedb8ba00cc5359f6810fc918713983d"},
+    {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0069c9acc3f3981b9ff4cdfaf088e98d83440a4c7ea1bc07460af3d4dc22e72d"},
+    {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d53b22f2032c42eaaf025f7c40c2e3b94568ae077a606f006d206a463bc69572"},
+    {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0405262705a123b7ce9f0b92f123334d67b70fd1f20a9372b907ce1080c7ba02"},
+    {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b25d91e288e2c4e0662b8038a28c6a07eaac3e196cfc4ff69de4ea3db992a1b"},
+    {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bdfe4b3789761f3bcb4b1ddf33355a71079858958e3a552f16d5af19768fef2"},
+    {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:efec8db3266b76ef9607c2c4c419bdb06bf335ae433b80816089ea7585816f6a"},
+    {file = "pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:031c57d67ca86902726e0fae2214ce6770bbe2f710dc33063187a68744a5ecac"},
+    {file = "pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:f8de619080e944347f5f20de29a975c2d815d9ddd8be9b9b7268e2e3ef68605a"},
+    {file = "pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:73662edf539e72a9440129f231ed3757faab89630d291b784ca99237fb94db2b"},
+    {file = "pydantic_core-2.33.2-cp310-cp310-win32.whl", hash = "sha256:0a39979dcbb70998b0e505fb1556a1d550a0781463ce84ebf915ba293ccb7e22"},
+    {file = "pydantic_core-2.33.2-cp310-cp310-win_amd64.whl", hash = "sha256:b0379a2b24882fef529ec3b4987cb5d003b9cda32256024e6fe1586ac45fc640"},
+    {file = "pydantic_core-2.33.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4c5b0a576fb381edd6d27f0a85915c6daf2f8138dc5c267a57c08a62900758c7"},
+    {file = "pydantic_core-2.33.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e799c050df38a639db758c617ec771fd8fb7a5f8eaaa4b27b101f266b216a246"},
+    {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc46a01bf8d62f227d5ecee74178ffc448ff4e5197c756331f71efcc66dc980f"},
+    {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a144d4f717285c6d9234a66778059f33a89096dfb9b39117663fd8413d582dcc"},
+    {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73cf6373c21bc80b2e0dc88444f41ae60b2f070ed02095754eb5a01df12256de"},
+    {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dc625f4aa79713512d1976fe9f0bc99f706a9dee21dfd1810b4bbbf228d0e8a"},
+    {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b21b5549499972441da4758d662aeea93f1923f953e9cbaff14b8b9565aef"},
+    {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bdc25f3681f7b78572699569514036afe3c243bc3059d3942624e936ec93450e"},
+    {file = "pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:fe5b32187cbc0c862ee201ad66c30cf218e5ed468ec8dc1cf49dec66e160cc4d"},
+    {file = "pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:bc7aee6f634a6f4a95676fcb5d6559a2c2a390330098dba5e5a5f28a2e4ada30"},
+    {file = "pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:235f45e5dbcccf6bd99f9f472858849f73d11120d76ea8707115415f8e5ebebf"},
+    {file = "pydantic_core-2.33.2-cp311-cp311-win32.whl", hash = "sha256:6368900c2d3ef09b69cb0b913f9f8263b03786e5b2a387706c5afb66800efd51"},
+    {file = "pydantic_core-2.33.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e063337ef9e9820c77acc768546325ebe04ee38b08703244c1309cccc4f1bab"},
+    {file = "pydantic_core-2.33.2-cp311-cp311-win_arm64.whl", hash = "sha256:6b99022f1d19bc32a4c2a0d544fc9a76e3be90f0b3f4af413f87d38749300e65"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-win32.whl", hash = "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-win_amd64.whl", hash = "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-win_arm64.whl", hash = "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9"},
+    {file = "pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac"},
+    {file = "pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5"},
+    {file = "pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9"},
+    {file = "pydantic_core-2.33.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a2b911a5b90e0374d03813674bf0a5fbbb7741570dcd4b4e85a2e48d17def29d"},
+    {file = "pydantic_core-2.33.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6fa6dfc3e4d1f734a34710f391ae822e0a8eb8559a85c6979e14e65ee6ba2954"},
+    {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c54c939ee22dc8e2d545da79fc5381f1c020d6d3141d3bd747eab59164dc89fb"},
+    {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:53a57d2ed685940a504248187d5685e49eb5eef0f696853647bf37c418c538f7"},
+    {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:09fb9dd6571aacd023fe6aaca316bd01cf60ab27240d7eb39ebd66a3a15293b4"},
+    {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0e6116757f7959a712db11f3e9c0a99ade00a5bbedae83cb801985aa154f071b"},
+    {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d55ab81c57b8ff8548c3e4947f119551253f4e3787a7bbc0b6b3ca47498a9d3"},
+    {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c20c462aa4434b33a2661701b861604913f912254e441ab8d78d30485736115a"},
+    {file = "pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:44857c3227d3fb5e753d5fe4a3420d6376fa594b07b621e220cd93703fe21782"},
+    {file = "pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:eb9b459ca4df0e5c87deb59d37377461a538852765293f9e6ee834f0435a93b9"},
+    {file = "pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9fcd347d2cc5c23b06de6d3b7b8275be558a0c90549495c699e379a80bf8379e"},
+    {file = "pydantic_core-2.33.2-cp39-cp39-win32.whl", hash = "sha256:83aa99b1285bc8f038941ddf598501a86f1536789740991d7d8756e34f1e74d9"},
+    {file = "pydantic_core-2.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:f481959862f57f29601ccced557cc2e817bce7533ab8e01a797a48b49c9692b3"},
+    {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5c4aa4e82353f65e548c476b37e64189783aa5384903bfea4f41580f255fddfa"},
+    {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d946c8bf0d5c24bf4fe333af284c59a19358aa3ec18cb3dc4370080da1e8ad29"},
+    {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87b31b6846e361ef83fedb187bb5b4372d0da3f7e28d85415efa92d6125d6e6d"},
+    {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa9d91b338f2df0508606f7009fde642391425189bba6d8c653afd80fd6bb64e"},
+    {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2058a32994f1fde4ca0480ab9d1e75a0e8c87c22b53a3ae66554f9af78f2fe8c"},
+    {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:0e03262ab796d986f978f79c943fc5f620381be7287148b8010b4097f79a39ec"},
+    {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:1a8695a8d00c73e50bff9dfda4d540b7dee29ff9b8053e38380426a85ef10052"},
+    {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fa754d1850735a0b0e03bcffd9d4b4343eb417e47196e4485d9cca326073a42c"},
+    {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a11c8d26a50bfab49002947d3d237abe4d9e4b5bdc8846a63537b6488e197808"},
+    {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:dd14041875d09cc0f9308e37a6f8b65f5585cf2598a53aa0123df8b129d481f8"},
+    {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d87c561733f66531dced0da6e864f44ebf89a8fba55f31407b00c2f7f9449593"},
+    {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f82865531efd18d6e07a04a17331af02cb7a651583c418df8266f17a63c6612"},
+    {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bfb5112df54209d820d7bf9317c7a6c9025ea52e49f46b6a2060104bba37de7"},
+    {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:64632ff9d614e5eecfb495796ad51b0ed98c453e447a76bcbeeb69615079fc7e"},
+    {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f889f7a40498cc077332c7ab6b4608d296d852182211787d4f3ee377aaae66e8"},
+    {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:de4b83bb311557e439b9e186f733f6c645b9417c84e2eb8203f3f820a4b988bf"},
+    {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f68293f055f51b51ea42fafc74b6aad03e70e191799430b90c13d643059ebb"},
+    {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1"},
+    {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:87acbfcf8e90ca885206e98359d7dca4bcbb35abdc0ff66672a293e1d7a19101"},
+    {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:7f92c15cd1e97d4b12acd1cc9004fa092578acfa57b67ad5e43a197175d01a64"},
+    {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3f26877a748dc4251cfcfda9dfb5f13fcb034f5308388066bcfe9031b63ae7d"},
+    {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac89aea9af8cd672fa7b510e7b8c33b0bba9a43186680550ccf23020f32d535"},
+    {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:970919794d126ba8645f3837ab6046fb4e72bbc057b3709144066204c19a455d"},
+    {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3eb3fe62804e8f859c49ed20a8451342de53ed764150cb14ca71357c765dc2a6"},
+    {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:3abcd9392a36025e3bd55f9bd38d908bd17962cc49bc6da8e7e96285336e2bca"},
+    {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:3a1c81334778f9e3af2f8aeb7a960736e5cab1dfebfb26aabca09afd2906c039"},
+    {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2807668ba86cb38c6817ad9bc66215ab8584d1d304030ce4f0887336f28a5e27"},
+    {file = "pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc"},
+]
+
+[package.dependencies]
+typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
+
 [[package]]
 name = "pysocks"
 version = "1.7.1"
 description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information."
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+groups = ["main"]
 files = [
     {file = "PySocks-1.7.1-py27-none-any.whl", hash = "sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299"},
     {file = "PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5"},
@@ -447,6 +835,7 @@ version = "8.3.5"
 description = "pytest: simple powerful testing with Python"
 optional = false
 python-versions = ">=3.8"
+groups = ["main", "dev"]
 files = [
     {file = "pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820"},
     {file = "pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845"},
@@ -469,6 +858,7 @@ version = "4.1.0"
 description = "Pytest plugin for measuring coverage."
 optional = false
 python-versions = ">=3.7"
+groups = ["dev"]
 files = [
     {file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"},
     {file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"},
@@ -487,6 +877,7 @@ version = "1.7.0"
 description = "Adds the ability to retry flaky tests in CI environments"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "pytest_retry-1.7.0-py3-none-any.whl", hash = "sha256:a2dac85b79a4e2375943f1429479c65beb6c69553e7dae6b8332be47a60954f4"},
     {file = "pytest_retry-1.7.0.tar.gz", hash = "sha256:f8d52339f01e949df47c11ba9ee8d5b362f5824dff580d3870ec9ae0057df80f"},
@@ -504,6 +895,7 @@ version = "1.0.0"
 description = "pytest-sugar is a plugin for pytest that changes the default look and feel of pytest (e.g. progressbar, show tests that fail instantly)."
 optional = false
 python-versions = "*"
+groups = ["main"]
 files = [
     {file = "pytest-sugar-1.0.0.tar.gz", hash = "sha256:6422e83258f5b0c04ce7c632176c7732cab5fdb909cb39cca5c9139f81276c0a"},
     {file = "pytest_sugar-1.0.0-py3-none-any.whl", hash = "sha256:70ebcd8fc5795dc457ff8b69d266a4e2e8a74ae0c3edc749381c64b5246c8dfd"},
@@ -523,6 +915,7 @@ version = "2.32.3"
 description = "Python HTTP for Humans."
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"},
     {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"},
@@ -544,6 +937,7 @@ version = "4.29.0"
 description = "Official Python bindings for Selenium WebDriver"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "selenium-4.29.0-py3-none-any.whl", hash = "sha256:ce5d26f1ddc1111641113653af33694c13947dd36c2df09cdd33f554351d372e"},
     {file = "selenium-4.29.0.tar.gz", hash = "sha256:3a62f7ec33e669364a6c0562a701deb69745b569c50d55f1a912bf8eb33358ba"},
@@ -563,6 +957,7 @@ version = "1.3.1"
 description = "Sniff out which async library your code is running under"
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
     {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
@@ -574,6 +969,7 @@ version = "2.4.0"
 description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set"
 optional = false
 python-versions = "*"
+groups = ["main"]
 files = [
     {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"},
     {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"},
@@ -585,6 +981,7 @@ version = "2.5.0"
 description = "ANSI color formatting for output in terminal"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "termcolor-2.5.0-py3-none-any.whl", hash = "sha256:37b17b5fc1e604945c2642c872a3764b5d547a48009871aea3edd3afa180afb8"},
     {file = "termcolor-2.5.0.tar.gz", hash = "sha256:998d8d27da6d48442e8e1f016119076b690d962507531df4890fcd2db2ef8a6f"},
@@ -599,6 +996,7 @@ version = "2.2.1"
 description = "A lil' TOML parser"
 optional = false
 python-versions = ">=3.8"
+groups = ["main", "dev"]
 files = [
     {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"},
     {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"},
@@ -633,6 +1031,29 @@ files = [
     {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"},
     {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"},
 ]
+markers = {main = "python_version == \"3.10\"", dev = "python_full_version <= \"3.11.0a6\""}
+
+[[package]]
+name = "tqdm"
+version = "4.67.1"
+description = "Fast, Extensible Progress Meter"
+optional = false
+python-versions = ">=3.7"
+groups = ["main"]
+files = [
+    {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"},
+    {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
+[package.extras]
+dev = ["nbval", "pytest (>=6)", "pytest-asyncio (>=0.24)", "pytest-cov", "pytest-timeout"]
+discord = ["requests"]
+notebook = ["ipywidgets (>=6)"]
+slack = ["slack-sdk"]
+telegram = ["requests"]
 
 [[package]]
 name = "trio"
@@ -640,6 +1061,7 @@ version = "0.29.0"
 description = "A friendly Python library for async concurrency and I/O"
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "trio-0.29.0-py3-none-any.whl", hash = "sha256:d8c463f1a9cc776ff63e331aba44c125f423a5a13c684307e828d930e625ba66"},
     {file = "trio-0.29.0.tar.gz", hash = "sha256:ea0d3967159fc130acb6939a0be0e558e364fee26b5deeecc893a6b08c361bdf"},
@@ -660,6 +1082,7 @@ version = "0.12.2"
 description = "WebSocket library for Trio"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "trio_websocket-0.12.2-py3-none-any.whl", hash = "sha256:df605665f1db533f4a386c94525870851096a223adcb97f72a07e8b4beba45b6"},
     {file = "trio_websocket-0.12.2.tar.gz", hash = "sha256:22c72c436f3d1e264d0910a3951934798dcc5b00ae56fc4ee079d46c7cf20fae"},
@@ -677,17 +1100,34 @@ version = "4.12.2"
 description = "Backported and Experimental Type Hints for Python 3.8+"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
     {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
 ]
 
+[[package]]
+name = "typing-inspection"
+version = "0.4.1"
+description = "Runtime typing introspection tools"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51"},
+    {file = "typing_inspection-0.4.1.tar.gz", hash = "sha256:6ae134cc0203c33377d43188d4064e9b357dba58cff3185f22924610e70a9d28"},
+]
+
+[package.dependencies]
+typing-extensions = ">=4.12.0"
+
 [[package]]
 name = "urllib3"
 version = "2.3.0"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
+groups = ["main"]
 files = [
     {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"},
     {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"},
@@ -697,7 +1137,7 @@ files = [
 pysocks = {version = ">=1.5.6,<1.5.7 || >1.5.7,<2.0", optional = true, markers = "extra == \"socks\""}
 
 [package.extras]
-brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"]
+brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""]
 h2 = ["h2 (>=4,<5)"]
 socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
 zstd = ["zstandard (>=0.18.0)"]
@@ -708,6 +1148,7 @@ version = "1.8.0"
 description = "WebSocket client for Python with low level API options"
 optional = false
 python-versions = ">=3.8"
+groups = ["main"]
 files = [
     {file = "websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526"},
     {file = "websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da"},
@@ -724,6 +1165,7 @@ version = "1.2.0"
 description = "WebSockets state-machine based protocol implementation"
 optional = false
 python-versions = ">=3.7.0"
+groups = ["main"]
 files = [
     {file = "wsproto-1.2.0-py3-none-any.whl", hash = "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736"},
     {file = "wsproto-1.2.0.tar.gz", hash = "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065"},
@@ -733,6 +1175,6 @@ files = [
 h11 = ">=0.9.0,<1"
 
 [metadata]
-lock-version = "2.0"
+lock-version = "2.1"
 python-versions = "^3.10"
-content-hash = "a40015b90325879e50f82cca6a26a730d763cad26589671df798832d41c42db3"
+content-hash = "11665d36dc4efcfa5bebf8c7005808ff752bd3c62f6570c21d9cbce4b7f2e2ee"
diff --git a/tests/e2e/pyproject.toml b/tests/e2e/pyproject.toml
index f9a0defd..42ebbcb9 100644
--- a/tests/e2e/pyproject.toml
+++ b/tests/e2e/pyproject.toml
@@ -15,6 +15,8 @@ selenium = "^4.11.2"
 pytest-sugar = "^1.0.0"
 deepdiff = "^8.0.1"
 pytest-retry = "^1.6.3"
+anthropic = "^0.66.0"
+openai = "^1.0.0"
 
 [tool.poetry.dev-dependencies]
 pytest-cov = "^4.1.0"
diff --git a/tests/e2e/run_e2e_tests.sh b/tests/e2e/run_e2e_tests.sh
index 5e6b6dbc..c716a182 100644
--- a/tests/e2e/run_e2e_tests.sh
+++ b/tests/e2e/run_e2e_tests.sh
@@ -48,6 +48,9 @@ cd ../../
 archgw build
 cd -
 
+# Once we build archgw we have to install the dependencies again to a new virtual environment.
+poetry install
+
 log startup arch gateway with function calling demo
 cd ../../
 tail -F ~/archgw_logs/modelserver.log &
@@ -59,7 +62,6 @@ cd -
 
 log running e2e tests
 log =================
-poetry install
 poetry run pytest
 
 log shutting down the arch gateway service
diff --git a/tests/e2e/test_prompt_gateway.py b/tests/e2e/test_prompt_gateway.py
index e6a10f3a..362be227 100644
--- a/tests/e2e/test_prompt_gateway.py
+++ b/tests/e2e/test_prompt_gateway.py
@@ -3,9 +3,12 @@ import pytest
 import requests
 from deepdiff import DeepDiff
 import re
+import anthropic
+import openai
 
 from common import (
     PROMPT_GATEWAY_ENDPOINT,
+    LLM_GATEWAY_ENDPOINT,
     PREFILL_LIST,
     get_arch_messages,
     get_data_chunks,
@@ -352,3 +355,178 @@ def test_prompt_gateway_prompt_guard_jailbreak(stream):
             response_json.get("choices")[0]["message"]["content"]
             == "Looks like you're curious about my abilities, but I can only provide assistance for weather forecasting."
         )
+
+
+def test_claude_v1_messages_api():
+    """Non-streaming /v1/messages call to Claude through llm_gateway (port 12000)."""
+    # Drop the chat-completions path so only the gateway base URL remains.
+    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
+    # Dummy key for testing.
+    claude = anthropic.Anthropic(api_key="test-key", base_url=gateway_root)
+
+    prompt = {
+        "role": "user",
+        "content": "Hello, please respond with exactly: Hello from Claude!",
+    }
+    reply = claude.messages.create(
+        model="claude-sonnet-4-20250514",
+        max_tokens=50,
+        messages=[prompt],
+    )
+
+    # The prompt asks for a fixed phrase, so compare the first content block verbatim.
+    first_block = reply.content[0]
+    answer = first_block.text
+    assert answer == "Hello from Claude!"
+
+
+def test_claude_v1_messages_api_streaming():
+    """Streaming /v1/messages call to Claude through the LLM gateway."""
+    # Drop the chat-completions path so only the gateway base URL remains.
+    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
+    claude = anthropic.Anthropic(api_key="test-key", base_url=gateway_root)
+    prompt = {
+        "role": "user",
+        "content": "Hello, please respond with exactly: Hello from Claude!",
+    }
+
+    with claude.messages.stream(
+        model="claude-sonnet-4-20250514",
+        max_tokens=50,
+        messages=[prompt],
+    ) as stream:
+        # Concatenate the text deltas in the order they arrive.
+        streamed_text = "".join(stream.text_stream)
+
+        # Then rebuild the text from the assembled final message,
+        # keeping only the blocks whose type is "text".
+        final_message = stream.get_final_message()
+        text_blocks = [blk.text for blk in final_message.content if blk.type == "text"]
+        assembled_text = "".join(text_blocks)
+
+    assert streamed_text == "Hello from Claude!"
+    assert assembled_text == "Hello from Claude!"
+
+
+def test_anthropic_client_with_openai_model_streaming():
+    """Stream an OpenAI model (gpt-4o-mini) through the Anthropic /v1/messages client.
+
+    Covers the transformation from an OpenAI upstream to the Anthropic client
+    format, including the event lines that format uses.
+    """
+    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
+    anthropic_client = anthropic.Anthropic(api_key="test-key", base_url=gateway_root)
+    expected = "Hello from GPT-4o-mini via Anthropic!"
+    prompt = {
+        "role": "user",
+        "content": "Hello, please respond with exactly: Hello from GPT-4o-mini via Anthropic!",
+    }
+
+    with anthropic_client.messages.stream(
+        model="gpt-4o-mini",  # OpenAI model requested through the Anthropic client
+        max_tokens=50,
+        messages=[prompt],
+    ) as stream:
+        # Concatenate the text deltas in the order they arrive.
+        streamed_text = "".join(stream.text_stream)
+
+        # Rebuild the same text from the final message's "text" blocks.
+        final_message = stream.get_final_message()
+        text_blocks = [blk.text for blk in final_message.content if blk.type == "text"]
+        assembled_text = "".join(text_blocks)
+
+    assert streamed_text == expected
+    assert assembled_text == expected
+
+
+def test_openai_gpt4o_mini_v1_messages_api():
+    """Non-streaming /v1/chat/completions call to gpt-4o-mini via llm_gateway (port 12000)."""
+    # Drop the chat-completions path so only the gateway base URL remains.
+    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
+
+    # Dummy key for testing; the OpenAI client needs the /v1 suffix in its base URL.
+    gpt = openai.OpenAI(api_key="test-key", base_url=f"{gateway_root}/v1")
+
+    prompt = {
+        "role": "user",
+        "content": "Hello, please respond with exactly: Hello from GPT-4o-mini!",
+    }
+    response = gpt.chat.completions.create(
+        model="gpt-4o-mini",
+        max_tokens=50,
+        messages=[prompt],
+    )
+
+    # Compare the first choice's message text with the phrase requested above.
+    first_choice = response.choices[0]
+    answer = first_choice.message.content
+    assert answer == "Hello from GPT-4o-mini!"
+
+
+def test_openai_gpt4o_mini_v1_messages_api_streaming():
+    """Streaming /v1/chat/completions call to gpt-4o-mini via llm_gateway (port 12000).
+
+    Joins the streamed content deltas and compares the result with the phrase
+    the prompt asks for.
+    """
+    # Drop the chat-completions path so only the gateway base URL remains.
+    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
+
+    # Dummy key for testing; the OpenAI client needs the /v1 suffix in its base URL.
+    gpt = openai.OpenAI(api_key="test-key", base_url=f"{gateway_root}/v1")
+    expected = "Hello from GPT-4o-mini!"
+
+    prompt = {
+        "role": "user",
+        "content": "Hello, please respond with exactly: Hello from GPT-4o-mini!",
+    }
+    chunk_stream = gpt.chat.completions.create(
+        model="gpt-4o-mini",
+        max_tokens=50,
+        messages=[prompt],
+        stream=True,
+    )
+
+    # Keep every non-empty content delta of each chunk's first choice, in arrival order.
+    deltas = (chunk.choices[0].delta for chunk in chunk_stream)
+    text_parts = [delta.content for delta in deltas if delta.content]
+
+    # Stitch the deltas back together and compare with the requested phrase.
+    streamed_text = "".join(text_parts)
+    assert streamed_text == expected
+
+
+def test_openai_client_with_claude_model_streaming():
+    """Test OpenAI client using /v1/chat/completions API with Claude model (claude-sonnet-4-20250514).
+    This tests the transformation: Anthropic upstream -> OpenAI client format with proper chunk handling.
+    """
+    # Get the base URL from the LLM gateway endpoint
+    base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
+
+    client = openai.OpenAI(
+        api_key="test-key",  # Dummy key for testing
+        base_url=f"{base_url}/v1",  # OpenAI needs /v1 suffix in base_url
+    )
+
+    stream = client.chat.completions.create(
+        model="claude-sonnet-4-20250514",  # Claude model via OpenAI client
+        max_tokens=50,
+        messages=[
+            {
+                "role": "user",
+                "content": "Who are you? ALWAYS RESPOND WITH:I appreciate the request, but I should clarify that I'm Claude, made by Anthropic, not OpenAI. I don't want to create confusion about my origins.",
+            }
+        ],
+        stream=True,
+        temperature=0.1,
+    )
+
+    # Collect the content deltas; skip chunks that carry no choices or no text.
+    content_chunks = []
+    for chunk in stream:
+        if chunk.choices and chunk.choices[0].delta.content:
+            content_chunks.append(chunk.choices[0].delta.content)
+
+    # "".join() always returns a str, so check for non-empty text rather than None.
+    full_content = "".join(content_chunks)
+    assert full_content, "expected streamed text from the Claude model, got none"