From 788ff87a0c5217cc67af4ffa158735f0495ba3c8 Mon Sep 17 00:00:00 2001 From: Salman Paracha Date: Tue, 9 Sep 2025 15:39:56 -0700 Subject: [PATCH] fixing comments from PR --- .../src/handlers/chat_completions.rs | 99 ++++++++++--------- crates/common/src/routing.rs | 3 +- crates/hermesllm/src/apis/anthropic.rs | 8 +- crates/hermesllm/src/lib.rs | 2 +- crates/hermesllm/src/providers/response.rs | 56 +++++------ crates/llm_gateway/src/filter_context.rs | 1 - crates/llm_gateway/src/stream_context.rs | 13 +-- crates/llm_gateway/tests/integration.rs | 22 ++--- tests/e2e/run_e2e_tests.sh | 1 + 9 files changed, 102 insertions(+), 103 deletions(-) diff --git a/crates/brightstaff/src/handlers/chat_completions.rs b/crates/brightstaff/src/handlers/chat_completions.rs index 4c415b80..c2e7e78b 100644 --- a/crates/brightstaff/src/handlers/chat_completions.rs +++ b/crates/brightstaff/src/handlers/chat_completions.rs @@ -4,6 +4,8 @@ use bytes::Bytes; use common::configuration::ModelUsagePreference; use common::consts::ARCH_PROVIDER_HINT_HEADER; use hermesllm::apis::openai::ChatCompletionsRequest; +use hermesllm::clients::SupportedAPIs; +use hermesllm::ProviderRequestType; use http_body_util::combinators::BoxBody; use http_body_util::{BodyExt, Full, StreamBody}; use hyper::body::Frame; @@ -33,54 +35,42 @@ pub async fn chat( let chat_request_bytes = request.collect().await?.to_bytes(); debug!("Received request body (raw utf8): {}", String::from_utf8_lossy(&chat_request_bytes)); - - let chat_request_parsed = serde_json::from_slice::(&chat_request_bytes) - .inspect_err(|err| { - warn!( - "Failed to parse request body as JSON: err: {}, str: {}", - err, - String::from_utf8_lossy(&chat_request_bytes) - ) - }) - .unwrap_or_else(|_| { - warn!( - "Failed to parse request body as JSON: {}", - String::from_utf8_lossy(&chat_request_bytes) - ); - serde_json::Value::Null - }); - - if chat_request_parsed == serde_json::Value::Null { - warn!("Request body is not valid JSON"); - let err_msg = 
"Request body is not valid JSON".to_string(); - let mut bad_request = Response::new(full(err_msg)); - *bad_request.status_mut() = StatusCode::BAD_REQUEST; - return Ok(bad_request); - } - - let chat_completion_request: ChatCompletionsRequest = - serde_json::from_value(chat_request_parsed.clone()).unwrap(); - - // remove metadata from the request - let mut chat_request_user_preferences_removed = chat_request_parsed; - if let Some(metadata) = chat_request_user_preferences_removed.get_mut("metadata") { - if let Some(m) = metadata.as_object_mut() { - m.remove("archgw_preference_config"); - debug!("Removed archgw_preference_config from metadata"); + let provider_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &SupportedAPIs::from_endpoint(request_path.as_str()).unwrap())) { + Ok(request) => request, + Err(err) => { + warn!("Failed to parse request as ProviderRequestType: {}", err); + let err_msg = format!("Failed to parse request: {}", err); + let mut bad_request = Response::new(full(err_msg)); + *bad_request.status_mut() = StatusCode::BAD_REQUEST; + return Ok(bad_request); } + }; + + // Convert to ChatCompletionsRequest regardless of input type + let chat_completions_request_for_arch_router: ChatCompletionsRequest = + match ProviderRequestType::try_from((provider_request, &SupportedAPIs::OpenAIChatCompletions(hermesllm::apis::OpenAIApi::ChatCompletions))) { + Ok(ProviderRequestType::ChatCompletionsRequest(req)) => req, + Ok(ProviderRequestType::MessagesRequest(_)) => { + // This should not happen after conversion to OpenAI format + warn!("Unexpected: got MessagesRequest after converting to OpenAI format"); + let err_msg = "Request conversion failed".to_string(); + let mut bad_request = Response::new(full(err_msg)); + *bad_request.status_mut() = StatusCode::BAD_REQUEST; + return Ok(bad_request); + }, + Err(err) => { + warn!("Failed to convert request to ChatCompletionsRequest: {}", err); + let err_msg = format!("Failed to convert request: {}", 
err); + let mut bad_request = Response::new(full(err_msg)); + *bad_request.status_mut() = StatusCode::BAD_REQUEST; + return Ok(bad_request); + } + }; - // if metadata is empty, remove it - if metadata.as_object().map_or(false, |m| m.is_empty()) { - chat_request_user_preferences_removed - .as_object_mut() - .map(|m| m.remove("metadata")); - debug!("Removed empty metadata from request"); - } - } debug!( "[BRIGHTSTAFF -> ARCH_ROUTER] REQ: {}", - &serde_json::to_string(&chat_completion_request).unwrap() + &serde_json::to_string(&chat_completions_request_for_arch_router).unwrap() ); let trace_parent = request_headers @@ -89,7 +79,7 @@ pub async fn chat( .map(|(_, value)| value.to_str().unwrap_or_default().to_string()); let usage_preferences_str: Option = - chat_completion_request.metadata.and_then(|metadata| { + chat_completions_request_for_arch_router.metadata.as_ref().and_then(|metadata| { metadata .get("archgw_preference_config") .map(|value| value.to_string()) @@ -100,7 +90,7 @@ pub async fn chat( .and_then(|s| serde_yaml::from_str(s).ok()); let latest_message_for_log = - chat_completion_request + chat_completions_request_for_arch_router .messages .last() .map_or("None".to_string(), |msg| { @@ -125,7 +115,7 @@ pub async fn chat( let model_name = match router_service .determine_route( - &chat_completion_request.messages, + &chat_completions_request_for_arch_router.messages, trace_parent.clone(), usage_preferences, ) @@ -136,9 +126,9 @@ pub async fn chat( None => { debug!( "No route determined, using default model from request: {}", - chat_completion_request.model + chat_completions_request_for_arch_router.model ); - chat_completion_request.model.clone() + chat_completions_request_for_arch_router.model.clone() } }, Err(err) => { @@ -166,6 +156,19 @@ pub async fn chat( ); } + // remove metadata from the request for downstream calls + let mut chat_request_user_preferences_removed = chat_completions_request_for_arch_router.clone(); + if let Some(ref mut metadata) = 
chat_request_user_preferences_removed.metadata { + metadata.remove("archgw_preference_config"); + debug!("Removed archgw_preference_config from metadata"); + + // if metadata is empty, remove it + if metadata.is_empty() { + chat_request_user_preferences_removed.metadata = None; + debug!("Removed empty metadata from request"); + } + } + let chat_request_parsed_bytes = serde_json::to_string(&chat_request_user_preferences_removed).unwrap(); diff --git a/crates/common/src/routing.rs b/crates/common/src/routing.rs index 384fa281..2e9bac09 100644 --- a/crates/common/src/routing.rs +++ b/crates/common/src/routing.rs @@ -33,8 +33,7 @@ pub fn get_llm_provider( return provider; } - //This is a fallback to the default provider if no specific provider is found. - //For example, if the client sends in gpt-4-1 and that's not configured in arch_config, we fall back to the default. + if llm_providers.default().is_some() { return llm_providers.default().unwrap(); } diff --git a/crates/hermesllm/src/apis/anthropic.rs b/crates/hermesllm/src/apis/anthropic.rs index 6a2864dc..4125f8d7 100644 --- a/crates/hermesllm/src/apis/anthropic.rs +++ b/crates/hermesllm/src/apis/anthropic.rs @@ -6,7 +6,7 @@ use std::collections::HashMap; use super::ApiDefinition; use crate::providers::request::{ProviderRequest, ProviderRequestError}; -use crate::providers::response::ProviderStreamResponse; +use crate::providers::response::{ProviderResponse, ProviderStreamResponse}; use crate::clients::transformer::ExtractText; use crate::{MESSAGES_PATH}; @@ -416,11 +416,11 @@ impl TokenUsage for MessagesResponse { } } -impl MessagesResponse { - pub fn usage(&self) -> Option<&dyn TokenUsage> { +impl ProviderResponse for MessagesResponse { + fn usage(&self) -> Option<&dyn TokenUsage> { Some(self) } - pub fn extract_usage_counts(&self) -> Option<(usize, usize, usize)> { + fn extract_usage_counts(&self) -> Option<(usize, usize, usize)> { Some((self.usage.input_tokens as usize, self.usage.output_tokens as usize, 
(self.usage.input_tokens + self.usage.output_tokens) as usize)) } } diff --git a/crates/hermesllm/src/lib.rs b/crates/hermesllm/src/lib.rs index 88a4aa7b..a9e8c48e 100644 --- a/crates/hermesllm/src/lib.rs +++ b/crates/hermesllm/src/lib.rs @@ -56,7 +56,7 @@ mod tests { assert!(sse_event.data.as_ref().unwrap().contains("Hello")); // Test that we can parse the event into a provider stream response - let transformed_event = SseEvent::try_from((&sse_event, &client_api, &upstream_api)); + let transformed_event = SseEvent::try_from((sse_event, &client_api, &upstream_api)); if let Err(e) = &transformed_event { println!("Transform error: {:?}", e); } diff --git a/crates/hermesllm/src/providers/response.rs b/crates/hermesllm/src/providers/response.rs index 478ecfdb..13cad0cd 100644 --- a/crates/hermesllm/src/providers/response.rs +++ b/crates/hermesllm/src/providers/response.rs @@ -119,7 +119,7 @@ pub struct SseEvent { pub raw_line: String, // The complete line as received including "data: " prefix and "\n\n" #[serde(skip_serializing, skip_deserializing)] - pub raw_line_transformed: String, // The complete line as received including "data: " prefix and "\n\n" + pub sse_transform_buffer: String, // Outgoing SSE text: starts as a copy of the received line and is rewritten when the event is transformed between the client and upstream API formats #[serde(skip_serializing, skip_deserializing)] pub provider_stream_response: Option, // Parsed provider stream response object @@ -159,7 +159,7 @@ impl FromStr for SseEvent { fn from_str(line: &str) -> Result { if line.starts_with("data: ") { - let data = line[6..].to_string(); // Remove "data: " prefix + let data: String = line[6..].to_string(); // Remove "data: " prefix if data.is_empty() { return Err(SseParseError { message: "Empty data field is not a valid SSE event".to_string(), @@ -168,9 +168,9 @@ impl FromStr for SseEvent { Ok(SseEvent { data: Some(data), event: None, - raw_line: format!("{}\n\n", line), - raw_line_transformed: format!("{}\n\n", line), - provider_stream_response: None, // Will be populated 
later via TryFrom + raw_line: line.to_string(), + sse_transform_buffer: line.to_string(), + provider_stream_response: None, }) } else if line.starts_with("event: ") { //used by Anthropic let event_type = line[7..].to_string(); @@ -182,8 +182,8 @@ impl FromStr for SseEvent { Ok(SseEvent { data: None, event: Some(event_type), - raw_line: format!("{}\n\n", line), - raw_line_transformed: format!("{}\n\n", line), + raw_line: line.to_string(), + sse_transform_buffer: line.to_string(), provider_stream_response: None, }) } else { @@ -196,14 +196,14 @@ impl FromStr for SseEvent { impl fmt::Display for SseEvent { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.raw_line_transformed) + write!(f, "{}", self.sse_transform_buffer) } } // Into implementation to convert SseEvent to bytes for response buffer impl Into> for SseEvent { fn into(self) -> Vec { - format!("{}\n\n", self.raw_line_transformed).into_bytes() + format!("{}\n\n", self.sse_transform_buffer).into_bytes() } } @@ -280,20 +280,20 @@ impl TryFrom<(&[u8], &SupportedAPIs, &SupportedAPIs)> for ProviderStreamResponse } // TryFrom implementation to convert raw bytes to SseEvent with parsed provider response -impl TryFrom<(&SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent { +impl TryFrom<(SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent { type Error = Box; - fn try_from((sse_event, client_api, upstream_api): (&SseEvent, &SupportedAPIs, &SupportedAPIs)) -> Result { + fn try_from((sse_event, client_api, upstream_api): (SseEvent, &SupportedAPIs, &SupportedAPIs)) -> Result { // Create a new transformed event based on the original - let mut transformed_event = sse_event.clone(); + let mut transformed_event = sse_event; // If not [DONE] and has data, parse the data as a provider stream response (business logic layer) - if !transformed_event.is_done() && sse_event.data.is_some() { - let data_str = sse_event.data.as_ref().unwrap(); + if !transformed_event.is_done() && 
transformed_event.data.is_some() { + let data_str = transformed_event.data.as_ref().unwrap(); let data_bytes = data_str.as_bytes(); let transformed_response = ProviderStreamResponseType::try_from((data_bytes, client_api, upstream_api))?; let transformed_json = serde_json::to_string(&transformed_response)?; - transformed_event.raw_line_transformed = format!("data: {}\n\n", transformed_json); + transformed_event.sse_transform_buffer = format!("data: {}\n\n", transformed_json); transformed_event.provider_stream_response = Some(transformed_response); } @@ -318,10 +318,10 @@ impl TryFrom<(&SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent { } }); // Format as proper SSE: MessageStart first, then ContentBlockStart - transformed_event.raw_line_transformed = format!( + transformed_event.sse_transform_buffer = format!( "event: {}\n{}\nevent: content_block_start\ndata: {}\n\n", event_type, - transformed_event.raw_line_transformed, + transformed_event.sse_transform_buffer, content_block_start_json, ); } else if event_type == "message_delta" { @@ -330,14 +330,14 @@ impl TryFrom<(&SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent { "index": 0 }); // Format as proper SSE: ContentBlockStop first, then MessageDelta - transformed_event.raw_line_transformed = format!( + transformed_event.sse_transform_buffer = format!( "event: content_block_stop\ndata: {}\n\nevent: {}\n{}", content_block_stop_json, event_type, - transformed_event.raw_line_transformed + transformed_event.sse_transform_buffer ); } else { - transformed_event.raw_line_transformed = format!("event: {}\n{}", event_type, transformed_event.raw_line_transformed); + transformed_event.sse_transform_buffer = format!("event: {}\n{}", event_type, transformed_event.sse_transform_buffer); } } // If event_type is None, we just keep the data line as-is without an event line @@ -345,8 +345,8 @@ impl TryFrom<(&SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent { } } (SupportedAPIs::OpenAIChatCompletions(_), 
SupportedAPIs::AnthropicMessagesAPI(_)) => { - if sse_event.is_event_only() && sse_event.event.is_some() { - transformed_event.raw_line_transformed = format!("\n"); // suppress the event upstream for OpenAI + if transformed_event.is_event_only() && transformed_event.event.is_some() { + transformed_event.sse_transform_buffer = format!("\n"); // suppress the event upstream for OpenAI } } } @@ -585,11 +585,11 @@ mod tests { #[test] fn test_sse_event_parsing() { // Test valid SSE data line - let line = r#"data: {"id":"test","object":"chat.completion.chunk"}"#; + let line = "data: {\"id\":\"test\",\"object\":\"chat.completion.chunk\"}\n\n"; let event: Result = line.parse(); assert!(event.is_ok()); let event = event.unwrap(); - assert_eq!(event.data, Some(r#"{"id":"test","object":"chat.completion.chunk"}"#.to_string())); + assert_eq!(event.data, Some("{\"id\":\"test\",\"object\":\"chat.completion.chunk\"}\n\n".to_string())); // Test conversion back to line using Display trait let wire_format = event.to_string(); @@ -626,7 +626,7 @@ mod tests { raw_line: r#"data: {"id":"test","object":"chat.completion.chunk"} "#.to_string(), - raw_line_transformed: r#"data: {"id":"test","object":"chat.completion.chunk"} + sse_transform_buffer: r#"data: {"id":"test","object":"chat.completion.chunk"} "#.to_string(), provider_stream_response: None, @@ -654,7 +654,7 @@ mod tests { data: Some(r#"{"type": "ping"}"#.to_string()), event: None, raw_line: r#"data: {"type": "ping"}"#.to_string(), - raw_line_transformed: r#"data: {"type": "ping"}"#.to_string(), + sse_transform_buffer: r#"data: {"type": "ping"}"#.to_string(), provider_stream_response: None, }; assert!(ping_event.should_skip()); @@ -665,7 +665,7 @@ mod tests { data: Some(r#"{"id": "test", "object": "chat.completion.chunk"}"#.to_string()), event: Some("content_block_delta".to_string()), raw_line: r#"data: {"id": "test", "object": "chat.completion.chunk"}"#.to_string(), - raw_line_transformed: r#"data: {"id": "test", "object": 
"chat.completion.chunk"}"#.to_string(), + sse_transform_buffer: r#"data: {"id": "test", "object": "chat.completion.chunk"}"#.to_string(), provider_stream_response: None, }; assert!(!normal_event.should_skip()); @@ -676,7 +676,7 @@ mod tests { data: Some("[DONE]".to_string()), event: None, raw_line: "data: [DONE]".to_string(), - raw_line_transformed: "data: [DONE]".to_string(), + sse_transform_buffer: "data: [DONE]".to_string(), provider_stream_response: None, }; assert!(!done_event.should_skip()); diff --git a/crates/llm_gateway/src/filter_context.rs b/crates/llm_gateway/src/filter_context.rs index fc31355a..258a1a1c 100644 --- a/crates/llm_gateway/src/filter_context.rs +++ b/crates/llm_gateway/src/filter_context.rs @@ -89,7 +89,6 @@ impl RootContext for FilterContext { ); Some(Box::new(StreamContext::new( - context_id, Rc::clone(&self.metrics), Rc::clone( self.llm_providers diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index d08602ec..da86296d 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -26,7 +26,6 @@ use hermesllm::providers::response::{ProviderResponse, SseEvent, SseStreamIter}; use hermesllm::{ProviderId, ProviderRequest, ProviderRequestType, ProviderResponseType}; pub struct StreamContext { - context_id: u32, metrics: Rc, ratelimit_selector: Option
, streaming_response: bool, @@ -50,14 +49,12 @@ pub struct StreamContext { impl StreamContext { pub fn new( - context_id: u32, metrics: Rc, llm_providers: Rc, traces_queue: Arc>>, overrides: Rc>, ) -> Self { StreamContext { - context_id, metrics, overrides, ratelimit_selector: None, @@ -79,13 +76,13 @@ impl StreamContext { } /// Returns the appropriate request identifier for logging. - /// Uses request_id (from x-request-id header) when available, otherwise falls back to context_id. + /// Uses request_id (from x-request-id header) when available, otherwise returns a literal indicating no request ID. fn request_identifier(&self) -> String { self.request_id .as_ref() .filter(|id| !id.is_empty()) // Filter out empty strings .map(|id| id.clone()) - .unwrap_or_else(|| self.context_id.to_string()) + .unwrap_or_else(|| "NO_REQUEST_ID".to_string()) } fn llm_provider(&self) -> &LlmProvider { self.llm_provider @@ -145,14 +142,14 @@ impl StreamContext { Some(SupportedAPIs::AnthropicMessagesAPI(_)) => { // Anthropic API requires x-api-key and anthropic-version headers // Remove any existing Authorization header since Anthropic doesn't use it - self.set_http_request_header("Authorization", None); + self.remove_http_request_header("Authorization"); self.set_http_request_header("x-api-key", Some(llm_provider_api_key_value)); self.set_http_request_header("anthropic-version", Some("2023-06-01")); } Some(SupportedAPIs::OpenAIChatCompletions(_)) | None => { // OpenAI and default: use Authorization Bearer token // Remove any existing x-api-key header since OpenAI doesn't use it - self.set_http_request_header("x-api-key", None); + self.remove_http_request_header("x-api-key"); let authorization_header_value = format!("Bearer {}", llm_provider_api_key_value); self.set_http_request_header("Authorization", Some(&authorization_header_value)); } @@ -430,7 +427,7 @@ impl StreamContext { for sse_event in sse_iter { // Transform event if upstream API != client API let transformed_event: 
SseEvent = - match SseEvent::try_from((&sse_event, &client_api, &upstream_api)) { + match SseEvent::try_from((sse_event, &client_api, &upstream_api)) { Ok(event) => event, Err(e) => { warn!("Failed to transform SSE event: {}", e); diff --git a/crates/llm_gateway/tests/integration.rs b/crates/llm_gateway/tests/integration.rs index eba90ad5..48d4bc86 100644 --- a/crates/llm_gateway/tests/integration.rs +++ b/crates/llm_gateway/tests/integration.rs @@ -40,7 +40,7 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) { Some("openai"), ) .expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-api-key")) - .expect_add_header_map_value( + .expect_replace_header_map_value( Some(MapType::HttpRequestHeaders), Some("Authorization"), Some("Bearer secret_key"), @@ -277,7 +277,7 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() { .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=13")) .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK - .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -333,7 +333,7 @@ fn llm_gateway_request_ratelimited() { .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need 
to grab the token_id .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) - .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"messages\": [{\"role\": \"system\",\"content\": \"You are a helpful poetic assistant!, skilled in explaining complex programming concepts with creative flair. Be sure to be concise and to the point.\"},{\"role\": \"user\",\"content\": \"Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it.\"}],\"model\": \"gpt-4\"}")) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"messages\": [{\"role\": \"system\",\"content\": \"You are a helpful poetic assistant!, skilled in explaining complex programming concepts with creative flair. Be sure to be concise and to the point.\"},{\"role\": \"user\",\"content\": \"Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it. Compose a poem that explains the concept of recursion in programming. 
And also summarize it how a 4th graded would understand it.\"}],\"model\": \"gpt-4\"}")) .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) .expect_log(Some(LogLevel::Info), None)// Dynamic request ID) @@ -390,7 +390,7 @@ fn llm_gateway_request_not_ratelimited() { // The actual call is not important in this test, we just need to grab the token_id .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) - .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}")) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}")) .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) @@ -398,7 +398,7 @@ fn llm_gateway_request_not_ratelimited() { .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29")) .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) - .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a 
poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -441,7 +441,7 @@ fn llm_gateway_override_model_name() { .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) - .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}")) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}")) .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) @@ -449,7 +449,7 @@ fn 
llm_gateway_override_model_name() { .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29")) .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) - .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -492,7 +492,7 @@ fn llm_gateway_override_use_default_model() { .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) - .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}")) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: 
{\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}")) .expect_log( Some(LogLevel::Info), None // Dynamic request ID, @@ -503,7 +503,7 @@ fn llm_gateway_override_use_default_model() { .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29")) .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) - .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -547,7 +547,7 @@ fn llm_gateway_override_use_model_name_none() { // The actual call is not important in this test, we just need to grab the token_id .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) - .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"none\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, 
skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}")) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"none\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}")) .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) @@ -555,7 +555,7 @@ fn llm_gateway_override_use_model_name_none() { .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29")) .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) - .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) 
.execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); diff --git a/tests/e2e/run_e2e_tests.sh b/tests/e2e/run_e2e_tests.sh index 80ee0295..c716a182 100644 --- a/tests/e2e/run_e2e_tests.sh +++ b/tests/e2e/run_e2e_tests.sh @@ -48,6 +48,7 @@ cd ../../ archgw build cd - +# After building archgw, the dependencies have to be reinstalled into a new virtual environment. poetry install log startup arch gateway with function calling demo