From 00c543667d99b68a80b7e304c6be6ac0aaf2058d Mon Sep 17 00:00:00 2001 From: Salman Paracha Date: Thu, 4 Sep 2025 21:49:54 -0700 Subject: [PATCH] fixed integration tests and cleaned up logs --- arch/supervisord.conf | 2 +- crates/common/src/tokenizer.rs | 2 +- crates/llm_gateway/src/stream_context.rs | 168 +++++++++++++---------- crates/llm_gateway/tests/integration.rs | 119 +++++++++------- 4 files changed, 172 insertions(+), 119 deletions(-) diff --git a/arch/supervisord.conf b/arch/supervisord.conf index bec147cc..df25eea9 100644 --- a/arch/supervisord.conf +++ b/arch/supervisord.conf @@ -9,7 +9,7 @@ stdout_logfile_maxbytes=0 stderr_logfile_maxbytes=0 [program:envoy] -command=/bin/sh -c "python /app/config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log//envoy.log" +command=/bin/sh -c "python /app/config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug --log-format '[%%Y-%%m-%%d %%T.%%e][%%l] %%v' 2>&1 | tee /var/log//envoy.log" stdout_logfile=/dev/stdout redirect_stderr=true stdout_logfile_maxbytes=0 diff --git a/crates/common/src/tokenizer.rs b/crates/common/src/tokenizer.rs index ded885d5..9b11f639 100644 --- a/crates/common/src/tokenizer.rs +++ b/crates/common/src/tokenizer.rs @@ -2,7 +2,7 @@ use log::debug; #[allow(dead_code)] pub fn token_count(model_name: &str, text: &str) -> Result { - debug!("getting token count model={}", model_name); + debug!("TOKENIZER: computing token count for model={}", model_name); //HACK: add support for tokenizing mistral and other models //filed issue https://github.com/katanemo/arch/issues/222 diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 223e9a90..1465cf97 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -77,6 +77,16 @@ impl StreamContext { user_message: None, } } + + /// Returns the appropriate request identifier for logging. + /// Uses request_id (from x-request-id header) when available, otherwise falls back to context_id. + fn request_identifier(&self) -> String { + self.request_id + .as_ref() + .filter(|id| !id.is_empty()) // Filter out empty strings + .map(|id| id.clone()) + .unwrap_or_else(|| self.context_id.to_string()) + } fn llm_provider(&self) -> &LlmProvider { self.llm_provider .as_ref() @@ -111,7 +121,7 @@ impl StreamContext { info!( "[ARCHGW_REQ_ID:{}] PROVIDER_SELECTION: hint='{}' -> selected='{}'", - self.context_id, + self.request_identifier(), self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER) .unwrap_or("none".to_string()), self.llm_provider.as_ref().unwrap().name @@ -186,7 +196,9 @@ impl StreamContext { info!( "[ARCHGW_REQ_ID:{}] TOKEN_COUNT: model='{}' input_tokens={}", - self.context_id, model, token_count + self.request_identifier(), + model, + token_count ); // Record the token count to metrics. @@ -198,7 +210,10 @@ impl StreamContext { if let Some(selector) = self.ratelimit_selector.take() { info!( "[ARCHGW_REQ_ID:{}] RATELIMIT_CHECK: model='{}' selector='{}:{}'", - self.context_id, model, selector.key, selector.value + self.request_identifier(), + model, + selector.key, + selector.value ); ratelimit::ratelimits(None).read().unwrap().check_limit( model.to_owned(), @@ -208,7 +223,8 @@ impl StreamContext { } else { debug!( "[ARCHGW_REQ_ID:{}] RATELIMIT_SKIP: model='{}' (no selector)", - self.context_id, model + self.request_identifier(), + model ); } @@ -226,7 +242,8 @@ impl StreamContext { let duration_ms = duration.as_millis(); info!( "[ARCHGW_REQ_ID:{}] TIME_TO_FIRST_TOKEN: {}ms", - self.context_id, duration_ms + self.request_identifier(), + duration_ms ); self.ttft_duration = Some(duration); self.metrics.time_to_first_token.record(duration_ms as u64); @@ -234,7 +251,8 @@ impl StreamContext { Err(e) => { warn!( "[ARCHGW_REQ_ID:{}] TIME_MEASUREMENT_ERROR: {:?}", - self.context_id, e + self.request_identifier(), + e ); } } @@ -249,7 +267,9 @@ impl StreamContext { let duration_ms = duration.as_millis(); info!( "[ARCHGW_REQ_ID:{}] REQUEST_COMPLETE: latency={}ms tokens={}", - self.context_id, duration_ms, self.response_tokens + self.request_identifier(), + duration_ms, + self.response_tokens ); // Record the latency to the latency histogram self.metrics.request_latency.record(duration_ms as u64); @@ -263,7 +283,7 @@ impl StreamContext { info!( "[ARCHGW_REQ_ID:{}] TOKEN_THROUGHPUT: time_per_token={}ms tokens_per_second={}", - self.context_id, + self.request_identifier(), tpot, 1000 / tpot ); @@ -322,14 +342,16 @@ impl StreamContext { let chunk_size = body_size; debug!( "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_CHUNK: streaming=true chunk_size={}", - self.context_id, chunk_size + self.request_identifier(), + chunk_size ); let streaming_chunk = match self.get_http_response_body(0, chunk_size) { Some(chunk) => chunk, None => { warn!( "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_ERROR: empty chunk, size={}", - self.context_id, chunk_size + self.request_identifier(), + chunk_size ); return Err(Action::Continue); } @@ -338,7 +360,7 @@ impl StreamContext { if streaming_chunk.len() != chunk_size { warn!( "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_MISMATCH: expected={} actual={}", - self.context_id, + self.request_identifier(), chunk_size, streaming_chunk.len() ); @@ -350,7 +372,8 @@ impl StreamContext { } debug!( "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_COMPLETE: streaming=false body_size={}", - self.context_id, body_size + self.request_identifier(), + body_size ); match self.get_http_response_body(0, body_size) { Some(body) => Ok(body), @@ -365,7 +388,7 @@ impl StreamContext { fn debug_log_body(&self, body: &[u8]) { debug!( "[ARCHGW_REQ_ID:{}] UPSTREAM_RAW_RESPONSE: body_size={} content={}", - self.context_id, + self.request_identifier(), body.len(), String::from_utf8_lossy(body) ); @@ -378,7 +401,7 @@ impl StreamContext { ) -> Result, Action> { debug!( "[ARCHGW_REQ_ID:{}] STREAMING_PROCESS: provider_id={:?} chunk_size={}", - self.context_id, + self.request_identifier(), provider_id, body.len() ); @@ -420,7 +443,8 @@ impl StreamContext { if provider_response.is_final() { debug!( "[ARCHGW_REQ_ID:{}] STREAMING_FINAL_CHUNK: total_tokens={}", - self.context_id, self.response_tokens + self.request_identifier(), + self.response_tokens ); } @@ -429,7 +453,7 @@ impl StreamContext { self.response_tokens += estimated_tokens.max(1); debug!( "[ARCHGW_REQ_ID:{}] STREAMING_TOKEN_UPDATE: delta_chars={} estimated_tokens={} total_tokens={}", - self.context_id, + self.request_identifier(), content.len(), estimated_tokens.max(1), self.response_tokens @@ -439,7 +463,8 @@ impl StreamContext { Err(e) => { warn!( "[ARCHGW_REQ_ID:{}] STREAMING_CHUNK_ERROR: {}", - self.context_id, e + self.request_identifier(), + e ); return Err(Action::Continue); } @@ -467,7 +492,7 @@ impl StreamContext { ) -> Result, Action> { info!( "[ARCHGW_REQ_ID:{}] NON_STREAMING_PROCESS: provider_id={:?} body_size={}", - self.context_id, + self.request_identifier(), provider_id, body.len() ); @@ -479,7 +504,7 @@ impl StreamContext { Err(e) => { warn!( "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_PARSE_ERROR: {} | body: {}", - self.context_id, + self.request_identifier(), e, String::from_utf8_lossy(body) ); @@ -494,7 +519,7 @@ impl StreamContext { None => { warn!( "[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_ERROR: missing client_api", - self.context_id + self.request_identifier() ); return Err(Action::Continue); } @@ -506,7 +531,7 @@ impl StreamContext { { info!( "[ARCHGW_REQ_ID:{}] RESPONSE_USAGE: prompt_tokens={} completion_tokens={} total_tokens={}", - self.context_id, + self.request_identifier(), prompt_tokens, completion_tokens, total_tokens @@ -515,7 +540,7 @@ impl StreamContext { } else { warn!( "[ARCHGW_REQ_ID:{}] RESPONSE_USAGE: no usage information found", - self.context_id + self.request_identifier() ); } // Serialize the normalized response back to JSON bytes @@ -523,7 +548,7 @@ impl StreamContext { Ok(bytes) => { debug!( "[ARCHGW_REQ_ID:{}] CLIENT_RESPONSE_PAYLOAD: {}", - self.context_id, + self.request_identifier(), String::from_utf8_lossy(&bytes) ); Ok(bytes) @@ -628,7 +653,9 @@ impl HttpContext for StreamContext { fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action { debug!( "[ARCHGW_REQ_ID:{}] REQUEST_BODY_CHUNK: bytes={} end_stream={}", - self.context_id, body_size, end_of_stream + self.request_identifier(), + body_size, + end_of_stream ); // Let the client send the gateway all the data before sending to the LLM_provider. @@ -665,14 +692,14 @@ impl HttpContext for StreamContext { Some(the_client_api) => { info!( "[ARCHGW_REQ_ID:{}] CLIENT_REQUEST_RECEIVED: api={:?} body_size={}", - self.context_id, + self.request_identifier(), the_client_api, body_bytes.len() ); debug!( "[ARCHGW_REQ_ID:{}] CLIENT_REQUEST_PAYLOAD: {}", - self.context_id, + self.request_identifier(), String::from_utf8_lossy(&body_bytes) ); @@ -681,7 +708,7 @@ impl HttpContext for StreamContext { Err(e) => { warn!( "[ARCHGW_REQ_ID:{}] CLIENT_REQUEST_PARSE_ERROR: {} | body: {}", - self.context_id, + self.request_identifier(), e, String::from_utf8_lossy(&body_bytes) ); @@ -724,7 +751,7 @@ impl HttpContext for StreamContext { } else { warn!( "[ARCHGW_REQ_ID:{}] MODEL_RESOLUTION_ERROR: no model specified | req_model='{}' provider='{}' config_model={:?}", - self.context_id, + self.request_identifier(), model_requested, self.llm_provider().name, self.llm_provider().model @@ -753,7 +780,7 @@ impl HttpContext for StreamContext { info!( "[ARCHGW_REQ_ID:{}] MODEL_RESOLUTION: req_model='{}' -> resolved_model='{}' provider='{}' streaming={}", - self.context_id, + self.request_identifier(), model_requested, resolved_model, self.llm_provider().name, @@ -776,55 +803,56 @@ impl HttpContext for StreamContext { } // Convert chat completion request to llm provider specific request using provider interface - let serialized_body_bytes_upstream = match self.resolved_api.as_ref() { - Some(upstream) => { - info!( + let serialized_body_bytes_upstream = + match self.resolved_api.as_ref() { + Some(upstream) => { + info!( "[ARCHGW_REQ_ID:{}] UPSTREAM_TRANSFORM: client_api={:?} -> upstream_api={:?}", - self.context_id, self.client_api, upstream + self.request_identifier(), self.client_api, upstream ); - match ProviderRequestType::try_from((&deserialized_client_request, upstream)) { - Ok(request) => { - debug!( - "[ARCHGW_REQ_ID:{}] UPSTREAM_REQUEST_PAYLOAD: {}", - self.context_id, - String::from_utf8_lossy(&request.to_bytes().unwrap_or_default()) - ); + match ProviderRequestType::try_from((&deserialized_client_request, upstream)) { + Ok(request) => { + debug!( + "[ARCHGW_REQ_ID:{}] UPSTREAM_REQUEST_PAYLOAD: {}", + self.request_identifier(), + String::from_utf8_lossy(&request.to_bytes().unwrap_or_default()) + ); - match request.to_bytes() { - Ok(bytes) => bytes, - Err(e) => { - warn!("Failed to serialize request body: {}", e); - self.send_server_error( - ServerError::LogicError(format!( - "Request serialization error: {}", - e - )), - Some(StatusCode::BAD_REQUEST), - ); - return Action::Pause; + match request.to_bytes() { + Ok(bytes) => bytes, + Err(e) => { + warn!("Failed to serialize request body: {}", e); + self.send_server_error( + ServerError::LogicError(format!( + "Request serialization error: {}", + e + )), + Some(StatusCode::BAD_REQUEST), + ); + return Action::Pause; + } } } - } - Err(e) => { - warn!("Failed to create provider request: {}", e); - self.send_server_error( - ServerError::LogicError(format!("Provider request error: {}", e)), - Some(StatusCode::BAD_REQUEST), - ); - return Action::Pause; + Err(e) => { + warn!("Failed to create provider request: {}", e); + self.send_server_error( + ServerError::LogicError(format!("Provider request error: {}", e)), + Some(StatusCode::BAD_REQUEST), + ); + return Action::Pause; + } } } - } - None => { - warn!("No upstream API resolved"); - self.send_server_error( - ServerError::LogicError("No upstream API resolved".into()), - Some(StatusCode::BAD_REQUEST), - ); - return Action::Pause; - } - }; + None => { + warn!("No upstream API resolved"); + self.send_server_error( + ServerError::LogicError("No upstream API resolved".into()), + Some(StatusCode::BAD_REQUEST), + ); + return Action::Pause; + } + }; self.set_http_request_body(0, body_size, &serialized_body_bytes_upstream); Action::Continue diff --git a/crates/llm_gateway/tests/integration.rs b/crates/llm_gateway/tests/integration.rs index 82ae8322..24301111 100644 --- a/crates/llm_gateway/tests/integration.rs +++ b/crates/llm_gateway/tests/integration.rs @@ -31,8 +31,8 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) { ) .returning(None) .expect_log( - Some(LogLevel::Debug), - Some("request received: llm provider hint: default, selected provider: open-ai-gpt-4"), + Some(LogLevel::Info), + None, // Dynamic request ID - could be context_id or x-request-id ) .expect_add_header_map_value( Some(MapType::HttpRequestHeaders), @@ -193,10 +193,7 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() { module .call_proxy_on_context_create(http_context, filter_context) - .expect_log( - Some(LogLevel::Trace), - Some("||| create_http_context called with context_id: 2 |||"), - ) + .expect_log(Some(LogLevel::Trace), None) .execute_and_expect(ReturnType::None) .unwrap(); @@ -211,15 +208,19 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() { chat_completions_request_body.len() as i32, true, ) - .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - REQUEST_BODY_CHUNK .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) - .expect_log(Some(LogLevel::Info), None) - .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - CLIENT_REQUEST_RECEIVED + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - CLIENT_REQUEST_PAYLOAD + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - MODEL_RESOLUTION + .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - TOKEN_COUNT .expect_metric_record("input_sequence_length", 21) - .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK + .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=21")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - UPSTREAM_TRANSFORM + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - UPSTREAM_REQUEST_PAYLOAD .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -263,15 +264,19 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() { incomplete_chat_completions_request_body.len() as i32, true, ) + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - REQUEST_BODY_CHUNK .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(incomplete_chat_completions_request_body)) - .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4")) - .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4")) - .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 13")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - CLIENT_REQUEST_RECEIVED + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - CLIENT_REQUEST_PAYLOAD + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - MODEL_RESOLUTION + .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - TOKEN_COUNT .expect_metric_record("input_sequence_length", 13) - .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4")) - .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=13"#)) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK + .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=13")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -322,16 +327,18 @@ fn llm_gateway_request_ratelimited() { chat_completions_request_body.len() as i32, true, ) - .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID) .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id - .expect_log(Some(LogLevel::Info), None) - .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"messages\": [{\"role\": \"system\",\"content\": \"You are a helpful poetic assistant!, skilled in explaining complex programming concepts with creative flair. Be sure to be concise and to the point.\"},{\"role\": \"user\",\"content\": \"Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it.\"}],\"model\": \"gpt-4\"}")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) + .expect_log(Some(LogLevel::Info), None)// Dynamic request ID) .expect_metric_record("input_sequence_length", 107) - .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4")) - .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=107")) .expect_log(Some(LogLevel::Warn), Some(r#"server error occurred: exceeded limit provider=gpt-4, selector=Header { key: "selector-key", value: "selector-value" }, tokens_used=107"#)) .expect_send_local_response( Some(StatusCode::TOO_MANY_REQUESTS.as_u16().into()), @@ -376,16 +383,21 @@ fn llm_gateway_request_not_ratelimited() { chat_completions_request_body.len() as i32, true, ) - .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID) .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id .expect_log(Some(LogLevel::Info), None) - .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4")) - .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29")) + // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) .expect_metric_record("input_sequence_length", 29) - .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4")) - .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#)) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -423,16 +435,20 @@ fn llm_gateway_override_model_name() { chat_completions_request_body.len() as i32, true, ) + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID) .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id - .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4")) - .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4")) - .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) .expect_metric_record("input_sequence_length", 29) - .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4")) - .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#)) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -470,19 +486,23 @@ fn llm_gateway_override_use_default_model() { chat_completions_request_body.len() as i32, true, ) - .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID) .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}")) .expect_log( Some(LogLevel::Info), - Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"), + None // Dynamic request ID, ) - .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4")) - .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29")) + .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) .expect_metric_record("input_sequence_length", 29) - .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4")) - .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#)) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap(); @@ -520,16 +540,21 @@ fn llm_gateway_override_use_model_name_none() { chat_completions_request_body.len() as i32, true, ) + .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID) .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody)) .returning(Some(chat_completions_request_body)) // The actual call is not important in this test, we just need to grab the token_id - .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): none, model selected: gpt-4")) - .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4")) - .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29")) + .expect_log(Some(LogLevel::Info), None) + // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"none\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) .expect_metric_record("input_sequence_length", 29) - .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4")) - .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#)) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29")) + .expect_log(Some(LogLevel::Info), None) // Dynamic request ID) + .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}")) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) .execute_and_expect(ReturnType::Action(Action::Continue)) .unwrap();