fixed integration tests and cleaned up logs

This commit is contained in:
Salman Paracha 2025-09-04 21:49:54 -07:00
parent ee52c608f7
commit 00c543667d
4 changed files with 172 additions and 119 deletions

View file

@ -9,7 +9,7 @@ stdout_logfile_maxbytes=0
stderr_logfile_maxbytes=0
[program:envoy]
command=/bin/sh -c "python /app/config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log//envoy.log"
command=/bin/sh -c "python /app/config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug --log-format '[%%Y-%%m-%%d %%T.%%e][%%l] %%v' 2>&1 | tee /var/log//envoy.log"
stdout_logfile=/dev/stdout
redirect_stderr=true
stdout_logfile_maxbytes=0

View file

@ -2,7 +2,7 @@ use log::debug;
#[allow(dead_code)]
pub fn token_count(model_name: &str, text: &str) -> Result<usize, String> {
debug!("getting token count model={}", model_name);
debug!("TOKENIZER: computing token count for model={}", model_name);
//HACK: add support for tokenizing mistral and other models
//filed issue https://github.com/katanemo/arch/issues/222

View file

@ -77,6 +77,16 @@ impl StreamContext {
user_message: None,
}
}
/// Returns the appropriate request identifier for logging.
/// Uses request_id (from x-request-id header) when available, otherwise falls back to context_id.
fn request_identifier(&self) -> String {
self.request_id
.as_ref()
.filter(|id| !id.is_empty()) // Filter out empty strings
.map(|id| id.clone())
.unwrap_or_else(|| self.context_id.to_string())
}
fn llm_provider(&self) -> &LlmProvider {
self.llm_provider
.as_ref()
@ -111,7 +121,7 @@ impl StreamContext {
info!(
"[ARCHGW_REQ_ID:{}] PROVIDER_SELECTION: hint='{}' -> selected='{}'",
self.context_id,
self.request_identifier(),
self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
.unwrap_or("none".to_string()),
self.llm_provider.as_ref().unwrap().name
@ -186,7 +196,9 @@ impl StreamContext {
info!(
"[ARCHGW_REQ_ID:{}] TOKEN_COUNT: model='{}' input_tokens={}",
self.context_id, model, token_count
self.request_identifier(),
model,
token_count
);
// Record the token count to metrics.
@ -198,7 +210,10 @@ impl StreamContext {
if let Some(selector) = self.ratelimit_selector.take() {
info!(
"[ARCHGW_REQ_ID:{}] RATELIMIT_CHECK: model='{}' selector='{}:{}'",
self.context_id, model, selector.key, selector.value
self.request_identifier(),
model,
selector.key,
selector.value
);
ratelimit::ratelimits(None).read().unwrap().check_limit(
model.to_owned(),
@ -208,7 +223,8 @@ impl StreamContext {
} else {
debug!(
"[ARCHGW_REQ_ID:{}] RATELIMIT_SKIP: model='{}' (no selector)",
self.context_id, model
self.request_identifier(),
model
);
}
@ -226,7 +242,8 @@ impl StreamContext {
let duration_ms = duration.as_millis();
info!(
"[ARCHGW_REQ_ID:{}] TIME_TO_FIRST_TOKEN: {}ms",
self.context_id, duration_ms
self.request_identifier(),
duration_ms
);
self.ttft_duration = Some(duration);
self.metrics.time_to_first_token.record(duration_ms as u64);
@ -234,7 +251,8 @@ impl StreamContext {
Err(e) => {
warn!(
"[ARCHGW_REQ_ID:{}] TIME_MEASUREMENT_ERROR: {:?}",
self.context_id, e
self.request_identifier(),
e
);
}
}
@ -249,7 +267,9 @@ impl StreamContext {
let duration_ms = duration.as_millis();
info!(
"[ARCHGW_REQ_ID:{}] REQUEST_COMPLETE: latency={}ms tokens={}",
self.context_id, duration_ms, self.response_tokens
self.request_identifier(),
duration_ms,
self.response_tokens
);
// Record the latency to the latency histogram
self.metrics.request_latency.record(duration_ms as u64);
@ -263,7 +283,7 @@ impl StreamContext {
info!(
"[ARCHGW_REQ_ID:{}] TOKEN_THROUGHPUT: time_per_token={}ms tokens_per_second={}",
self.context_id,
self.request_identifier(),
tpot,
1000 / tpot
);
@ -322,14 +342,16 @@ impl StreamContext {
let chunk_size = body_size;
debug!(
"[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_CHUNK: streaming=true chunk_size={}",
self.context_id, chunk_size
self.request_identifier(),
chunk_size
);
let streaming_chunk = match self.get_http_response_body(0, chunk_size) {
Some(chunk) => chunk,
None => {
warn!(
"[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_ERROR: empty chunk, size={}",
self.context_id, chunk_size
self.request_identifier(),
chunk_size
);
return Err(Action::Continue);
}
@ -338,7 +360,7 @@ impl StreamContext {
if streaming_chunk.len() != chunk_size {
warn!(
"[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_MISMATCH: expected={} actual={}",
self.context_id,
self.request_identifier(),
chunk_size,
streaming_chunk.len()
);
@ -350,7 +372,8 @@ impl StreamContext {
}
debug!(
"[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_COMPLETE: streaming=false body_size={}",
self.context_id, body_size
self.request_identifier(),
body_size
);
match self.get_http_response_body(0, body_size) {
Some(body) => Ok(body),
@ -365,7 +388,7 @@ impl StreamContext {
fn debug_log_body(&self, body: &[u8]) {
debug!(
"[ARCHGW_REQ_ID:{}] UPSTREAM_RAW_RESPONSE: body_size={} content={}",
self.context_id,
self.request_identifier(),
body.len(),
String::from_utf8_lossy(body)
);
@ -378,7 +401,7 @@ impl StreamContext {
) -> Result<Vec<u8>, Action> {
debug!(
"[ARCHGW_REQ_ID:{}] STREAMING_PROCESS: provider_id={:?} chunk_size={}",
self.context_id,
self.request_identifier(),
provider_id,
body.len()
);
@ -420,7 +443,8 @@ impl StreamContext {
if provider_response.is_final() {
debug!(
"[ARCHGW_REQ_ID:{}] STREAMING_FINAL_CHUNK: total_tokens={}",
self.context_id, self.response_tokens
self.request_identifier(),
self.response_tokens
);
}
@ -429,7 +453,7 @@ impl StreamContext {
self.response_tokens += estimated_tokens.max(1);
debug!(
"[ARCHGW_REQ_ID:{}] STREAMING_TOKEN_UPDATE: delta_chars={} estimated_tokens={} total_tokens={}",
self.context_id,
self.request_identifier(),
content.len(),
estimated_tokens.max(1),
self.response_tokens
@ -439,7 +463,8 @@ impl StreamContext {
Err(e) => {
warn!(
"[ARCHGW_REQ_ID:{}] STREAMING_CHUNK_ERROR: {}",
self.context_id, e
self.request_identifier(),
e
);
return Err(Action::Continue);
}
@ -467,7 +492,7 @@ impl StreamContext {
) -> Result<Vec<u8>, Action> {
info!(
"[ARCHGW_REQ_ID:{}] NON_STREAMING_PROCESS: provider_id={:?} body_size={}",
self.context_id,
self.request_identifier(),
provider_id,
body.len()
);
@ -479,7 +504,7 @@ impl StreamContext {
Err(e) => {
warn!(
"[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_PARSE_ERROR: {} | body: {}",
self.context_id,
self.request_identifier(),
e,
String::from_utf8_lossy(body)
);
@ -494,7 +519,7 @@ impl StreamContext {
None => {
warn!(
"[ARCHGW_REQ_ID:{}] UPSTREAM_RESPONSE_ERROR: missing client_api",
self.context_id
self.request_identifier()
);
return Err(Action::Continue);
}
@ -506,7 +531,7 @@ impl StreamContext {
{
info!(
"[ARCHGW_REQ_ID:{}] RESPONSE_USAGE: prompt_tokens={} completion_tokens={} total_tokens={}",
self.context_id,
self.request_identifier(),
prompt_tokens,
completion_tokens,
total_tokens
@ -515,7 +540,7 @@ impl StreamContext {
} else {
warn!(
"[ARCHGW_REQ_ID:{}] RESPONSE_USAGE: no usage information found",
self.context_id
self.request_identifier()
);
}
// Serialize the normalized response back to JSON bytes
@ -523,7 +548,7 @@ impl StreamContext {
Ok(bytes) => {
debug!(
"[ARCHGW_REQ_ID:{}] CLIENT_RESPONSE_PAYLOAD: {}",
self.context_id,
self.request_identifier(),
String::from_utf8_lossy(&bytes)
);
Ok(bytes)
@ -628,7 +653,9 @@ impl HttpContext for StreamContext {
fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
debug!(
"[ARCHGW_REQ_ID:{}] REQUEST_BODY_CHUNK: bytes={} end_stream={}",
self.context_id, body_size, end_of_stream
self.request_identifier(),
body_size,
end_of_stream
);
// Let the client send the gateway all the data before sending to the LLM_provider.
@ -665,14 +692,14 @@ impl HttpContext for StreamContext {
Some(the_client_api) => {
info!(
"[ARCHGW_REQ_ID:{}] CLIENT_REQUEST_RECEIVED: api={:?} body_size={}",
self.context_id,
self.request_identifier(),
the_client_api,
body_bytes.len()
);
debug!(
"[ARCHGW_REQ_ID:{}] CLIENT_REQUEST_PAYLOAD: {}",
self.context_id,
self.request_identifier(),
String::from_utf8_lossy(&body_bytes)
);
@ -681,7 +708,7 @@ impl HttpContext for StreamContext {
Err(e) => {
warn!(
"[ARCHGW_REQ_ID:{}] CLIENT_REQUEST_PARSE_ERROR: {} | body: {}",
self.context_id,
self.request_identifier(),
e,
String::from_utf8_lossy(&body_bytes)
);
@ -724,7 +751,7 @@ impl HttpContext for StreamContext {
} else {
warn!(
"[ARCHGW_REQ_ID:{}] MODEL_RESOLUTION_ERROR: no model specified | req_model='{}' provider='{}' config_model={:?}",
self.context_id,
self.request_identifier(),
model_requested,
self.llm_provider().name,
self.llm_provider().model
@ -753,7 +780,7 @@ impl HttpContext for StreamContext {
info!(
"[ARCHGW_REQ_ID:{}] MODEL_RESOLUTION: req_model='{}' -> resolved_model='{}' provider='{}' streaming={}",
self.context_id,
self.request_identifier(),
model_requested,
resolved_model,
self.llm_provider().name,
@ -776,55 +803,56 @@ impl HttpContext for StreamContext {
}
// Convert chat completion request to llm provider specific request using provider interface
let serialized_body_bytes_upstream = match self.resolved_api.as_ref() {
Some(upstream) => {
info!(
let serialized_body_bytes_upstream =
match self.resolved_api.as_ref() {
Some(upstream) => {
info!(
"[ARCHGW_REQ_ID:{}] UPSTREAM_TRANSFORM: client_api={:?} -> upstream_api={:?}",
self.context_id, self.client_api, upstream
self.request_identifier(), self.client_api, upstream
);
match ProviderRequestType::try_from((&deserialized_client_request, upstream)) {
Ok(request) => {
debug!(
"[ARCHGW_REQ_ID:{}] UPSTREAM_REQUEST_PAYLOAD: {}",
self.context_id,
String::from_utf8_lossy(&request.to_bytes().unwrap_or_default())
);
match ProviderRequestType::try_from((&deserialized_client_request, upstream)) {
Ok(request) => {
debug!(
"[ARCHGW_REQ_ID:{}] UPSTREAM_REQUEST_PAYLOAD: {}",
self.request_identifier(),
String::from_utf8_lossy(&request.to_bytes().unwrap_or_default())
);
match request.to_bytes() {
Ok(bytes) => bytes,
Err(e) => {
warn!("Failed to serialize request body: {}", e);
self.send_server_error(
ServerError::LogicError(format!(
"Request serialization error: {}",
e
)),
Some(StatusCode::BAD_REQUEST),
);
return Action::Pause;
match request.to_bytes() {
Ok(bytes) => bytes,
Err(e) => {
warn!("Failed to serialize request body: {}", e);
self.send_server_error(
ServerError::LogicError(format!(
"Request serialization error: {}",
e
)),
Some(StatusCode::BAD_REQUEST),
);
return Action::Pause;
}
}
}
}
Err(e) => {
warn!("Failed to create provider request: {}", e);
self.send_server_error(
ServerError::LogicError(format!("Provider request error: {}", e)),
Some(StatusCode::BAD_REQUEST),
);
return Action::Pause;
Err(e) => {
warn!("Failed to create provider request: {}", e);
self.send_server_error(
ServerError::LogicError(format!("Provider request error: {}", e)),
Some(StatusCode::BAD_REQUEST),
);
return Action::Pause;
}
}
}
}
None => {
warn!("No upstream API resolved");
self.send_server_error(
ServerError::LogicError("No upstream API resolved".into()),
Some(StatusCode::BAD_REQUEST),
);
return Action::Pause;
}
};
None => {
warn!("No upstream API resolved");
self.send_server_error(
ServerError::LogicError("No upstream API resolved".into()),
Some(StatusCode::BAD_REQUEST),
);
return Action::Pause;
}
};
self.set_http_request_body(0, body_size, &serialized_body_bytes_upstream);
Action::Continue

View file

@ -31,8 +31,8 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
)
.returning(None)
.expect_log(
Some(LogLevel::Debug),
Some("request received: llm provider hint: default, selected provider: open-ai-gpt-4"),
Some(LogLevel::Info),
None, // Dynamic request ID - could be context_id or x-request-id
)
.expect_add_header_map_value(
Some(MapType::HttpRequestHeaders),
@ -193,10 +193,7 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
module
.call_proxy_on_context_create(http_context, filter_context)
.expect_log(
Some(LogLevel::Trace),
Some("||| create_http_context called with context_id: 2 |||"),
)
.expect_log(Some(LogLevel::Trace), None)
.execute_and_expect(ReturnType::None)
.unwrap();
@ -211,15 +208,19 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
chat_completions_request_body.len() as i32,
true,
)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - REQUEST_BODY_CHUNK
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
.expect_log(Some(LogLevel::Info), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID - CLIENT_REQUEST_RECEIVED
.expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - CLIENT_REQUEST_PAYLOAD
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID - MODEL_RESOLUTION
.expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID - TOKEN_COUNT
.expect_metric_record("input_sequence_length", 21)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK
.expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=21"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID - UPSTREAM_TRANSFORM
.expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - UPSTREAM_REQUEST_PAYLOAD
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
.execute_and_expect(ReturnType::Action(Action::Continue))
.unwrap();
@ -263,15 +264,19 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
incomplete_chat_completions_request_body.len() as i32,
true,
)
.expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - REQUEST_BODY_CHUNK
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(incomplete_chat_completions_request_body))
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"))
.expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
.expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 13"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID - CLIENT_REQUEST_RECEIVED
.expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - CLIENT_REQUEST_PAYLOAD
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID - MODEL_RESOLUTION
.expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID - TOKEN_COUNT
.expect_metric_record("input_sequence_length", 13)
.expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
.expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=13"#))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK
.expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=13"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
.execute_and_expect(ReturnType::Action(Action::Continue))
.unwrap();
@ -322,16 +327,18 @@ fn llm_gateway_request_ratelimited() {
chat_completions_request_body.len() as i32,
true,
)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None) // Dynamic request ID)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Info), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"messages\": [{\"role\": \"system\",\"content\": \"You are a helpful poetic assistant!, skilled in explaining complex programming concepts with creative flair. Be sure to be concise and to the point.\"},{\"role\": \"user\",\"content\": \"Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it.\"}],\"model\": \"gpt-4\"}"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
.expect_log(Some(LogLevel::Info), None)// Dynamic request ID)
.expect_metric_record("input_sequence_length", 107)
.expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=107"))
.expect_log(Some(LogLevel::Warn), Some(r#"server error occurred: exceeded limit provider=gpt-4, selector=Header { key: "selector-key", value: "selector-value" }, tokens_used=107"#))
.expect_send_local_response(
Some(StatusCode::TOO_MANY_REQUESTS.as_u16().into()),
@ -376,16 +383,21 @@ fn llm_gateway_request_not_ratelimited() {
chat_completions_request_body.len() as i32,
true,
)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None) // Dynamic request ID)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Info), None)
.expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
.expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
// Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_metric_record("input_sequence_length", 29)
.expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
.expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
.execute_and_expect(ReturnType::Action(Action::Continue))
.unwrap();
@ -423,16 +435,20 @@ fn llm_gateway_override_model_name() {
chat_completions_request_body.len() as i32,
true,
)
.expect_log(Some(LogLevel::Debug), None) // Dynamic request ID)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"))
.expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
.expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_metric_record("input_sequence_length", 29)
.expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
.expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
.execute_and_expect(ReturnType::Action(Action::Continue))
.unwrap();
@ -470,19 +486,23 @@ fn llm_gateway_override_use_default_model() {
chat_completions_request_body.len() as i32,
true,
)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None) // Dynamic request ID)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
.expect_log(
Some(LogLevel::Info),
Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"),
None // Dynamic request ID,
)
.expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
.expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
.expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_metric_record("input_sequence_length", 29)
.expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
.expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
.execute_and_expect(ReturnType::Action(Action::Continue))
.unwrap();
@ -520,16 +540,21 @@ fn llm_gateway_override_use_model_name_none() {
chat_completions_request_body.len() as i32,
true,
)
.expect_log(Some(LogLevel::Debug), None) // Dynamic request ID)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): none, model selected: gpt-4"))
.expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
.expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
.expect_log(Some(LogLevel::Info), None)
// Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"none\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_metric_record("input_sequence_length", 29)
.expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
.expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
.execute_and_expect(ReturnType::Action(Action::Continue))
.unwrap();