fixing comments from PR

This commit is contained in:
Salman Paracha 2025-09-09 15:39:56 -07:00
parent 1bacf9fa69
commit 788ff87a0c
9 changed files with 102 additions and 103 deletions

View file

@ -4,6 +4,8 @@ use bytes::Bytes;
use common::configuration::ModelUsagePreference;
use common::consts::ARCH_PROVIDER_HINT_HEADER;
use hermesllm::apis::openai::ChatCompletionsRequest;
use hermesllm::clients::SupportedAPIs;
use hermesllm::ProviderRequestType;
use http_body_util::combinators::BoxBody;
use http_body_util::{BodyExt, Full, StreamBody};
use hyper::body::Frame;
@ -33,54 +35,42 @@ pub async fn chat(
let chat_request_bytes = request.collect().await?.to_bytes();
debug!("Received request body (raw utf8): {}", String::from_utf8_lossy(&chat_request_bytes));
let chat_request_parsed = serde_json::from_slice::<serde_json::Value>(&chat_request_bytes)
.inspect_err(|err| {
warn!(
"Failed to parse request body as JSON: err: {}, str: {}",
err,
String::from_utf8_lossy(&chat_request_bytes)
)
})
.unwrap_or_else(|_| {
warn!(
"Failed to parse request body as JSON: {}",
String::from_utf8_lossy(&chat_request_bytes)
);
serde_json::Value::Null
});
if chat_request_parsed == serde_json::Value::Null {
warn!("Request body is not valid JSON");
let err_msg = "Request body is not valid JSON".to_string();
let mut bad_request = Response::new(full(err_msg));
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
return Ok(bad_request);
}
let chat_completion_request: ChatCompletionsRequest =
serde_json::from_value(chat_request_parsed.clone()).unwrap();
// remove metadata from the request
let mut chat_request_user_preferences_removed = chat_request_parsed;
if let Some(metadata) = chat_request_user_preferences_removed.get_mut("metadata") {
if let Some(m) = metadata.as_object_mut() {
m.remove("archgw_preference_config");
debug!("Removed archgw_preference_config from metadata");
let provider_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &SupportedAPIs::from_endpoint(request_path.as_str()).unwrap())) {
Ok(request) => request,
Err(err) => {
warn!("Failed to parse request as ProviderRequestType: {}", err);
let err_msg = format!("Failed to parse request: {}", err);
let mut bad_request = Response::new(full(err_msg));
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
return Ok(bad_request);
}
};
// Convert to ChatCompletionsRequest regardless of input type
let chat_completions_request_for_arch_router: ChatCompletionsRequest =
match ProviderRequestType::try_from((provider_request, &SupportedAPIs::OpenAIChatCompletions(hermesllm::apis::OpenAIApi::ChatCompletions))) {
Ok(ProviderRequestType::ChatCompletionsRequest(req)) => req,
Ok(ProviderRequestType::MessagesRequest(_)) => {
// This should not happen after conversion to OpenAI format
warn!("Unexpected: got MessagesRequest after converting to OpenAI format");
let err_msg = "Request conversion failed".to_string();
let mut bad_request = Response::new(full(err_msg));
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
return Ok(bad_request);
},
Err(err) => {
warn!("Failed to convert request to ChatCompletionsRequest: {}", err);
let err_msg = format!("Failed to convert request: {}", err);
let mut bad_request = Response::new(full(err_msg));
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
return Ok(bad_request);
}
};
// if metadata is empty, remove it
if metadata.as_object().map_or(false, |m| m.is_empty()) {
chat_request_user_preferences_removed
.as_object_mut()
.map(|m| m.remove("metadata"));
debug!("Removed empty metadata from request");
}
}
debug!(
"[BRIGHTSTAFF -> ARCH_ROUTER] REQ: {}",
&serde_json::to_string(&chat_completion_request).unwrap()
&serde_json::to_string(&chat_completions_request_for_arch_router).unwrap()
);
let trace_parent = request_headers
@ -89,7 +79,7 @@ pub async fn chat(
.map(|(_, value)| value.to_str().unwrap_or_default().to_string());
let usage_preferences_str: Option<String> =
chat_completion_request.metadata.and_then(|metadata| {
chat_completions_request_for_arch_router.metadata.as_ref().and_then(|metadata| {
metadata
.get("archgw_preference_config")
.map(|value| value.to_string())
@ -100,7 +90,7 @@ pub async fn chat(
.and_then(|s| serde_yaml::from_str(s).ok());
let latest_message_for_log =
chat_completion_request
chat_completions_request_for_arch_router
.messages
.last()
.map_or("None".to_string(), |msg| {
@ -125,7 +115,7 @@ pub async fn chat(
let model_name = match router_service
.determine_route(
&chat_completion_request.messages,
&chat_completions_request_for_arch_router.messages,
trace_parent.clone(),
usage_preferences,
)
@ -136,9 +126,9 @@ pub async fn chat(
None => {
debug!(
"No route determined, using default model from request: {}",
chat_completion_request.model
chat_completions_request_for_arch_router.model
);
chat_completion_request.model.clone()
chat_completions_request_for_arch_router.model.clone()
}
},
Err(err) => {
@ -166,6 +156,19 @@ pub async fn chat(
);
}
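// Strip the archgw_preference_config entry from the request metadata before forwarding downstream (metadata itself is dropped only if that leaves it empty)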
let mut chat_request_user_preferences_removed = chat_completions_request_for_arch_router.clone();
if let Some(ref mut metadata) = chat_request_user_preferences_removed.metadata {
metadata.remove("archgw_preference_config");
debug!("Removed archgw_preference_config from metadata");
// if metadata is empty, remove it
if metadata.is_empty() {
chat_request_user_preferences_removed.metadata = None;
debug!("Removed empty metadata from request");
}
}
let chat_request_parsed_bytes =
serde_json::to_string(&chat_request_user_preferences_removed).unwrap();

View file

@ -33,8 +33,7 @@ pub fn get_llm_provider(
return provider;
}
//This is a fallback to the default provider if no specific provider is found.
//For example, if the client sends in gpt-4-1 and that's not configured in arch_config, we fall back to the default.
if llm_providers.default().is_some() {
return llm_providers.default().unwrap();
}

View file

@ -6,7 +6,7 @@ use std::collections::HashMap;
use super::ApiDefinition;
use crate::providers::request::{ProviderRequest, ProviderRequestError};
use crate::providers::response::ProviderStreamResponse;
use crate::providers::response::{ProviderResponse, ProviderStreamResponse};
use crate::clients::transformer::ExtractText;
use crate::{MESSAGES_PATH};
@ -416,11 +416,11 @@ impl TokenUsage for MessagesResponse {
}
}
impl MessagesResponse {
pub fn usage(&self) -> Option<&dyn TokenUsage> {
impl ProviderResponse for MessagesResponse {
fn usage(&self) -> Option<&dyn TokenUsage> {
Some(self)
}
pub fn extract_usage_counts(&self) -> Option<(usize, usize, usize)> {
fn extract_usage_counts(&self) -> Option<(usize, usize, usize)> {
Some((self.usage.input_tokens as usize, self.usage.output_tokens as usize, (self.usage.input_tokens + self.usage.output_tokens) as usize))
}
}

View file

@ -56,7 +56,7 @@ mod tests {
assert!(sse_event.data.as_ref().unwrap().contains("Hello"));
// Test that we can parse the event into a provider stream response
let transformed_event = SseEvent::try_from((&sse_event, &client_api, &upstream_api));
let transformed_event = SseEvent::try_from((sse_event, &client_api, &upstream_api));
if let Err(e) = &transformed_event {
println!("Transform error: {:?}", e);
}

View file

@ -119,7 +119,7 @@ pub struct SseEvent {
pub raw_line: String, // The complete line as received including "data: " prefix and "\n\n"
#[serde(skip_serializing, skip_deserializing)]
pub raw_line_transformed: String, // The complete line as received including "data: " prefix and "\n\n"
pub sse_transform_buffer: String, // SSE text emitted for this event; starts as the received line and is overwritten when the event is transformed
#[serde(skip_serializing, skip_deserializing)]
pub provider_stream_response: Option<ProviderStreamResponseType>, // Parsed provider stream response object
@ -159,7 +159,7 @@ impl FromStr for SseEvent {
fn from_str(line: &str) -> Result<Self, Self::Err> {
if line.starts_with("data: ") {
let data = line[6..].to_string(); // Remove "data: " prefix
let data: String = line[6..].to_string(); // Remove "data: " prefix
if data.is_empty() {
return Err(SseParseError {
message: "Empty data field is not a valid SSE event".to_string(),
@ -168,9 +168,9 @@ impl FromStr for SseEvent {
Ok(SseEvent {
data: Some(data),
event: None,
raw_line: format!("{}\n\n", line),
raw_line_transformed: format!("{}\n\n", line),
provider_stream_response: None, // Will be populated later via TryFrom
raw_line: line.to_string(),
sse_transform_buffer: line.to_string(),
provider_stream_response: None,
})
} else if line.starts_with("event: ") { //used by Anthropic
let event_type = line[7..].to_string();
@ -182,8 +182,8 @@ impl FromStr for SseEvent {
Ok(SseEvent {
data: None,
event: Some(event_type),
raw_line: format!("{}\n\n", line),
raw_line_transformed: format!("{}\n\n", line),
raw_line: line.to_string(),
sse_transform_buffer: line.to_string(),
provider_stream_response: None,
})
} else {
@ -196,14 +196,14 @@ impl FromStr for SseEvent {
impl fmt::Display for SseEvent {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.raw_line_transformed)
write!(f, "{}", self.sse_transform_buffer)
}
}
// Into implementation to convert SseEvent to bytes for response buffer
impl Into<Vec<u8>> for SseEvent {
fn into(self) -> Vec<u8> {
format!("{}\n\n", self.raw_line_transformed).into_bytes()
format!("{}\n\n", self.sse_transform_buffer).into_bytes()
}
}
@ -280,20 +280,20 @@ impl TryFrom<(&[u8], &SupportedAPIs, &SupportedAPIs)> for ProviderStreamResponse
}
// TryFrom implementation to transform an SseEvent between the client and upstream API formats, attaching the parsed provider stream response
impl TryFrom<(&SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent {
impl TryFrom<(SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from((sse_event, client_api, upstream_api): (&SseEvent, &SupportedAPIs, &SupportedAPIs)) -> Result<Self, Self::Error> {
fn try_from((sse_event, client_api, upstream_api): (SseEvent, &SupportedAPIs, &SupportedAPIs)) -> Result<Self, Self::Error> {
// Start from the original event; the transformed output is built up on it
let mut transformed_event = sse_event.clone();
let mut transformed_event = sse_event;
// If not [DONE] and has data, parse the data as a provider stream response (business logic layer)
if !transformed_event.is_done() && sse_event.data.is_some() {
let data_str = sse_event.data.as_ref().unwrap();
if !transformed_event.is_done() && transformed_event.data.is_some() {
let data_str = transformed_event.data.as_ref().unwrap();
let data_bytes = data_str.as_bytes();
let transformed_response = ProviderStreamResponseType::try_from((data_bytes, client_api, upstream_api))?;
let transformed_json = serde_json::to_string(&transformed_response)?;
transformed_event.raw_line_transformed = format!("data: {}\n\n", transformed_json);
transformed_event.sse_transform_buffer = format!("data: {}\n\n", transformed_json);
transformed_event.provider_stream_response = Some(transformed_response);
}
@ -318,10 +318,10 @@ impl TryFrom<(&SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent {
}
});
// Format as proper SSE: MessageStart first, then ContentBlockStart
transformed_event.raw_line_transformed = format!(
transformed_event.sse_transform_buffer = format!(
"event: {}\n{}\nevent: content_block_start\ndata: {}\n\n",
event_type,
transformed_event.raw_line_transformed,
transformed_event.sse_transform_buffer,
content_block_start_json,
);
} else if event_type == "message_delta" {
@ -330,14 +330,14 @@ impl TryFrom<(&SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent {
"index": 0
});
// Format as proper SSE: ContentBlockStop first, then MessageDelta
transformed_event.raw_line_transformed = format!(
transformed_event.sse_transform_buffer = format!(
"event: content_block_stop\ndata: {}\n\nevent: {}\n{}",
content_block_stop_json,
event_type,
transformed_event.raw_line_transformed
transformed_event.sse_transform_buffer
);
} else {
transformed_event.raw_line_transformed = format!("event: {}\n{}", event_type, transformed_event.raw_line_transformed);
transformed_event.sse_transform_buffer = format!("event: {}\n{}", event_type, transformed_event.sse_transform_buffer);
}
}
// If event_type is None, we just keep the data line as-is without an event line
@ -345,8 +345,8 @@ impl TryFrom<(&SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent {
}
}
(SupportedAPIs::OpenAIChatCompletions(_), SupportedAPIs::AnthropicMessagesAPI(_)) => {
if sse_event.is_event_only() && sse_event.event.is_some() {
transformed_event.raw_line_transformed = format!("\n"); // suppress the event upstream for OpenAI
if transformed_event.is_event_only() && transformed_event.event.is_some() {
transformed_event.sse_transform_buffer = format!("\n"); // suppress the event upstream for OpenAI
}
}
}
@ -585,11 +585,11 @@ mod tests {
#[test]
fn test_sse_event_parsing() {
// Test valid SSE data line
let line = r#"data: {"id":"test","object":"chat.completion.chunk"}"#;
let line = "data: {\"id\":\"test\",\"object\":\"chat.completion.chunk\"}\n\n";
let event: Result<SseEvent, _> = line.parse();
assert!(event.is_ok());
let event = event.unwrap();
assert_eq!(event.data, Some(r#"{"id":"test","object":"chat.completion.chunk"}"#.to_string()));
assert_eq!(event.data, Some("{\"id\":\"test\",\"object\":\"chat.completion.chunk\"}\n\n".to_string()));
// Test conversion back to line using Display trait
let wire_format = event.to_string();
@ -626,7 +626,7 @@ mod tests {
raw_line: r#"data: {"id":"test","object":"chat.completion.chunk"}
"#.to_string(),
raw_line_transformed: r#"data: {"id":"test","object":"chat.completion.chunk"}
sse_transform_buffer: r#"data: {"id":"test","object":"chat.completion.chunk"}
"#.to_string(),
provider_stream_response: None,
@ -654,7 +654,7 @@ mod tests {
data: Some(r#"{"type": "ping"}"#.to_string()),
event: None,
raw_line: r#"data: {"type": "ping"}"#.to_string(),
raw_line_transformed: r#"data: {"type": "ping"}"#.to_string(),
sse_transform_buffer: r#"data: {"type": "ping"}"#.to_string(),
provider_stream_response: None,
};
assert!(ping_event.should_skip());
@ -665,7 +665,7 @@ mod tests {
data: Some(r#"{"id": "test", "object": "chat.completion.chunk"}"#.to_string()),
event: Some("content_block_delta".to_string()),
raw_line: r#"data: {"id": "test", "object": "chat.completion.chunk"}"#.to_string(),
raw_line_transformed: r#"data: {"id": "test", "object": "chat.completion.chunk"}"#.to_string(),
sse_transform_buffer: r#"data: {"id": "test", "object": "chat.completion.chunk"}"#.to_string(),
provider_stream_response: None,
};
assert!(!normal_event.should_skip());
@ -676,7 +676,7 @@ mod tests {
data: Some("[DONE]".to_string()),
event: None,
raw_line: "data: [DONE]".to_string(),
raw_line_transformed: "data: [DONE]".to_string(),
sse_transform_buffer: "data: [DONE]".to_string(),
provider_stream_response: None,
};
assert!(!done_event.should_skip());

View file

@ -89,7 +89,6 @@ impl RootContext for FilterContext {
);
Some(Box::new(StreamContext::new(
context_id,
Rc::clone(&self.metrics),
Rc::clone(
self.llm_providers

View file

@ -26,7 +26,6 @@ use hermesllm::providers::response::{ProviderResponse, SseEvent, SseStreamIter};
use hermesllm::{ProviderId, ProviderRequest, ProviderRequestType, ProviderResponseType};
pub struct StreamContext {
context_id: u32,
metrics: Rc<Metrics>,
ratelimit_selector: Option<Header>,
streaming_response: bool,
@ -50,14 +49,12 @@ pub struct StreamContext {
impl StreamContext {
pub fn new(
context_id: u32,
metrics: Rc<Metrics>,
llm_providers: Rc<LlmProviders>,
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
overrides: Rc<Option<Overrides>>,
) -> Self {
StreamContext {
context_id,
metrics,
overrides,
ratelimit_selector: None,
@ -79,13 +76,13 @@ impl StreamContext {
}
/// Returns the appropriate request identifier for logging.
/// Uses request_id (from x-request-id header) when available, otherwise falls back to context_id.
/// Uses request_id (from x-request-id header) when available, otherwise returns a literal indicating no request ID.
fn request_identifier(&self) -> String {
self.request_id
.as_ref()
.filter(|id| !id.is_empty()) // Filter out empty strings
.map(|id| id.clone())
.unwrap_or_else(|| self.context_id.to_string())
.unwrap_or_else(|| "NO_REQUEST_ID".to_string())
}
fn llm_provider(&self) -> &LlmProvider {
self.llm_provider
@ -145,14 +142,14 @@ impl StreamContext {
Some(SupportedAPIs::AnthropicMessagesAPI(_)) => {
// Anthropic API requires x-api-key and anthropic-version headers
// Remove any existing Authorization header since Anthropic doesn't use it
self.set_http_request_header("Authorization", None);
self.remove_http_request_header("Authorization");
self.set_http_request_header("x-api-key", Some(llm_provider_api_key_value));
self.set_http_request_header("anthropic-version", Some("2023-06-01"));
}
Some(SupportedAPIs::OpenAIChatCompletions(_)) | None => {
// OpenAI and default: use Authorization Bearer token
// Remove any existing x-api-key header since OpenAI doesn't use it
self.set_http_request_header("x-api-key", None);
self.remove_http_request_header("x-api-key");
let authorization_header_value = format!("Bearer {}", llm_provider_api_key_value);
self.set_http_request_header("Authorization", Some(&authorization_header_value));
}
@ -430,7 +427,7 @@ impl StreamContext {
for sse_event in sse_iter {
// Transform event if upstream API != client API
let transformed_event: SseEvent =
match SseEvent::try_from((&sse_event, &client_api, &upstream_api)) {
match SseEvent::try_from((sse_event, &client_api, &upstream_api)) {
Ok(event) => event,
Err(e) => {
warn!("Failed to transform SSE event: {}", e);

View file

@ -40,7 +40,7 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
Some("openai"),
)
.expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-api-key"))
.expect_add_header_map_value(
.expect_replace_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("Authorization"),
Some("Bearer secret_key"),
@ -277,7 +277,7 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK
.expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=13"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
.execute_and_expect(ReturnType::Action(Action::Continue))
.unwrap();
@ -333,7 +333,7 @@ fn llm_gateway_request_ratelimited() {
.returning(Some(chat_completions_request_body))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"messages\": [{\"role\": \"system\",\"content\": \"You are a helpful poetic assistant!, skilled in explaining complex programming concepts with creative flair. Be sure to be concise and to the point.\"},{\"role\": \"user\",\"content\": \"Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it.\"}],\"model\": \"gpt-4\"}"))
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"messages\": [{\"role\": \"system\",\"content\": \"You are a helpful poetic assistant!, skilled in explaining complex programming concepts with creative flair. Be sure to be concise and to the point.\"},{\"role\": \"user\",\"content\": \"Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it.\"}],\"model\": \"gpt-4\"}"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
.expect_log(Some(LogLevel::Info), None)// Dynamic request ID)
@ -390,7 +390,7 @@ fn llm_gateway_request_not_ratelimited() {
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Info), None)
// Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
@ -398,7 +398,7 @@ fn llm_gateway_request_not_ratelimited() {
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
.execute_and_expect(ReturnType::Action(Action::Continue))
.unwrap();
@ -441,7 +441,7 @@ fn llm_gateway_override_model_name() {
.returning(Some(chat_completions_request_body))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
@ -449,7 +449,7 @@ fn llm_gateway_override_model_name() {
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
.execute_and_expect(ReturnType::Action(Action::Continue))
.unwrap();
@ -492,7 +492,7 @@ fn llm_gateway_override_use_default_model() {
.returning(Some(chat_completions_request_body))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
.expect_log(
Some(LogLevel::Info),
None // Dynamic request ID,
@ -503,7 +503,7 @@ fn llm_gateway_override_use_default_model() {
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
.execute_and_expect(ReturnType::Action(Action::Continue))
.unwrap();
@ -547,7 +547,7 @@ fn llm_gateway_override_use_model_name_none() {
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Info), None)
// Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"none\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"none\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
@ -555,7 +555,7 @@ fn llm_gateway_override_use_model_name_none() {
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
.execute_and_expect(ReturnType::Action(Action::Continue))
.unwrap();

View file

@ -48,6 +48,7 @@ cd ../../
archgw build
cd -
# Once we build archgw we have to install the dependencies again to a new virtual environment.
poetry install
log startup arch gateway with function calling demo