mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
fixing comments from PR
This commit is contained in:
parent
1bacf9fa69
commit
788ff87a0c
9 changed files with 102 additions and 103 deletions
|
|
@ -4,6 +4,8 @@ use bytes::Bytes;
|
|||
use common::configuration::ModelUsagePreference;
|
||||
use common::consts::ARCH_PROVIDER_HINT_HEADER;
|
||||
use hermesllm::apis::openai::ChatCompletionsRequest;
|
||||
use hermesllm::clients::SupportedAPIs;
|
||||
use hermesllm::ProviderRequestType;
|
||||
use http_body_util::combinators::BoxBody;
|
||||
use http_body_util::{BodyExt, Full, StreamBody};
|
||||
use hyper::body::Frame;
|
||||
|
|
@ -33,54 +35,42 @@ pub async fn chat(
|
|||
let chat_request_bytes = request.collect().await?.to_bytes();
|
||||
|
||||
debug!("Received request body (raw utf8): {}", String::from_utf8_lossy(&chat_request_bytes));
|
||||
|
||||
let chat_request_parsed = serde_json::from_slice::<serde_json::Value>(&chat_request_bytes)
|
||||
.inspect_err(|err| {
|
||||
warn!(
|
||||
"Failed to parse request body as JSON: err: {}, str: {}",
|
||||
err,
|
||||
String::from_utf8_lossy(&chat_request_bytes)
|
||||
)
|
||||
})
|
||||
.unwrap_or_else(|_| {
|
||||
warn!(
|
||||
"Failed to parse request body as JSON: {}",
|
||||
String::from_utf8_lossy(&chat_request_bytes)
|
||||
);
|
||||
serde_json::Value::Null
|
||||
});
|
||||
|
||||
if chat_request_parsed == serde_json::Value::Null {
|
||||
warn!("Request body is not valid JSON");
|
||||
let err_msg = "Request body is not valid JSON".to_string();
|
||||
let mut bad_request = Response::new(full(err_msg));
|
||||
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
|
||||
return Ok(bad_request);
|
||||
}
|
||||
|
||||
let chat_completion_request: ChatCompletionsRequest =
|
||||
serde_json::from_value(chat_request_parsed.clone()).unwrap();
|
||||
|
||||
// remove metadata from the request
|
||||
let mut chat_request_user_preferences_removed = chat_request_parsed;
|
||||
if let Some(metadata) = chat_request_user_preferences_removed.get_mut("metadata") {
|
||||
if let Some(m) = metadata.as_object_mut() {
|
||||
m.remove("archgw_preference_config");
|
||||
debug!("Removed archgw_preference_config from metadata");
|
||||
let provider_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &SupportedAPIs::from_endpoint(request_path.as_str()).unwrap())) {
|
||||
Ok(request) => request,
|
||||
Err(err) => {
|
||||
warn!("Failed to parse request as ProviderRequestType: {}", err);
|
||||
let err_msg = format!("Failed to parse request: {}", err);
|
||||
let mut bad_request = Response::new(full(err_msg));
|
||||
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
|
||||
return Ok(bad_request);
|
||||
}
|
||||
};
|
||||
|
||||
// Convert to ChatCompletionsRequest regardless of input type
|
||||
let chat_completions_request_for_arch_router: ChatCompletionsRequest =
|
||||
match ProviderRequestType::try_from((provider_request, &SupportedAPIs::OpenAIChatCompletions(hermesllm::apis::OpenAIApi::ChatCompletions))) {
|
||||
Ok(ProviderRequestType::ChatCompletionsRequest(req)) => req,
|
||||
Ok(ProviderRequestType::MessagesRequest(_)) => {
|
||||
// This should not happen after conversion to OpenAI format
|
||||
warn!("Unexpected: got MessagesRequest after converting to OpenAI format");
|
||||
let err_msg = "Request conversion failed".to_string();
|
||||
let mut bad_request = Response::new(full(err_msg));
|
||||
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
|
||||
return Ok(bad_request);
|
||||
},
|
||||
Err(err) => {
|
||||
warn!("Failed to convert request to ChatCompletionsRequest: {}", err);
|
||||
let err_msg = format!("Failed to convert request: {}", err);
|
||||
let mut bad_request = Response::new(full(err_msg));
|
||||
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
|
||||
return Ok(bad_request);
|
||||
}
|
||||
};
|
||||
|
||||
// if metadata is empty, remove it
|
||||
if metadata.as_object().map_or(false, |m| m.is_empty()) {
|
||||
chat_request_user_preferences_removed
|
||||
.as_object_mut()
|
||||
.map(|m| m.remove("metadata"));
|
||||
debug!("Removed empty metadata from request");
|
||||
}
|
||||
}
|
||||
|
||||
debug!(
|
||||
"[BRIGHTSTAFF -> ARCH_ROUTER] REQ: {}",
|
||||
&serde_json::to_string(&chat_completion_request).unwrap()
|
||||
&serde_json::to_string(&chat_completions_request_for_arch_router).unwrap()
|
||||
);
|
||||
|
||||
let trace_parent = request_headers
|
||||
|
|
@ -89,7 +79,7 @@ pub async fn chat(
|
|||
.map(|(_, value)| value.to_str().unwrap_or_default().to_string());
|
||||
|
||||
let usage_preferences_str: Option<String> =
|
||||
chat_completion_request.metadata.and_then(|metadata| {
|
||||
chat_completions_request_for_arch_router.metadata.as_ref().and_then(|metadata| {
|
||||
metadata
|
||||
.get("archgw_preference_config")
|
||||
.map(|value| value.to_string())
|
||||
|
|
@ -100,7 +90,7 @@ pub async fn chat(
|
|||
.and_then(|s| serde_yaml::from_str(s).ok());
|
||||
|
||||
let latest_message_for_log =
|
||||
chat_completion_request
|
||||
chat_completions_request_for_arch_router
|
||||
.messages
|
||||
.last()
|
||||
.map_or("None".to_string(), |msg| {
|
||||
|
|
@ -125,7 +115,7 @@ pub async fn chat(
|
|||
|
||||
let model_name = match router_service
|
||||
.determine_route(
|
||||
&chat_completion_request.messages,
|
||||
&chat_completions_request_for_arch_router.messages,
|
||||
trace_parent.clone(),
|
||||
usage_preferences,
|
||||
)
|
||||
|
|
@ -136,9 +126,9 @@ pub async fn chat(
|
|||
None => {
|
||||
debug!(
|
||||
"No route determined, using default model from request: {}",
|
||||
chat_completion_request.model
|
||||
chat_completions_request_for_arch_router.model
|
||||
);
|
||||
chat_completion_request.model.clone()
|
||||
chat_completions_request_for_arch_router.model.clone()
|
||||
}
|
||||
},
|
||||
Err(err) => {
|
||||
|
|
@ -166,6 +156,19 @@ pub async fn chat(
|
|||
);
|
||||
}
|
||||
|
||||
// remove metadata from the request for downstream calls
|
||||
let mut chat_request_user_preferences_removed = chat_completions_request_for_arch_router.clone();
|
||||
if let Some(ref mut metadata) = chat_request_user_preferences_removed.metadata {
|
||||
metadata.remove("archgw_preference_config");
|
||||
debug!("Removed archgw_preference_config from metadata");
|
||||
|
||||
// if metadata is empty, remove it
|
||||
if metadata.is_empty() {
|
||||
chat_request_user_preferences_removed.metadata = None;
|
||||
debug!("Removed empty metadata from request");
|
||||
}
|
||||
}
|
||||
|
||||
let chat_request_parsed_bytes =
|
||||
serde_json::to_string(&chat_request_user_preferences_removed).unwrap();
|
||||
|
||||
|
|
|
|||
|
|
@ -33,8 +33,7 @@ pub fn get_llm_provider(
|
|||
return provider;
|
||||
}
|
||||
|
||||
//This is a fallback to the default provider if no specific provider is found.
|
||||
//For example, if the client sends in gpt-4-1 and that's not configured in arch_config, we fall back to the default.
|
||||
|
||||
if llm_providers.default().is_some() {
|
||||
return llm_providers.default().unwrap();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ use std::collections::HashMap;
|
|||
|
||||
use super::ApiDefinition;
|
||||
use crate::providers::request::{ProviderRequest, ProviderRequestError};
|
||||
use crate::providers::response::ProviderStreamResponse;
|
||||
use crate::providers::response::{ProviderResponse, ProviderStreamResponse};
|
||||
use crate::clients::transformer::ExtractText;
|
||||
use crate::{MESSAGES_PATH};
|
||||
|
||||
|
|
@ -416,11 +416,11 @@ impl TokenUsage for MessagesResponse {
|
|||
}
|
||||
}
|
||||
|
||||
impl MessagesResponse {
|
||||
pub fn usage(&self) -> Option<&dyn TokenUsage> {
|
||||
impl ProviderResponse for MessagesResponse {
|
||||
fn usage(&self) -> Option<&dyn TokenUsage> {
|
||||
Some(self)
|
||||
}
|
||||
pub fn extract_usage_counts(&self) -> Option<(usize, usize, usize)> {
|
||||
fn extract_usage_counts(&self) -> Option<(usize, usize, usize)> {
|
||||
Some((self.usage.input_tokens as usize, self.usage.output_tokens as usize, (self.usage.input_tokens + self.usage.output_tokens) as usize))
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ mod tests {
|
|||
assert!(sse_event.data.as_ref().unwrap().contains("Hello"));
|
||||
|
||||
// Test that we can parse the event into a provider stream response
|
||||
let transformed_event = SseEvent::try_from((&sse_event, &client_api, &upstream_api));
|
||||
let transformed_event = SseEvent::try_from((sse_event, &client_api, &upstream_api));
|
||||
if let Err(e) = &transformed_event {
|
||||
println!("Transform error: {:?}", e);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -119,7 +119,7 @@ pub struct SseEvent {
|
|||
pub raw_line: String, // The complete line as received including "data: " prefix and "\n\n"
|
||||
|
||||
#[serde(skip_serializing, skip_deserializing)]
|
||||
pub raw_line_transformed: String, // The complete line as received including "data: " prefix and "\n\n"
|
||||
pub sse_transform_buffer: String, // The complete line as received including "data: " prefix and "\n\n"
|
||||
|
||||
#[serde(skip_serializing, skip_deserializing)]
|
||||
pub provider_stream_response: Option<ProviderStreamResponseType>, // Parsed provider stream response object
|
||||
|
|
@ -159,7 +159,7 @@ impl FromStr for SseEvent {
|
|||
|
||||
fn from_str(line: &str) -> Result<Self, Self::Err> {
|
||||
if line.starts_with("data: ") {
|
||||
let data = line[6..].to_string(); // Remove "data: " prefix
|
||||
let data: String = line[6..].to_string(); // Remove "data: " prefix
|
||||
if data.is_empty() {
|
||||
return Err(SseParseError {
|
||||
message: "Empty data field is not a valid SSE event".to_string(),
|
||||
|
|
@ -168,9 +168,9 @@ impl FromStr for SseEvent {
|
|||
Ok(SseEvent {
|
||||
data: Some(data),
|
||||
event: None,
|
||||
raw_line: format!("{}\n\n", line),
|
||||
raw_line_transformed: format!("{}\n\n", line),
|
||||
provider_stream_response: None, // Will be populated later via TryFrom
|
||||
raw_line: line.to_string(),
|
||||
sse_transform_buffer: line.to_string(),
|
||||
provider_stream_response: None,
|
||||
})
|
||||
} else if line.starts_with("event: ") { //used by Anthropic
|
||||
let event_type = line[7..].to_string();
|
||||
|
|
@ -182,8 +182,8 @@ impl FromStr for SseEvent {
|
|||
Ok(SseEvent {
|
||||
data: None,
|
||||
event: Some(event_type),
|
||||
raw_line: format!("{}\n\n", line),
|
||||
raw_line_transformed: format!("{}\n\n", line),
|
||||
raw_line: line.to_string(),
|
||||
sse_transform_buffer: line.to_string(),
|
||||
provider_stream_response: None,
|
||||
})
|
||||
} else {
|
||||
|
|
@ -196,14 +196,14 @@ impl FromStr for SseEvent {
|
|||
|
||||
impl fmt::Display for SseEvent {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}", self.raw_line_transformed)
|
||||
write!(f, "{}", self.sse_transform_buffer)
|
||||
}
|
||||
}
|
||||
|
||||
// Into implementation to convert SseEvent to bytes for response buffer
|
||||
impl Into<Vec<u8>> for SseEvent {
|
||||
fn into(self) -> Vec<u8> {
|
||||
format!("{}\n\n", self.raw_line_transformed).into_bytes()
|
||||
format!("{}\n\n", self.sse_transform_buffer).into_bytes()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -280,20 +280,20 @@ impl TryFrom<(&[u8], &SupportedAPIs, &SupportedAPIs)> for ProviderStreamResponse
|
|||
}
|
||||
|
||||
// TryFrom implementation to convert raw bytes to SseEvent with parsed provider response
|
||||
impl TryFrom<(&SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent {
|
||||
impl TryFrom<(SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent {
|
||||
type Error = Box<dyn std::error::Error + Send + Sync>;
|
||||
|
||||
fn try_from((sse_event, client_api, upstream_api): (&SseEvent, &SupportedAPIs, &SupportedAPIs)) -> Result<Self, Self::Error> {
|
||||
fn try_from((sse_event, client_api, upstream_api): (SseEvent, &SupportedAPIs, &SupportedAPIs)) -> Result<Self, Self::Error> {
|
||||
// Create a new transformed event based on the original
|
||||
let mut transformed_event = sse_event.clone();
|
||||
let mut transformed_event = sse_event;
|
||||
|
||||
// If not [DONE] and has data, parse the data as a provider stream response (business logic layer)
|
||||
if !transformed_event.is_done() && sse_event.data.is_some() {
|
||||
let data_str = sse_event.data.as_ref().unwrap();
|
||||
if !transformed_event.is_done() && transformed_event.data.is_some() {
|
||||
let data_str = transformed_event.data.as_ref().unwrap();
|
||||
let data_bytes = data_str.as_bytes();
|
||||
let transformed_response = ProviderStreamResponseType::try_from((data_bytes, client_api, upstream_api))?;
|
||||
let transformed_json = serde_json::to_string(&transformed_response)?;
|
||||
transformed_event.raw_line_transformed = format!("data: {}\n\n", transformed_json);
|
||||
transformed_event.sse_transform_buffer = format!("data: {}\n\n", transformed_json);
|
||||
transformed_event.provider_stream_response = Some(transformed_response);
|
||||
}
|
||||
|
||||
|
|
@ -318,10 +318,10 @@ impl TryFrom<(&SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent {
|
|||
}
|
||||
});
|
||||
// Format as proper SSE: MessageStart first, then ContentBlockStart
|
||||
transformed_event.raw_line_transformed = format!(
|
||||
transformed_event.sse_transform_buffer = format!(
|
||||
"event: {}\n{}\nevent: content_block_start\ndata: {}\n\n",
|
||||
event_type,
|
||||
transformed_event.raw_line_transformed,
|
||||
transformed_event.sse_transform_buffer,
|
||||
content_block_start_json,
|
||||
);
|
||||
} else if event_type == "message_delta" {
|
||||
|
|
@ -330,14 +330,14 @@ impl TryFrom<(&SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent {
|
|||
"index": 0
|
||||
});
|
||||
// Format as proper SSE: ContentBlockStop first, then MessageDelta
|
||||
transformed_event.raw_line_transformed = format!(
|
||||
transformed_event.sse_transform_buffer = format!(
|
||||
"event: content_block_stop\ndata: {}\n\nevent: {}\n{}",
|
||||
content_block_stop_json,
|
||||
event_type,
|
||||
transformed_event.raw_line_transformed
|
||||
transformed_event.sse_transform_buffer
|
||||
);
|
||||
} else {
|
||||
transformed_event.raw_line_transformed = format!("event: {}\n{}", event_type, transformed_event.raw_line_transformed);
|
||||
transformed_event.sse_transform_buffer = format!("event: {}\n{}", event_type, transformed_event.sse_transform_buffer);
|
||||
}
|
||||
}
|
||||
// If event_type is None, we just keep the data line as-is without an event line
|
||||
|
|
@ -345,8 +345,8 @@ impl TryFrom<(&SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent {
|
|||
}
|
||||
}
|
||||
(SupportedAPIs::OpenAIChatCompletions(_), SupportedAPIs::AnthropicMessagesAPI(_)) => {
|
||||
if sse_event.is_event_only() && sse_event.event.is_some() {
|
||||
transformed_event.raw_line_transformed = format!("\n"); // suppress the event upstream for OpenAI
|
||||
if transformed_event.is_event_only() && transformed_event.event.is_some() {
|
||||
transformed_event.sse_transform_buffer = format!("\n"); // suppress the event upstream for OpenAI
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -585,11 +585,11 @@ mod tests {
|
|||
#[test]
|
||||
fn test_sse_event_parsing() {
|
||||
// Test valid SSE data line
|
||||
let line = r#"data: {"id":"test","object":"chat.completion.chunk"}"#;
|
||||
let line = "data: {\"id\":\"test\",\"object\":\"chat.completion.chunk\"}\n\n";
|
||||
let event: Result<SseEvent, _> = line.parse();
|
||||
assert!(event.is_ok());
|
||||
let event = event.unwrap();
|
||||
assert_eq!(event.data, Some(r#"{"id":"test","object":"chat.completion.chunk"}"#.to_string()));
|
||||
assert_eq!(event.data, Some("{\"id\":\"test\",\"object\":\"chat.completion.chunk\"}\n\n".to_string()));
|
||||
|
||||
// Test conversion back to line using Display trait
|
||||
let wire_format = event.to_string();
|
||||
|
|
@ -626,7 +626,7 @@ mod tests {
|
|||
raw_line: r#"data: {"id":"test","object":"chat.completion.chunk"}
|
||||
|
||||
"#.to_string(),
|
||||
raw_line_transformed: r#"data: {"id":"test","object":"chat.completion.chunk"}
|
||||
sse_transform_buffer: r#"data: {"id":"test","object":"chat.completion.chunk"}
|
||||
|
||||
"#.to_string(),
|
||||
provider_stream_response: None,
|
||||
|
|
@ -654,7 +654,7 @@ mod tests {
|
|||
data: Some(r#"{"type": "ping"}"#.to_string()),
|
||||
event: None,
|
||||
raw_line: r#"data: {"type": "ping"}"#.to_string(),
|
||||
raw_line_transformed: r#"data: {"type": "ping"}"#.to_string(),
|
||||
sse_transform_buffer: r#"data: {"type": "ping"}"#.to_string(),
|
||||
provider_stream_response: None,
|
||||
};
|
||||
assert!(ping_event.should_skip());
|
||||
|
|
@ -665,7 +665,7 @@ mod tests {
|
|||
data: Some(r#"{"id": "test", "object": "chat.completion.chunk"}"#.to_string()),
|
||||
event: Some("content_block_delta".to_string()),
|
||||
raw_line: r#"data: {"id": "test", "object": "chat.completion.chunk"}"#.to_string(),
|
||||
raw_line_transformed: r#"data: {"id": "test", "object": "chat.completion.chunk"}"#.to_string(),
|
||||
sse_transform_buffer: r#"data: {"id": "test", "object": "chat.completion.chunk"}"#.to_string(),
|
||||
provider_stream_response: None,
|
||||
};
|
||||
assert!(!normal_event.should_skip());
|
||||
|
|
@ -676,7 +676,7 @@ mod tests {
|
|||
data: Some("[DONE]".to_string()),
|
||||
event: None,
|
||||
raw_line: "data: [DONE]".to_string(),
|
||||
raw_line_transformed: "data: [DONE]".to_string(),
|
||||
sse_transform_buffer: "data: [DONE]".to_string(),
|
||||
provider_stream_response: None,
|
||||
};
|
||||
assert!(!done_event.should_skip());
|
||||
|
|
|
|||
|
|
@ -89,7 +89,6 @@ impl RootContext for FilterContext {
|
|||
);
|
||||
|
||||
Some(Box::new(StreamContext::new(
|
||||
context_id,
|
||||
Rc::clone(&self.metrics),
|
||||
Rc::clone(
|
||||
self.llm_providers
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@ use hermesllm::providers::response::{ProviderResponse, SseEvent, SseStreamIter};
|
|||
use hermesllm::{ProviderId, ProviderRequest, ProviderRequestType, ProviderResponseType};
|
||||
|
||||
pub struct StreamContext {
|
||||
context_id: u32,
|
||||
metrics: Rc<Metrics>,
|
||||
ratelimit_selector: Option<Header>,
|
||||
streaming_response: bool,
|
||||
|
|
@ -50,14 +49,12 @@ pub struct StreamContext {
|
|||
|
||||
impl StreamContext {
|
||||
pub fn new(
|
||||
context_id: u32,
|
||||
metrics: Rc<Metrics>,
|
||||
llm_providers: Rc<LlmProviders>,
|
||||
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
|
||||
overrides: Rc<Option<Overrides>>,
|
||||
) -> Self {
|
||||
StreamContext {
|
||||
context_id,
|
||||
metrics,
|
||||
overrides,
|
||||
ratelimit_selector: None,
|
||||
|
|
@ -79,13 +76,13 @@ impl StreamContext {
|
|||
}
|
||||
|
||||
/// Returns the appropriate request identifier for logging.
|
||||
/// Uses request_id (from x-request-id header) when available, otherwise falls back to context_id.
|
||||
/// Uses request_id (from x-request-id header) when available, otherwise returns a literal indicating no request ID.
|
||||
fn request_identifier(&self) -> String {
|
||||
self.request_id
|
||||
.as_ref()
|
||||
.filter(|id| !id.is_empty()) // Filter out empty strings
|
||||
.map(|id| id.clone())
|
||||
.unwrap_or_else(|| self.context_id.to_string())
|
||||
.unwrap_or_else(|| "NO_REQUEST_ID".to_string())
|
||||
}
|
||||
fn llm_provider(&self) -> &LlmProvider {
|
||||
self.llm_provider
|
||||
|
|
@ -145,14 +142,14 @@ impl StreamContext {
|
|||
Some(SupportedAPIs::AnthropicMessagesAPI(_)) => {
|
||||
// Anthropic API requires x-api-key and anthropic-version headers
|
||||
// Remove any existing Authorization header since Anthropic doesn't use it
|
||||
self.set_http_request_header("Authorization", None);
|
||||
self.remove_http_request_header("Authorization");
|
||||
self.set_http_request_header("x-api-key", Some(llm_provider_api_key_value));
|
||||
self.set_http_request_header("anthropic-version", Some("2023-06-01"));
|
||||
}
|
||||
Some(SupportedAPIs::OpenAIChatCompletions(_)) | None => {
|
||||
// OpenAI and default: use Authorization Bearer token
|
||||
// Remove any existing x-api-key header since OpenAI doesn't use it
|
||||
self.set_http_request_header("x-api-key", None);
|
||||
self.remove_http_request_header("x-api-key");
|
||||
let authorization_header_value = format!("Bearer {}", llm_provider_api_key_value);
|
||||
self.set_http_request_header("Authorization", Some(&authorization_header_value));
|
||||
}
|
||||
|
|
@ -430,7 +427,7 @@ impl StreamContext {
|
|||
for sse_event in sse_iter {
|
||||
// Transform event if upstream API != client API
|
||||
let transformed_event: SseEvent =
|
||||
match SseEvent::try_from((&sse_event, &client_api, &upstream_api)) {
|
||||
match SseEvent::try_from((sse_event, &client_api, &upstream_api)) {
|
||||
Ok(event) => event,
|
||||
Err(e) => {
|
||||
warn!("Failed to transform SSE event: {}", e);
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
|
|||
Some("openai"),
|
||||
)
|
||||
.expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-api-key"))
|
||||
.expect_add_header_map_value(
|
||||
.expect_replace_header_map_value(
|
||||
Some(MapType::HttpRequestHeaders),
|
||||
Some("Authorization"),
|
||||
Some("Bearer secret_key"),
|
||||
|
|
@ -277,7 +277,7 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
|
|||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK
|
||||
.expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=13"))
|
||||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
|
||||
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
|
||||
.execute_and_expect(ReturnType::Action(Action::Continue))
|
||||
.unwrap();
|
||||
|
|
@ -333,7 +333,7 @@ fn llm_gateway_request_ratelimited() {
|
|||
.returning(Some(chat_completions_request_body))
|
||||
// The actual call is not important in this test, we just need to grab the token_id
|
||||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"messages\": [{\"role\": \"system\",\"content\": \"You are a helpful poetic assistant!, skilled in explaining complex programming concepts with creative flair. Be sure to be concise and to the point.\"},{\"role\": \"user\",\"content\": \"Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it.\"}],\"model\": \"gpt-4\"}"))
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"messages\": [{\"role\": \"system\",\"content\": \"You are a helpful poetic assistant!, skilled in explaining complex programming concepts with creative flair. Be sure to be concise and to the point.\"},{\"role\": \"user\",\"content\": \"Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it.\"}],\"model\": \"gpt-4\"}"))
|
||||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
|
||||
.expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
|
||||
.expect_log(Some(LogLevel::Info), None)// Dynamic request ID)
|
||||
|
|
@ -390,7 +390,7 @@ fn llm_gateway_request_not_ratelimited() {
|
|||
// The actual call is not important in this test, we just need to grab the token_id
|
||||
.expect_log(Some(LogLevel::Info), None)
|
||||
// Dynamic request ID)
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
|
||||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
|
||||
.expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
|
||||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
|
||||
|
|
@ -398,7 +398,7 @@ fn llm_gateway_request_not_ratelimited() {
|
|||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
|
||||
.expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
|
||||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
|
||||
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
|
||||
.execute_and_expect(ReturnType::Action(Action::Continue))
|
||||
.unwrap();
|
||||
|
|
@ -441,7 +441,7 @@ fn llm_gateway_override_model_name() {
|
|||
.returning(Some(chat_completions_request_body))
|
||||
// The actual call is not important in this test, we just need to grab the token_id
|
||||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
|
||||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
|
||||
.expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
|
||||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
|
||||
|
|
@ -449,7 +449,7 @@ fn llm_gateway_override_model_name() {
|
|||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
|
||||
.expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
|
||||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
|
||||
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
|
||||
.execute_and_expect(ReturnType::Action(Action::Continue))
|
||||
.unwrap();
|
||||
|
|
@ -492,7 +492,7 @@ fn llm_gateway_override_use_default_model() {
|
|||
.returning(Some(chat_completions_request_body))
|
||||
// The actual call is not important in this test, we just need to grab the token_id
|
||||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
|
||||
.expect_log(
|
||||
Some(LogLevel::Info),
|
||||
None // Dynamic request ID,
|
||||
|
|
@ -503,7 +503,7 @@ fn llm_gateway_override_use_default_model() {
|
|||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
|
||||
.expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
|
||||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
|
||||
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
|
||||
.execute_and_expect(ReturnType::Action(Action::Continue))
|
||||
.unwrap();
|
||||
|
|
@ -547,7 +547,7 @@ fn llm_gateway_override_use_model_name_none() {
|
|||
// The actual call is not important in this test, we just need to grab the token_id
|
||||
.expect_log(Some(LogLevel::Info), None)
|
||||
// Dynamic request ID)
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] CLIENT_REQUEST_PAYLOAD: {\"model\":\"none\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"none\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
|
||||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
|
||||
.expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
|
||||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
|
||||
|
|
@ -555,7 +555,7 @@ fn llm_gateway_override_use_model_name_none() {
|
|||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
|
||||
.expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
|
||||
.expect_log(Some(LogLevel::Info), None) // Dynamic request ID)
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:2] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
|
||||
.expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
|
||||
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
|
||||
.execute_and_expect(ReturnType::Action(Action::Continue))
|
||||
.unwrap();
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ cd ../../
|
|||
archgw build
|
||||
cd -
|
||||
|
||||
# Once we build archgw we have to install the dependencies again to a new virtual environment.
|
||||
poetry install
|
||||
|
||||
log startup arch gateway with function calling demo
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue