Mirror of https://github.com/katanemo/plano.git
Synced 2026-05-13 17:52:38 +02:00
add support for v1/messages and transformations (#558)
* pushing draft PR
* transformations are working; now need to add some tests
* updated tests and added the necessary response transformations for Anthropic's Messages response object
* fixed bugs in integration tests
* fixed doc tests
* fixed serialization issues with enums on the response
* added some debug logs to help
* fixed issues with non-streaming responses
* updated the stream_context to update response bytes
* the serialized bytes length must be set on the response side
* fixed the debug statement that was causing the wasm integration tests to fail
* fixed JSON parsing errors
* intentionally removed the headers
* made sure we convert the raw bytes to the correct provider type upstream
* fixed non-streaming responses to transform correctly
* /v1/messages works with transformations to and from /v1/chat/completions
* updated the CLI and demos to support anthropic vs. claude
* added the anthropic key to the preference-based routing tests
* fixed test cases and added more structured logs
* fixed integration tests and cleaned up logs
* added Python client tests for anthropic and openai
* cleaned up logs and fixed a connectivity issue for the llm gateway in the weather-forecast demo
* fixed the tests; the Python dependency order was broken
* updated the OpenAI client to fix the demos
* removed the raw-response debug statement
* fixed the duplicate-cloning issue and cleaned up the ProviderRequestType enum and traits
* fixed logs
* moved away from string literals to consts
* fixed streaming from the Anthropic client to OpenAI
* removed a debug statement that would likely trip up integration tests
* fixed integration tests for llm_gateway
* cleaned up test cases and removed unnecessary crates
* addressed review comments from the PR
* fixed a bug whereby we were sending an OpenAIChatCompletions request object to llm_gateway even though the request may have been AnthropicMessages

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-4.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-9.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-10.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-41.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-136.local>
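The heart of the PR is the mapping between Anthropic's /v1/messages schema and OpenAI's /v1/chat/completions schema. Below is a minimal sketch of the request direction only, assuming the well-known wire-format differences (Anthropic carries the system prompt as a top-level `system` field and requires `max_tokens`); the struct and function names are illustrative, not the PR's actual types:

    // Hedged sketch, not the PR's implementation: hoist Anthropic's top-level
    // system prompt into an OpenAI-style system message.
    use serde::{Deserialize, Serialize};

    #[derive(Deserialize)]
    struct AnthropicMessagesRequest {
        model: String,
        max_tokens: u32,        // required by /v1/messages
        system: Option<String>, // top-level system prompt in Anthropic's schema
        messages: Vec<Message>,
    }

    #[derive(Serialize, Deserialize)]
    struct Message {
        role: String, // "user" | "assistant" ("system" is also valid on the OpenAI side)
        content: String,
    }

    #[derive(Serialize)]
    struct ChatCompletionsRequest {
        model: String,
        max_tokens: Option<u32>, // optional in /v1/chat/completions
        messages: Vec<Message>,
    }

    fn to_chat_completions(req: AnthropicMessagesRequest) -> ChatCompletionsRequest {
        let mut messages = Vec::with_capacity(req.messages.len() + 1);
        if let Some(system) = req.system {
            messages.push(Message { role: "system".to_string(), content: system });
        }
        messages.extend(req.messages);
        ChatCompletionsRequest {
            model: req.model,
            max_tokens: Some(req.max_tokens),
            messages,
        }
    }

The response direction inverts this mapping; for streaming, the stream_context changes above additionally keep the serialized byte length in sync after each chunk is rewritten.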
parent bb71d041a0
commit fb0581fd39

38 changed files with 2842 additions and 919 deletions
@@ -31,14 +31,15 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
        )
        .returning(None)
        .expect_log(
-           Some(LogLevel::Debug),
-           Some("request received: llm provider hint: default, selected provider: open-ai-gpt-4"),
+           Some(LogLevel::Info),
+           None, // Dynamic request ID - could be context_id or x-request-id
        )
        .expect_add_header_map_value(
            Some(MapType::HttpRequestHeaders),
            Some("x-arch-llm-provider"),
            Some("openai"),
        )
+       .expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-api-key"))
        .expect_replace_header_map_value(
            Some(MapType::HttpRequestHeaders),
            Some("Authorization"),
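These expectations encode the gateway's header rewrite on the request path: tag the request with the resolved provider, drop the client-supplied x-api-key, and replace Authorization with the gateway-held credential. A hedged sketch of that rewrite against the public proxy-wasm Rust SDK; the `LlmGateway` struct and `provider_api_key` field are illustrative, and only the header names come from the test above:

    use proxy_wasm::traits::{Context, HttpContext};
    use proxy_wasm::types::Action;

    // Illustrative filter state; the real filter's context struct differs.
    struct LlmGateway {
        provider_api_key: String, // credential for the resolved provider
    }

    impl Context for LlmGateway {}

    impl HttpContext for LlmGateway {
        fn on_http_request_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
            // Tag the request with the resolved provider for upstream routing.
            self.add_http_request_header("x-arch-llm-provider", "openai");
            // Drop the client's key; the gateway injects its own credential.
            self.set_http_request_header("x-api-key", None);
            self.set_http_request_header(
                "Authorization",
                Some(&format!("Bearer {}", self.provider_api_key)),
            );
            Action::Continue
        }
    }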
@@ -193,10 +194,7 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {

    module
        .call_proxy_on_context_create(http_context, filter_context)
-       .expect_log(
-           Some(LogLevel::Trace),
-           Some("create_http_context called with context_id: 2"),
-       )
+       .expect_log(Some(LogLevel::Trace), None)
        .execute_and_expect(ReturnType::None)
        .unwrap();

@@ -211,15 +209,19 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
            chat_completions_request_body.len() as i32,
            true,
        )
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - REQUEST_BODY_CHUNK
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
        .expect_log(Some(LogLevel::Info), None)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - CLIENT_REQUEST_RECEIVED
        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - CLIENT_REQUEST_PAYLOAD
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - MODEL_RESOLUTION
        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - TOKEN_COUNT
        .expect_metric_record("input_sequence_length", 21)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK
        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=21"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - UPSTREAM_TRANSFORM
        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - UPSTREAM_REQUEST_PAYLOAD
        .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
        .execute_and_expect(ReturnType::Action(Action::Continue))
        .unwrap();
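The TOKENIZER log and the input_sequence_length metric above come from counting prompt tokens before the rate-limit check. A rough sketch of such a count using the tiktoken-rs crate; whether plano actually uses tiktoken-rs, and how it maps model names to encodings, is an assumption here:

    // Hedged sketch: sum BPE tokens over message bodies. Real counters also
    // charge a few per-message overhead tokens, which this ignores.
    use tiktoken_rs::cl100k_base;

    fn input_sequence_length(messages: &[(String, String)]) -> anyhow::Result<usize> {
        let bpe = cl100k_base()?; // the GPT-4-family encoding
        Ok(messages
            .iter()
            .map(|(_role, content)| bpe.encode_with_special_tokens(content).len())
            .sum())
    }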
@@ -263,15 +265,19 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
            incomplete_chat_completions_request_body.len() as i32,
            true,
        )
        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - REQUEST_BODY_CHUNK
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(incomplete_chat_completions_request_body))
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"))
        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 13"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - CLIENT_REQUEST_RECEIVED
        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - CLIENT_REQUEST_PAYLOAD
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - MODEL_RESOLUTION
        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - TOKEN_COUNT
        .expect_metric_record("input_sequence_length", 13)
        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=13"#))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK
        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=13"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK
        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
        .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
        .execute_and_expect(ReturnType::Action(Action::Continue))
        .unwrap();
@@ -322,16 +328,18 @@ fn llm_gateway_request_ratelimited() {
            chat_completions_request_body.len() as i32,
            true,
        )
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
        // The actual call is not important in this test, we just need to grab the token_id
        .expect_log(Some(LogLevel::Info), None)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"messages\": [{\"role\": \"system\",\"content\": \"You are a helpful poetic assistant!, skilled in explaining complex programming concepts with creative flair. Be sure to be concise and to the point.\"},{\"role\": \"user\",\"content\": \"Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it.\"}],\"model\": \"gpt-4\"}"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_metric_record("input_sequence_length", 107)
        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=107"))
        .expect_log(Some(LogLevel::Warn), Some(r#"server error occurred: exceeded limit provider=gpt-4, selector=Header { key: "selector-key", value: "selector-value" }, tokens_used=107"#))
        .expect_send_local_response(
            Some(StatusCode::TOO_MANY_REQUESTS.as_u16().into()),
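The 429 path above exercises a token-based limiter: each request consumes its prompt-token count against a budget keyed by provider plus a header selector. A minimal sketch of that shape, with every name illustrative rather than taken from the repo:

    use std::collections::HashMap;

    // Illustrative key: provider name plus the selector header key/value,
    // mirroring the "provider=gpt-4, selector=Header { ... }" log lines.
    #[derive(Hash, PartialEq, Eq)]
    struct LimitKey {
        provider: String,
        selector: (String, String),
    }

    struct RateLimiter {
        remaining: HashMap<LimitKey, u64>, // tokens left in the current window
    }

    impl RateLimiter {
        // Err maps to the HTTP 429 local response asserted above.
        fn check(&mut self, key: LimitKey, tokens: u64) -> Result<(), String> {
            let remaining = self.remaining.entry(key).or_insert(0);
            if *remaining < tokens {
                return Err(format!("exceeded limit, tokens_used={tokens}"));
            }
            *remaining -= tokens;
            Ok(())
        }
    }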
@@ -376,16 +384,21 @@ fn llm_gateway_request_not_ratelimited() {
            chat_completions_request_body.len() as i32,
            true,
        )
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
        // The actual call is not important in this test, we just need to grab the token_id
        .expect_log(Some(LogLevel::Info), None)
        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
        // Dynamic request ID
        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_metric_record("input_sequence_length", 29)
        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
        .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
        .execute_and_expect(ReturnType::Action(Action::Continue))
        .unwrap();
@@ -423,16 +436,20 @@ fn llm_gateway_override_model_name() {
            chat_completions_request_body.len() as i32,
            true,
        )
        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
        // The actual call is not important in this test, we just need to grab the token_id
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"))
        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_metric_record("input_sequence_length", 29)
        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
        .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
        .execute_and_expect(ReturnType::Action(Action::Continue))
        .unwrap();
@@ -470,19 +487,23 @@ fn llm_gateway_override_use_default_model() {
            chat_completions_request_body.len() as i32,
            true,
        )
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
        // The actual call is not important in this test, we just need to grab the token_id
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
        .expect_log(
            Some(LogLevel::Info),
            Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"),
            None, // Dynamic request ID
        )
        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_metric_record("input_sequence_length", 29)
        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
        .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
        .execute_and_expect(ReturnType::Action(Action::Continue))
        .unwrap();
@@ -520,16 +541,21 @@ fn llm_gateway_override_use_model_name_none() {
            chat_completions_request_body.len() as i32,
            true,
        )
        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
        // The actual call is not important in this test, we just need to grab the token_id
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): none, model selected: gpt-4"))
        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"none\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_metric_record("input_sequence_length", 29)
        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
        .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
        .execute_and_expect(ReturnType::Action(Action::Continue))
        .unwrap();
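One pattern repeats through all of these hunks: wherever a log line now embeds the per-request ID this PR introduces, the expectation passes None as the message matcher and keeps only the level. As the expectations above imply, a Some("...") matcher must match the logged text exactly, while None accepts any message at that level, for example:

    module
        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4")) // static text: exact match
        .expect_log(Some(LogLevel::Info), None) // dynamic [ARCHGW_REQ_ID:...] line: match on level only
        .execute_and_expect(ReturnType::Action(Action::Continue))
        .unwrap();

This keeps the assertions deterministic while the request ID varies per run.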