add support for v1/messages and transformations (#558)

* pushing draft PR

* transformations are working; tests come next

* updated tests and added the necessary response transformations for Anthropic's Messages response object

* fixed bugs for integration tests

* fixed doc tests

* fixed serialization issues with enums on response

* adding some debug logs to aid debugging

* fixed issues with non-streaming responses

* updated the stream_context so that it keeps the response bytes current

* the serialized byte length must be set on the response side
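
A minimal sketch of what these two commits describe, written against the proxy-wasm Rust SDK that the tests below exercise; StreamContext here is a bare stand-in and transform_response is a hypothetical helper, not the repo's actual code:

    use proxy_wasm::traits::{Context, HttpContext};
    use proxy_wasm::types::Action;

    struct StreamContext; // stand-in; the real struct carries per-request state

    impl Context for StreamContext {}

    impl HttpContext for StreamContext {
        fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
            if !end_of_stream {
                return Action::Pause; // keep buffering until the full body is available
            }
            if let Some(body) = self.get_http_response_body(0, body_size) {
                let transformed = transform_response(&body); // hypothetical helper
                // Replace all body_size original bytes with the transformed bytes;
                // the length that reaches the client is the transformed length,
                // not the original body_size.
                self.set_http_response_body(0, body_size, &transformed);
            }
            Action::Continue
        }
    }

    fn transform_response(raw: &[u8]) -> Vec<u8> {
        raw.to_vec() // identity placeholder for the sketch
    }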

* fixed the debug statement that was causing the integration tests for wasm to fail

* fixing JSON parsing errors

* intentionally removing the headers

* making sure that we convert the raw bytes to the correct provider type upstream

* fixing non-streaming responses to transform correctly

* /v1/messages works with transformations to and from /v1/chat/completions
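
A hedged sketch of the request-side half of that mapping, using simplified stand-in types (the actual structs in this PR carry more fields):

    use serde::{Deserialize, Serialize};

    #[derive(Serialize, Deserialize)]
    struct Msg {
        role: String,
        content: String,
    }

    // Anthropic /v1/messages request (simplified).
    #[derive(Serialize, Deserialize)]
    struct MessagesRequest {
        model: String,
        max_tokens: u32,
        system: Option<String>,
        messages: Vec<Msg>,
    }

    // OpenAI /v1/chat/completions request (simplified).
    #[derive(Serialize, Deserialize)]
    struct ChatCompletionsRequest {
        model: String,
        messages: Vec<Msg>,
        #[serde(skip_serializing_if = "Option::is_none")]
        max_tokens: Option<u32>,
    }

    fn to_chat_completions(req: MessagesRequest) -> ChatCompletionsRequest {
        // Anthropic carries the system prompt as a top-level field; OpenAI
        // expects it as a leading message with role "system".
        let mut messages = Vec::with_capacity(req.messages.len() + 1);
        if let Some(system) = req.system {
            messages.push(Msg { role: "system".into(), content: system });
        }
        messages.extend(req.messages);
        ChatCompletionsRequest {
            model: req.model,
            messages,
            max_tokens: Some(req.max_tokens),
        }
    }

The response direction inverts this: collapse the first choice into a single text content block and map finish_reason "stop"/"length" to stop_reason "end_turn"/"max_tokens".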

* updating the CLI and demos to support anthropic vs. claude

* adding the Anthropic key to the preference-based routing tests

* fixed test cases and added more structured logs

* fixed integration tests and cleaned up logs

* added python client tests for anthropic and openai

* cleaned up logs and fixed a connectivity issue for the llm gateway in the weather forecast demo

* fixing the tests; the Python dependency order was broken

* updated the OpenAI client to fix demos

* removed the raw response debug statement

* fixed the duplicate-cloning issue and cleaned up the ProviderRequestType enum and traits
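
The shape implied by this commit, reusing the stand-in request types from the /v1/messages sketch above (the repo's actual enum and traits may differ):

    enum ProviderRequestType {
        OpenAIChatCompletions(ChatCompletionsRequest),
        AnthropicMessages(MessagesRequest),
    }

    impl ProviderRequestType {
        // Decide the client protocol once, from the endpoint that was called,
        // so nothing downstream has to guess; the final fix in this PR was
        // exactly such a guess going wrong.
        fn from_client_bytes(path: &str, body: &[u8]) -> serde_json::Result<Self> {
            match path {
                "/v1/messages" => serde_json::from_slice(body).map(Self::AnthropicMessages),
                _ => serde_json::from_slice(body).map(Self::OpenAIChatCompletions),
            }
        }
    }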

* fixing logs

* moved away from string literals to consts
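,
The pattern, with values taken from the log lines the tests below pin down (the const names themselves are guesses):

    const ARCHGW_REQ_ID_PREFIX: &str = "ARCHGW_REQ_ID";
    const NO_REQUEST_ID: &str = "NO_REQUEST_ID";
    const CLIENT_REQUEST_RECEIVED: &str = "CLIENT_REQUEST_RECEIVED";
    const CLIENT_REQUEST_PAYLOAD: &str = "CLIENT_REQUEST_PAYLOAD";
    const UPSTREAM_REQUEST_PAYLOAD: &str = "UPSTREAM_REQUEST_PAYLOAD";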

* fixed streaming from Anthropic Client to OpenAI
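
A sketch of the per-token piece of that stream conversion, assuming the gateway re-emits OpenAI delta text as Anthropic SSE events (a real stream also needs message_start, content_block_start, and message_stop framing):

    use serde_json::json;

    fn delta_to_anthropic_event(delta_text: &str) -> String {
        let payload = json!({
            "type": "content_block_delta",
            "index": 0,
            "delta": { "type": "text_delta", "text": delta_text }
        });
        format!("event: content_block_delta\ndata: {payload}\n\n")
    }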

* removed debug statement that would likely trip up integration tests

* fixed integration tests for llm_gateway

* cleaned up test cases and removed unnecessary crates

* addressing review comments on the PR

* fixed a bug whereby we sent an OpenAIChatCompletions request object to llm_gateway even though the request may have been AnthropicMessages

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-4.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-9.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-10.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-41.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-136.local>

38 changed files with 2842 additions and 919 deletions

@@ -31,14 +31,15 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
         )
         .returning(None)
         .expect_log(
-            Some(LogLevel::Debug),
-            Some("request received: llm provider hint: default, selected provider: open-ai-gpt-4"),
+            Some(LogLevel::Info),
+            None, // Dynamic request ID - could be context_id or x-request-id
         )
         .expect_add_header_map_value(
             Some(MapType::HttpRequestHeaders),
             Some("x-arch-llm-provider"),
             Some("openai"),
         )
+        .expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-api-key"))
         .expect_replace_header_map_value(
             Some(MapType::HttpRequestHeaders),
             Some("Authorization"),
@@ -193,10 +194,7 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
     module
         .call_proxy_on_context_create(http_context, filter_context)
-        .expect_log(
-            Some(LogLevel::Trace),
-            Some("||| create_http_context called with context_id: 2 |||"),
-        )
+        .expect_log(Some(LogLevel::Trace), None)
         .execute_and_expect(ReturnType::None)
         .unwrap();
@@ -211,15 +209,19 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
             chat_completions_request_body.len() as i32,
             true,
         )
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - REQUEST_BODY_CHUNK
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
-        .expect_log(Some(LogLevel::Info), None)
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - CLIENT_REQUEST_RECEIVED
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - CLIENT_REQUEST_PAYLOAD
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - MODEL_RESOLUTION
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - TOKEN_COUNT
         .expect_metric_record("input_sequence_length", 21)
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=21"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - UPSTREAM_TRANSFORM
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - UPSTREAM_REQUEST_PAYLOAD
         .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();
@@ -263,15 +265,19 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
             incomplete_chat_completions_request_body.len() as i32,
             true,
         )
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - REQUEST_BODY_CHUNK
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(incomplete_chat_completions_request_body))
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 13"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - CLIENT_REQUEST_RECEIVED
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID - CLIENT_REQUEST_PAYLOAD
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - MODEL_RESOLUTION
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - TOKEN_COUNT
         .expect_metric_record("input_sequence_length", 13)
-        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=13"#))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=13"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID - RATELIMIT_CHECK
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
         .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();
@@ -322,16 +328,18 @@ fn llm_gateway_request_ratelimited() {
             chat_completions_request_body.len() as i32,
             true,
         )
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
         // The actual call is not important in this test, we just need to grab the token_id
-        .expect_log(Some(LogLevel::Info), None)
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"messages\": [{\"role\": \"system\",\"content\": \"You are a helpful poetic assistant!, skilled in explaining complex programming concepts with creative flair. Be sure to be concise and to the point.\"},{\"role\": \"user\",\"content\": \"Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it. Compose a poem that explains the concept of recursion in programming. And also summarize it how a 4th graded would understand it.\"}],\"model\": \"gpt-4\"}"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
         .expect_metric_record("input_sequence_length", 107)
-        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=107"))
         .expect_log(Some(LogLevel::Warn), Some(r#"server error occurred: exceeded limit provider=gpt-4, selector=Header { key: "selector-key", value: "selector-value" }, tokens_used=107"#))
         .expect_send_local_response(
             Some(StatusCode::TOO_MANY_REQUESTS.as_u16().into()),
@@ -376,16 +384,21 @@ fn llm_gateway_request_not_ratelimited() {
             chat_completions_request_body.len() as i32,
             true,
         )
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
         // The actual call is not important in this test, we just need to grab the token_id
         .expect_log(Some(LogLevel::Info), None)
-        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
+        // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
         .expect_metric_record("input_sequence_length", 29)
-        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
         .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();
@@ -423,16 +436,20 @@ fn llm_gateway_override_model_name() {
             chat_completions_request_body.len() as i32,
             true,
         )
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
         // The actual call is not important in this test, we just need to grab the token_id
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
         .expect_metric_record("input_sequence_length", 29)
-        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
         .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();
@@ -470,19 +487,23 @@ fn llm_gateway_override_use_default_model() {
             chat_completions_request_body.len() as i32,
             true,
         )
-        .expect_log(Some(LogLevel::Debug), None)
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
         // The actual call is not important in this test, we just need to grab the token_id
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"gpt-1\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
         .expect_log(
             Some(LogLevel::Info),
-            Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): gpt-1, model selected: gpt-4"),
+            None, // Dynamic request ID
         )
-        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
         .expect_metric_record("input_sequence_length", 29)
-        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
         .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();
@@ -520,16 +541,21 @@ fn llm_gateway_override_use_model_name_none() {
             chat_completions_request_body.len() as i32,
             true,
         )
+        .expect_log(Some(LogLevel::Debug), None) // Dynamic request ID
         .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
         .returning(Some(chat_completions_request_body))
         // The actual call is not important in this test, we just need to grab the token_id
-        .expect_log(Some(LogLevel::Debug), None)
-        .expect_log(Some(LogLevel::Info), Some("on_http_request_body: provider: open-ai-gpt-4, model requested (in body): none, model selected: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("getting token count model=gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some("Recorded input token count: 29"))
+        .expect_log(Some(LogLevel::Info), None)
+        // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] CLIENT_REQUEST_PAYLOAD: {\"model\":\"none\",\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}]}"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("TOKENIZER: computing token count for model=gpt-4"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
         .expect_metric_record("input_sequence_length", 29)
-        .expect_log(Some(LogLevel::Debug), Some("Applying ratelimit for model: gpt-4"))
-        .expect_log(Some(LogLevel::Debug), Some(r#"Checking limit for provider=gpt-4, with selector=Header { key: "selector-key", value: "selector-value" }, consuming tokens=29"#))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("Checking limit for provider=gpt-4, with selector=Header { key: \"selector-key\", value: \"selector-value\" }, consuming tokens=29"))
+        .expect_log(Some(LogLevel::Info), None) // Dynamic request ID
+        .expect_log(Some(LogLevel::Debug), Some("[ARCHGW_REQ_ID:NO_REQUEST_ID] UPSTREAM_REQUEST_PAYLOAD: {\"messages\":[{\"role\":\"system\",\"content\":\"You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.\"},{\"role\":\"user\",\"content\":\"Compose a poem that explains the concept of recursion in programming.\"}],\"model\":\"gpt-4\"}"))
         .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
         .execute_and_expect(ReturnType::Action(Action::Continue))
         .unwrap();
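
For reference, the pinned log lines above all share one shape; a sketch of a formatter that would produce it (the gateway's real helper may differ):

    fn structured_log(request_id: Option<&str>, event: &str, payload: &str) -> String {
        format!(
            "[ARCHGW_REQ_ID:{}] {}: {}",
            request_id.unwrap_or("NO_REQUEST_ID"),
            event,
            payload
        )
    }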