From db44602cb81e9dc31ab382ba6da5e8783e4d9ad6 Mon Sep 17 00:00:00 2001 From: Salman Paracha Date: Mon, 20 Oct 2025 13:48:26 -0700 Subject: [PATCH] Claude Code works with Amazon Bedrock --- crates/hermesllm/src/apis/amazon_bedrock.rs | 26 ++- crates/hermesllm/src/providers/response.rs | 116 +++++++++++- .../src/transforms/response/to_anthropic.rs | 10 +- .../src/transforms/response/to_openai.rs | 10 +- crates/llm_gateway/src/stream_context.rs | 166 +++++++++++++++++- .../use_cases/claude_code_router/config.yaml | 10 +- tests/e2e/response_with_tools.hex | Bin 0 -> 102343 bytes tests/e2e/test_model_alias_routing.py | 135 ++++++++++++++ 8 files changed, 441 insertions(+), 32 deletions(-) create mode 100644 tests/e2e/response_with_tools.hex diff --git a/crates/hermesllm/src/apis/amazon_bedrock.rs b/crates/hermesllm/src/apis/amazon_bedrock.rs index 0c4eb262..096c84c8 100644 --- a/crates/hermesllm/src/apis/amazon_bedrock.rs +++ b/crates/hermesllm/src/apis/amazon_bedrock.rs @@ -693,16 +693,22 @@ pub struct ContentBlockStartEvent { /// Content block start information #[derive(Serialize, Deserialize, Debug, Clone)] -#[serde(tag = "type")] +#[serde(untagged)] pub enum ContentBlockStart { - #[serde(rename = "toolUse")] ToolUse { - #[serde(rename = "toolUseId")] - tool_use_id: String, - name: String, + #[serde(rename = "toolUse")] + tool_use: ToolUseStart, }, } +/// Tool use start information +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct ToolUseStart { + #[serde(rename = "toolUseId")] + pub tool_use_id: String, + pub name: String, +} + /// Content block delta event #[derive(Serialize, Deserialize, Debug, Clone)] pub struct ContentBlockDeltaEvent { @@ -718,7 +724,15 @@ pub struct ContentBlockDeltaEvent { #[serde(untagged)] pub enum ContentBlockDelta { Text { text: String }, - ToolUse { input: String }, + ToolUse { + #[serde(rename = "toolUse")] + tool_use: ToolUseDelta + }, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct ToolUseDelta { + pub input: String, } /// Content block stop event diff --git a/crates/hermesllm/src/providers/response.rs b/crates/hermesllm/src/providers/response.rs index 3b14d0b8..61b38f4b 100644 --- a/crates/hermesllm/src/providers/response.rs +++ b/crates/hermesllm/src/providers/response.rs @@ -478,7 +478,7 @@ where { decoder: aws_smithy_eventstream::frame::MessageFrameDecoder, buffer: B, - has_content_block_start_been_sent: bool, + content_block_start_indices: std::collections::HashSet, } impl BedrockBinaryFrameDecoder { @@ -488,7 +488,7 @@ impl BedrockBinaryFrameDecoder { Self { decoder: aws_smithy_eventstream::frame::MessageFrameDecoder::new(), buffer, - has_content_block_start_been_sent: false, + content_block_start_indices: std::collections::HashSet::new(), } } } @@ -501,7 +501,7 @@ where Self { decoder: aws_smithy_eventstream::frame::MessageFrameDecoder::new(), buffer, - has_content_block_start_been_sent: false, + content_block_start_indices: std::collections::HashSet::new(), } } @@ -521,14 +521,14 @@ where self.buffer.has_remaining() } - /// Check if a content_block_start event has been sent - pub fn has_content_block_start_been_sent(&self) -> bool { - self.has_content_block_start_been_sent + /// Check if a content_block_start event has been sent for the given index + pub fn has_content_block_start_been_sent(&self, index: i32) -> bool { + self.content_block_start_indices.contains(&index) } - /// Set the content_block_start flag - pub fn set_content_block_start_sent(&mut self, sent: bool) { - self.has_content_block_start_been_sent = sent; + /// Mark that a content_block_start event has been sent for the given index + pub fn set_content_block_start_sent(&mut self, index: i32) { + self.content_block_start_indices.insert(index); } } @@ -1122,6 +1122,17 @@ mod tests { test_bedrock_conversion(true); } + #[test] + fn test_bedrock_decoded_frame_with_tool_use() { + test_bedrock_conversion_with_tools(false); + } + + #[test] + #[ignore] // Run with: cargo test -- --ignored --nocapture + fn test_bedrock_decoded_frame_with_tool_use_verbose() { + test_bedrock_conversion_with_tools(true); + } + fn test_bedrock_conversion(verbose: bool) { use bytes::BytesMut; use std::fs; @@ -1194,6 +1205,93 @@ mod tests { assert!(message_start_seen, "Should have seen MessageStart event"); } + fn test_bedrock_conversion_with_tools(verbose: bool) { + use bytes::BytesMut; + use std::fs; + use std::path::PathBuf; + + // Read the actual response_with_tools.hex file from tests/e2e directory + let test_file = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("../../tests/e2e/response_with_tools.hex"); + + // Only run this test if the file exists + if !test_file.exists() { + println!("Skipping test - response_with_tools.hex not found"); + return; + } + + let response_data = fs::read(&test_file).unwrap(); + let mut buffer = BytesMut::from(&response_data[..]); + + let mut decoder = BedrockBinaryFrameDecoder::new(&mut buffer); + + let client_api = SupportedAPIs::AnthropicMessagesAPI(crate::apis::anthropic::AnthropicApi::Messages); + let upstream_api = SupportedUpstreamAPIs::AmazonBedrockConverseStream(crate::apis::amazon_bedrock::AmazonBedrockApi::ConverseStream); + + let mut conversion_count = 0; + let mut message_start_seen = false; + let mut content_block_start_seen = false; + let mut content_block_delta_tool_use_seen = false; + + // Decode and convert frames + loop { + match decoder.decode_frame() { + Some(frame @ aws_smithy_eventstream::frame::DecodedFrame::Complete(_)) => { + // Convert DecodedFrame to ProviderStreamResponseType + let result = ProviderStreamResponseType::try_from((&frame, &client_api, &upstream_api)); + + match result { + Ok(provider_response) => { + conversion_count += 1; + + // Verify we got a MessagesStreamEvent + assert!(matches!(provider_response, ProviderStreamResponseType::MessagesStreamEvent(_))); + + if verbose { + // Print the SSE string output + let sse_string: String = provider_response.clone().into(); + println!("{}", sse_string); + } + + // Check for specific events related to tool use + if let ProviderStreamResponseType::MessagesStreamEvent(ref event) = provider_response { + match event { + crate::apis::anthropic::MessagesStreamEvent::MessageStart { .. } => { + message_start_seen = true; + } + crate::apis::anthropic::MessagesStreamEvent::ContentBlockStart { .. } => { + content_block_start_seen = true; + } + crate::apis::anthropic::MessagesStreamEvent::ContentBlockDelta { delta, .. } => { + if matches!(delta, crate::apis::anthropic::MessagesContentDelta::InputJsonDelta { .. }) { + content_block_delta_tool_use_seen = true; + } + } + _ => {} + } + } + } + Err(e) => { + println!("Conversion error (frame {}): {}", conversion_count, e); + } + } + } + Some(aws_smithy_eventstream::frame::DecodedFrame::Incomplete) => { + // End of buffer + break; + } + None => { + panic!("Decode error"); + } + } + } + + assert!(conversion_count > 0, "Should have converted at least one frame"); + assert!(message_start_seen, "Should have seen MessageStart event"); + assert!(content_block_start_seen, "Should have seen ContentBlockStart event for tool use"); + assert!(content_block_delta_tool_use_seen, "Should have seen ContentBlockDelta with ToolUseDelta"); + } + #[test] fn test_sse_event_transformation_openai_to_anthropic_message_start() { use crate::apis::openai::OpenAIApi; diff --git a/crates/hermesllm/src/transforms/response/to_anthropic.rs b/crates/hermesllm/src/transforms/response/to_anthropic.rs index c19debc6..2076313f 100644 --- a/crates/hermesllm/src/transforms/response/to_anthropic.rs +++ b/crates/hermesllm/src/transforms/response/to_anthropic.rs @@ -261,12 +261,12 @@ impl TryFrom for MessagesStreamEvent { // Note: Bedrock sends tool_use_id and name at start, with input coming in subsequent deltas // Anthropic expects the same pattern, so we initialize with an empty input object match start_event.start { - crate::apis::amazon_bedrock::ContentBlockStart::ToolUse { tool_use_id, name } => { + crate::apis::amazon_bedrock::ContentBlockStart::ToolUse { tool_use } => { Ok(MessagesStreamEvent::ContentBlockStart { index: start_event.content_block_index as u32, content_block: MessagesContentBlock::ToolUse { - id: tool_use_id, - name, + id: tool_use.tool_use_id, + name: tool_use.name, input: Value::Object(serde_json::Map::new()), // Empty - will be filled by deltas cache_control: None, }, @@ -281,8 +281,8 @@ impl TryFrom for MessagesStreamEvent { ContentBlockDelta::Text { text } => { MessagesContentDelta::TextDelta { text } } - ContentBlockDelta::ToolUse { input } => { - MessagesContentDelta::InputJsonDelta { partial_json: input } + ContentBlockDelta::ToolUse { tool_use } => { + MessagesContentDelta::InputJsonDelta { partial_json: tool_use.input } } }; diff --git a/crates/hermesllm/src/transforms/response/to_openai.rs b/crates/hermesllm/src/transforms/response/to_openai.rs index 974a9339..8bf6896b 100644 --- a/crates/hermesllm/src/transforms/response/to_openai.rs +++ b/crates/hermesllm/src/transforms/response/to_openai.rs @@ -280,7 +280,7 @@ impl TryFrom for ChatCompletionsStreamResponse { use crate::apis::amazon_bedrock::ContentBlockStart; match start_event.start { - ContentBlockStart::ToolUse { tool_use_id, name } => { + ContentBlockStart::ToolUse { tool_use } => { Ok(create_openai_chunk( "stream", "unknown", @@ -291,10 +291,10 @@ impl TryFrom for ChatCompletionsStreamResponse { function_call: None, tool_calls: Some(vec![ToolCallDelta { index: start_event.content_block_index as u32, - id: Some(tool_use_id), + id: Some(tool_use.tool_use_id), call_type: Some("function".to_string()), function: Some(FunctionCallDelta { - name: Some(name), + name: Some(tool_use.name), arguments: Some("".to_string()), }), }]), @@ -325,7 +325,7 @@ impl TryFrom for ChatCompletionsStreamResponse { None, )) } - ContentBlockDelta::ToolUse { input } => { + ContentBlockDelta::ToolUse { tool_use } => { Ok(create_openai_chunk( "stream", "unknown", @@ -340,7 +340,7 @@ impl TryFrom for ChatCompletionsStreamResponse { call_type: None, function: Some(FunctionCallDelta { name: None, - arguments: Some(input), + arguments: Some(tool_use.input), }), }]), }, diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index f9a699a0..43151a7f 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -1,4 +1,3 @@ -use bytes::Buf; use hermesllm::clients::endpoints::SupportedUpstreamAPIs; use http::StatusCode; use log::{debug, info, warn}; @@ -25,9 +24,11 @@ use common::tracing::{Event, Span, TraceData, Traceparent}; use common::{ratelimit, routing, tokenizer}; use hermesllm::clients::endpoints::SupportedAPIs; use hermesllm::providers::response::{ - BedrockBinaryFrameDecoder, ProviderResponse, SseEvent, SseStreamIter, + BedrockBinaryFrameDecoder, ProviderResponse, ProviderStreamResponse, SseEvent, SseStreamIter, +}; +use hermesllm::{ + DecodedFrame, ProviderId, ProviderRequest, ProviderRequestType, ProviderResponseType, }; -use hermesllm::{ProviderId, ProviderRequest, ProviderRequestType, ProviderResponseType}; pub struct StreamContext { metrics: Rc, @@ -424,6 +425,14 @@ impl StreamContext { let upstream_api = provider_id.compatible_api_for_client(&client_api, self.streaming_response); + // Check if this is Bedrock binary stream + if matches!( + upstream_api, + SupportedUpstreamAPIs::AmazonBedrockConverseStream(_) + ) { + return self.handle_bedrock_binary_stream(body, &client_api, &upstream_api); + } + // Parse body into SSE iterator using TryFrom let sse_iter: SseStreamIter> = match SseStreamIter::try_from(body) { @@ -499,6 +508,157 @@ impl StreamContext { } } + fn handle_bedrock_binary_stream( + &mut self, + body: &[u8], + client_api: &SupportedAPIs, + upstream_api: &SupportedUpstreamAPIs, + ) -> Result, Action> { + use hermesllm::providers::response::ProviderStreamResponseType; + + // Initialize decoder if not present + if self.binary_frame_decoder.is_none() { + self.binary_frame_decoder = Some(BedrockBinaryFrameDecoder::from_bytes(&[])); + } + + // Add incoming bytes to buffer + if let Some(decoder) = self.binary_frame_decoder.as_mut() { + decoder.buffer_mut().extend_from_slice(body); + } + + let mut response_buffer = Vec::new(); + + // Decode all available complete frames + loop { + let decoded_frame = self.binary_frame_decoder.as_mut().unwrap().decode_frame(); + match decoded_frame { + Some(DecodedFrame::Complete(ref frame_ref)) => { + // Convert frame to ProviderStreamResponseType + let frame = DecodedFrame::Complete(frame_ref.clone()); + match ProviderStreamResponseType::try_from((&frame, client_api, upstream_api)) { + Ok(provider_response) => { + self.record_ttft_if_needed(); + + // Extract index from the event if available + let event_index = + if let ProviderStreamResponseType::MessagesStreamEvent(ref evt) = + provider_response + { + use hermesllm::apis::anthropic::MessagesStreamEvent; + match evt { + MessagesStreamEvent::ContentBlockStart { + index, .. + } => Some(*index as i32), + MessagesStreamEvent::ContentBlockDelta { + index, .. + } => Some(*index as i32), + MessagesStreamEvent::ContentBlockStop { index, .. } => { + Some(*index as i32) + } + _ => None, + } + } else { + None + }; + + // Check event type to track ContentBlockStart + if let Some(event_type) = provider_response.event_type() { + match event_type { + "content_block_start" => { + // Mark that we've seen ContentBlockStart for this index + if let (Some(decoder), Some(index)) = + (self.binary_frame_decoder.as_mut(), event_index) + { + decoder.set_content_block_start_sent(index); + debug!( + "[ARCHGW_REQ_ID:{}] BEDROCK_CONTENT_BLOCK_START_TRACKED: index={}", + self.request_identifier(), + index + ); + } + } + "content_block_delta" => { + // Check if ContentBlockStart was sent for this index + if let Some(index) = event_index { + let needs_start = if let Some(decoder) = + self.binary_frame_decoder.as_ref() + { + !decoder.has_content_block_start_been_sent(index) + } else { + false + }; + + if needs_start { + // Emit empty ContentBlockStart before delta + use hermesllm::apis::anthropic::{ + MessagesContentBlock, MessagesStreamEvent, + }; + let content_block_start = + MessagesStreamEvent::ContentBlockStart { + index: index as u32, + content_block: MessagesContentBlock::Text { + text: String::new(), + cache_control: None, + }, + }; + let start_sse: String = content_block_start.into(); + response_buffer + .extend_from_slice(start_sse.as_bytes()); + + // Mark that we've now sent it + if let Some(decoder) = + self.binary_frame_decoder.as_mut() + { + decoder.set_content_block_start_sent(index); + } + + debug!( + "[ARCHGW_REQ_ID:{}] BEDROCK_INJECTED_CONTENT_BLOCK_START: index={}", + self.request_identifier(), + index + ); + } + } + } + _ => {} + } + } + + let sse_string: String = provider_response.into(); + response_buffer.extend_from_slice(sse_string.as_bytes()); + } + Err(e) => { + warn!( + "[ARCHGW_REQ_ID:{}] BEDROCK_FRAME_CONVERSION_ERROR: {}", + self.request_identifier(), + e + ); + } + } + } + Some(DecodedFrame::Incomplete) => { + // Incomplete frame - buffer retains partial data, wait for more bytes + debug!( + "[ARCHGW_REQ_ID:{}] BEDROCK_INCOMPLETE_FRAME: waiting for more data", + self.request_identifier() + ); + break; + } + None => { + // Decode error + warn!( + "[ARCHGW_REQ_ID:{}] BEDROCK_DECODE_ERROR", + self.request_identifier() + ); + return Err(Action::Continue); + } + } + } + + // Return accumulated complete frames (may be empty if all frames incomplete) + Ok(response_buffer) + } + fn handle_non_streaming_response( &mut self, body: &[u8], diff --git a/demos/use_cases/claude_code_router/config.yaml b/demos/use_cases/claude_code_router/config.yaml index 11a98c07..2a727c2a 100644 --- a/demos/use_cases/claude_code_router/config.yaml +++ b/demos/use_cases/claude_code_router/config.yaml @@ -9,8 +9,10 @@ listeners: llm_providers: # OpenAI Models - - model: openai/gpt-5-2025-08-07 - access_key: $OPENAI_API_KEY + + - model: amazon_bedrock/us.amazon.nova-premier-v1:0 + access_key: $AWS_BEARER_TOKEN_BEDROCK + base_url: https://bedrock-runtime.us-west-2.amazonaws.com routing_preferences: - name: code generation description: generating new code snippets, functions, or boilerplate based on user prompts or requirements @@ -26,7 +28,7 @@ llm_providers: default: true access_key: $ANTHROPIC_API_KEY - - model: anthropic/claude-3-haiku-20240307 + - model: anthropic/claude-haiku-4-5-20251001 access_key: $ANTHROPIC_API_KEY # Ollama Models @@ -38,4 +40,4 @@ llm_providers: model_aliases: # Alias for a small faster Claude model arch.claude.code.small.fast: - target: claude-3-haiku-20240307 + target: claude-haiku-4-5-20251001 diff --git a/tests/e2e/response_with_tools.hex b/tests/e2e/response_with_tools.hex new file mode 100644 index 0000000000000000000000000000000000000000..5aa4116588e2d87519ea07ee36f4af03e097de69 GIT binary patch literal 102343 zcmdsA2YeLO_C^$?DvC<84EI?=@5Km;2uKS(EF}t?%|I45yJ1U6z$=Pi0YyPn1eAz? z6crm7Ks3OkKNV~=<$)kl^(g`hivRCy%C?!^+3^0E^Yr%{#jr`fbLZZB+IP+=Q>M(! zGG)s2y`<@*!zv}JS*p|9(3_K~UQwoUhU)QHN2vY0R=2lGqRr*>2L7_DH8ay;w^_Y* zm$UImkIPvlvGC)C-(AlCqsEC$ktnRGwlsD4h;;kNQH~6!D|58l*IHt5!9S_1GfmAF ziA@@aH2Rq$ah&j~*rJ;iN7?md!v_UhfIZ$BYB?{1?)gqiqE6E!vquZ%iU+!0O0!s(l>JXF|$FA!J+hop$t9C zLY}(8IvaR*6Xt6-Uw*yWn7yv3fD104$c-PmUs|^jve8|e6Cl((B-onCiL=ZW%;r|D zUj4%$@6+-WvotI-#M)0uU4B4D%G_`P$y=`Lq9{6bFlJoPy+FQw&v zanJIVW-s?}pVLN4IRdqiRR>sJ0?gM4>g!7`WSb2&;*1JeA<}`dOmMYuqTt3X_H2;a(w| zE%>G$ATJ@t z|Fj0M&nNuXO%~}U<4=r4F64>v*Nz81E=F`pq^=t&o50swPy9uru11S7OxK%xJLyT!3;0L0>= zAeESwwUE|3X7>f;EqE;6glRKOJ_U+fwfWp0dzQWfqxm+3JUIWwst{r=3DR-ko@BEH ziPO{)GH>s9_XGT565wR5!@EruK+n1o@;-;{7l8bXfSylJYhf}_Mc1JTH3kbzmB>I= z!@8%>K@=|WIK^Xk+Tz^(Nj-)9et-9Q5QB|lWJj1*FBmFh;DylTbzz4wg7}&q;_5e^#3LaWGHtbS_#Py5A zn?eql`Cem)MjpSV?5c_9%@)s|k?Bw~kYMtsl(Y_PQ07R;{FEUB0i8>fQ_Ba@w(o_k zRpzp%06m{po`H+nEXG=gDOMrZl-Za8>|Cx*4R=Jy<*8*8fRW9$c~6>=11sc$s~YtL zZZ^i3-I?i&>)tF*A>yhL@{^qJNeisPW7#^m(iF2VnJlZro@U^J5pqMzNy~tm&Gi@& zD~}e|74p|_w@e0nHhy2a)tQDYzplv`set_(NywJD$16e*E@2$SsgA(@iPwKcT3twA zolC!hC~ShLPG3f<>W;KJv3m}d9SfvvlDXJCac>=>F%+9S{_KBiA$vdiYc~is7msEA zxTSxYeIGf4^K?QcCmq}jXk4-b4Gk?il6Ey60fN8$UeO zRJ3=5EN^+9_yF5!oS?H&@Tz@iGk~0Hyf^3jW6~PY(p2Y*M$7$ibv>v-9a+ll9#m8Y?*9Uj~eHN zRLCv#9}^*8U9f)$gyC{IDS8IBkmFZhSqn(Nrlo&j=Cn;_FMXEkwhzy_s4EKD$j;9l zItGcBlT=+Q&KYdBR5q6z$sw;3oMS!FgnaJy8FaCdwPBgHb?0W2K}Tpb_>GHp(7m&s z5Z*QO$(AM~Y@xT7K@{xbm}jBOHTck`a)8ezc4AO_3Ay(89hC8B6LL=W=xC@A0(@Zg zg`DonY6%J0B$#6c`-OZc;q>!>%*AC<+&*XQEiYt_a?Krp&4sx)G&W`pmVTkZiD3Ai zddDHnzwlT-+*fMD@+D&bLkn$I#*E9YY4b9wT=#SKpHN}0A`vi7A+JB3vi~?J?(*e1HFlN zc0KsK$!|i}yo~Z9g#6UIDGdT}af=l-8}yx$GO2{TV&+lu^7+Kn4dt>6FDW5=jhx*P z@VU_Nlo+R5NN>-P*8y-Y9?SlU6Bn6$2EwdmAcm~+sBz!eywXewwUr4>8kQo(! zYzm-UE<%F^aYD{H(q<5Fv$2ZfrE`I)kg|-<9UJOje3b6{guLwAWz``-E*=i&FpSeJ zh-yGy(=N>uRYZZSh-YfE5>~)A#Z+ZXIWt9Vw%KPEr7V#+xJqY zii>hmz)>@>`a(XrENu*Mb0J0RZ5a?*#;>4#%>`%^oz)d`M8lK+1W+y!(#GLHVUzcL_miTX#iJBylG2HiYDj&3rk4g?^+8i&SU z`lhImDW@A!^z&VUdahpkZYJL*N|zYbLPEasNq1^v#w8x7q`JK68k-t@5$Ap11waWb zi=Jx{#SuaduW&V0`cT-=@>E{(t!BS{>OLEv&x+jqO(RL$EGHQluvv}V7D8^?ok4a8 z7miBXh+wTozuurLj`-YYSXKS!qYz>%t$gz*j(0S9_ZV;08|HWmd0qY^FGC70=b!|g zdxgBR{jC2$hA-%CJdl0U2PVG_Bj+IOJp^J;{;s_!<=#;JMu@}32f28W{Edj0zW3x% zNJXKL0h?Ys#ym8+>41)@&6*A9*Rj`tkz+cK9ozDr(K&tK;>(Wj-?E}inM(zU{$}2F zrP@&rq6fN?2P7*f&rcNNQUr;VA`(+XN{atUX^cOlG*&wMdxFAqE^efxgtpqCkD>MX zaHSw>fKMhk>n8fYs^A~{aK-8JDt7cOqVDWAHNn|HffW(-5&9TAfKW4g8|anuqZ$u zQUM$Lz*;Dnl?~c;@?f~_y4_H_4piK`nz$`{{`el6nq|%C_&#F0OED$ z1pddrPz66>2)^ll{FuPY#1{gKtY+KPOs~?({~vS-R;(T+LOef|q925|GYM+J4=4l) zd?H0i9Es5LDCE7hMpK>OR6GNY{&DM&(97Zf^-_zt8_;K^@}L`l#1!+YS@3x+m?Dtk zOmW6BNx-ibg?wgIC34Fs3THW!eR-$QB@MZ6|Lda=G=wODQPQU`I)=8MaRCy%PEz!( zH!#m+DYVIxLhg9yePVf-l6uxy>{tw6gdK!jy=c)>z-&s!OAB)$N4%sB8^FJ?By@#9 zrm%vw1oHT7HWVce_cvvYbo3tFsnvH9Hfvw!tqz$c2eR zD*~S>%n&V%`bjKMO?xKLku!yqpn)}NUJ5#V?Cr$DVsoPv+Ho53N~AKM-~2e$8&Mw` zOV=)Wx0tM|jpw>UMi>d%^ws+*e_#p`Qe%rmG|oii?8bNg0fb-CO7CcMyt>&deerE7 zz*pJa{v=U6Ov$ZkEqmO3D&W>ks7Hx1Q<7yGA~e!Ery=QKAyZd6NiUiby3~NI^mJj? zMW}ZeBbE%N$Fkwnkb;Z5raNOIVrWC4YmSi9{@n8jgfJxmAUEmEgAww_ z7k_*Xl9*BoNXft6ju(EL!T|RGE17IP`-Q8{< zFq%Sd*V-l-RMx@kO-r7WA(WZveUTti2=w)N9()n zA@-`~MFfwn>qA~hvjk764;j;V6}nD6n(@6i;bEX$KuAk_imV1wEwT)zTTHomI{@Yq z$kOiqpaC+>c0h-gy*-F-!6lXlMPbBYfni*J^IYKOa{XP5-WT#;E3@1XhK+8#H(j-j z(tHy_w(nb?+L=&;C5y9sa+cZg5;d~EPREdsVs(KGg1*#m-F!7=`PhVl{O#He4LsMi zDTTbV@^cOd^(DQp(r&}5aRDh~T<%d?S7rD}dVMQd$lF`2AbwRY$;9|lrHjARI}dBD zd2`L9Y`9zl9nq_eoB~>Rkh4eadL9z6vG${6V!|zEj6iIEJq38VoNdMCP_6EW zoVd|G`)IF!O*J3n;gg|pg&qocB;Ib666Ip*m1xr!20?}V+TNbJt*yjkdHUSwFmhI@ zGl>+CE#09Du4z4?1p2|66qYijkVfMM4Rd6IE*|u=xo12jPPv%V`cOv;`SG!KzW^)c z(M!FuR%2O-Fd4BOFyVdC{Ar9<)KBS*fa#yZA>^a|dysL+M!Q&@R!5GQN3o{e(8bjBE$={{~I1mKd;(JuF7kJ`dZaO*#QgJb|^csB`luF2}QfBXmJY0gRb+qqJS0*OC z+0%aqQZAe>rJWvBbgB@t>s?9IEbt3FrCwFHthILay8K(@Vh0 zC7Y~#H)lqa?V?d19(0us95fg`DYk!4MY#ERO1)2u#(w%K@0oby5wg3zQWCIpv3=sv z%y5mq*+``cT+E7iwKBN7TW{I`0l2tG`np)iiCgy&y+0QZhX`ueEraZK?zIm98y5zb zuENypmlV=%tK#Pe4?q$&RLp1!Rr7{K3Sm#y4-kP()G?krv1p1LPUpGVWK(i^5n)?i zGwKqW0})b5&pH7SUd3bicI2Bi%+9@q>02SUHeTI?(Hh68Ke1QPs%_Hsw9!ALHSSdF zLVc6hI1UwxTXX7%l)!wOu$OkPat-q(h6WXK@*8z2P=|W%GM1~B9G-0QTI(y!AU_@0!i5`)xIltPw${`89g$t6w^mA8c2v$K1M_|9cX zwc=Umyt8JS1t8hfr-oSt=6m=_w zE7|H5a@)Ch2QiEee<^UNU5(r&08{QE4RaZ!O< zcX9-QOUS3@eVhx_{}b2V=k(@Fds?K1ITxRC6clPGm{HygWUn9Uqz;2LTux-v7`|r6 zp;zxdLgv*uI~G!KDF6*Uky^S)$i2H0$SC<2o>HFxtFe_Xg7+ung?8n-K@Kj#%vQSQ zD~2P~Ew>X;-=Sq+GdXdV$#-{DTM#pMg{<_%NmbRro}abLb^^_DdWb4rXC-B4Ta_J#8_eK zbRy?+@#mtZ5`~;m^J&scTnHq(9!AWvZ0M{7mSdJ zjmA9-fM3u`mUiQ54F{w|v6z6R<{y4j`SUx81;B;UqI9svm_ryaKrwT=-@c{*PnL~k zTaCu2P2SlBt#D&sg`wqyLVB7!NWn1*^Kob#s%XyY!I1U2siga9yOeqpe2v{-V#64- zz>hEN1S!5INnV&aZIj9Srg6Mm1_7MDA?Mg%U%P8UH7X(Gl3*|t#4Pr^0*(u8=BZPN zKh8yv*n@mDdp#rz`J(4)D*oa^HVs#3L8O@CN|`t5Ow^dwzNX1%BARXk5vIz|?E%!3 zS%n%`k<#1cv3u=VYRrR-X>)&Ms-`D-bnpSL|!N84id74`!r=D zxODZ=*z7{SzvGGJz{th8kG9G6BKIK|KW3rK`E(K_BAyS6PNx1*2r-XA_=SlUMV4NN zAOjoc5CaA5Y3NWCJT_IxtakIMl?#_pxVEJhD5^%Y_tQ6)1K>|+dzAK+W{r!|o9^CnDsT#SN)<(gqUdom~$;Z}!#c?~M}Q#feP#o%xgnXJhP-QQfLD{J^3? z{o%2a#i(1l>6iowN$)r z`v^?AiPd}+)F*4NaRAPRF$9}OdpikPz0XgSx8hLN1WBH_sCg1oPHl>l1TaX(S(s9XDD};*;B}Y zGml*gc;tI?V4#Q27=(-TO?67%a5;D3wqbGZdl#)=-9?QTnyg;J0S4tVf~pC_Ph3#I z+TRKXT*v*FSF%DTE=8SP;7>=QCs0y7xl^NFhiz;mI_m&mgga@gZ@ z1f*aS1-3@nFi0Z^Ik>@xvHXdPI@Gv-gQ0Xdh*UCFjeVNnCdA>3?h z3u)Mdg?$)QKcYmtzbKv|S0q-3L~Mw}VB-4Ro{)5_&LB6?Y7$Oe0qpDXSnitnWJ{Az zMuWIt1ZoIt_BsyuTryJ17`r$Av@9lTbbYT5#31#+!S7Qrb2&4l;W9?DeQ?WjfXC*t zk4h}y!mTWE_!s3d!1iV~496Gaw!h`3>Y+%r^VZ)5vZm4Iy z>+#uGQ!erG%y16_Qlzb;SR4BY%L3Y(3zOIjo+eWJNsinLaw-U z{r!N-g>|57Ko^f%<6`P-^8W*@vQWPYEp?BMy$tKjCEXWyTp@@mG?-FNEzt7ok0F)5 zflOS^VqFb|j63&<1HS;|2Y5Kl`wi1&T0I=V`$V^Cy`{^?Cfc39OdbSkwR=^tzU#d?c5gq}}Q9TAj9N zc?4Z{xsd5os=o_i$X2p+9JnXhWbZmIW#n%$I`oZ^%>kav{bEQTAx6LVvvEMpMT^EC z)Cdr6i?wLZ&Z(2KT^(E{r zDZ=$7pR}OYf*NM8u~`?nV!1ym{~v~;OBXRz6uKQ*?o2d8%5 znKuDApFrodIrY8Spsj{XqasxxWUrTZ`~d8$2>;f>m8O`DALfLL`j?moS;*^qBzS@P zGeUkoJ*|bw$Wuf-Wf(#B~D|o&zDb{B^!PmY++uPHAf(f)#3q7VIP;&)xW4 z6-dIRHn5-H#d=Q&PGJzu=$aKE50@0T66Qc5iIVx? zA^_xKI4TC*CZv9v?cUWJsB`gf;EX7td~YGAzWnqgVEdZzF=%Io3{OeSs*uJuEim2S zj=`-U5$zYt_OYMYO+KJHy?TV4`_0@rz{rJQ=2S+iQ-O^ zSF@o^sX|Ii_3L)HiY=Mp*6i-2KVo!GAeD3{D% zN!#KUF%U>&+|jzquMlYq9*Z|&+61xbo7hQUMi48 z$OQxT+y()*lMpp`rHwH8Z4}1N^bSO<4nC0Qy^l#AE?1IU_4?fUS7>pyzmVA-%UlQG zTs$$&Hy~t%y?gV4k_&+-)_sJID;26!YpqxCST59_(97ig9dby896h7#vp|+l=orKy zCok=nUf4Z6i_h20=q3H#;YMIswSeBgf8r9x`yTfkWtfLcV-xR7Ifs6wlwMO)?jD zmBAADBARL-z9D3?$3vkfE=BPno)cN1LYCXypbrrKEdnrWX!~c6-8tK2gSo$|LmPKS zwn_r2`Q4>tjVved9GK(=`k5kXJ!{%V0OrEPHtG|07IMO$nG`nth?bi{SFMO6nI2`Z zg^>Mv&7(Lr7e^zi;>Rd<4OLn^m7}mo5QFFUy?8ZI6S&l|7`4eGtb{_*gzWHg2WlR$ zoAwxk5nr)vbz|?THF~Aq*7w5)AQYF2R>!7~yk$|g29}g_+&;rY9#vbx&Lz;Svqe zQQXC3MlLO7jOk(_U-iwVuwFhM4oRRmHLj2!et4leuyaWv$D>$-wt3MdB;?y~*P>f`SvKs8G2`4>VC7;yB9YoaWQs&#O$~0+E#&e1 z`kjFDLt58PwJy{*d0pdBks!AJa%uu_bFsk;Dn`AT>?l&-UZpJLd70kD%GJNkHThix zJGQ8gZhMNf`RfF(Z@qzeCWF(~g+i{me#&`3%qN%($Yp3hrgiQ#175Jp(I;ZunYzHv zg)AwN0PoBH~*p`jV1%IJmV3;Or@ zoBN7uvll`#E@A8t+ay?@YMg5{ak}-MHL|dQwszs^DIqf+n?YVbZ5WHI+vxHpuXp$r>dzMnS$^^TPXQs922*huUK*rI zA^+@mf+h%avCm`JVi=Bib~J^B=)Pnz2PMKz5OVp?$hi_v9f80FO)8FiquMoZN-UedAmTo&1l%<&tMM z5)0NWdl0hgM~^kZ!gDe5G$o4S zpV->F4@lsR$5YyjO9IC*>oLf^5z^+YwFyG}Z)|lgJi}Tr4ez9$}3_tqvi- z^n7#%*uExw9D0PI#h^V6GkimF>eNK)^v^+SsA~?ULn1D7S&hw$(UFEmPa!b=V+J+4 z;^OC^PgkbTTjG}ldfmY~8xo0uZ)755qanJIVCQA?*Hz7$##DA8|2h4L9T?f1cs1LmW_s-|X9?F`$lCdHJ3t~XCZe`>L+{bk z+ouC2mqQF+LCd7k$Qc*U>&H+ zHptSh9whpfUd4efxnG<8Dr{DV&F4TP?^rJ*c8x8v=`T!M0KwiT;TVjU(7s@c(;!ri zD&%WN`}YD~avCfrYaQNg@(V##o#t&ToP*8?OMK2Fg6iJbq5jF!NbPm9QazNqwe zyE0X`o;Mz@x-h!*>+_X>eH-EDFtID$@7Tjmjga>*^ZJ1MGdvu|pvJY0B6=+dx&3G! z+01=u-PagPsA8>ME7scMqLM}V<$^(G>G9C^kZo%0*A@fF7rjX{l1nykD_T5ZATQR+E zxU%cEw*p!&*K#B@no1cK zMFcPzA3H-~`pty4YnQxROh#M6vQ!}tEYGS58S+UE2Eo~Qs!)+pEbdvw<(AZD%n3Oy z`OTqNVlE}{1ssLI28Vx8uLcc|;8Ic*Y{VACR)kDSnm!W{x!Bpk_X9@PPrB@Sfa7Ad zYLGTUI9rhhM~|eQy?PhNsc#XtR)siRifY1!O&cc(!4S-=e-@Ik;neDlr55tmJvUql zq+9}o7t^#lFFlD%E#l+WW zWDg-%J=K9KbAZ6oK`KebW zh;I2kE2L_BcN@%Tzuax`Xs&*x^Ju{4;tU!>tySKNXwh%GXFw7%hX=ea|N1NRz*e=n zoL<%G?ci|PM)mW$GOHvOex)H5Y+g~Osx>pyVYm4Sv5iN1T+S+q8LG!)9ibL}pq!e8 z?;j_Ee^6Ivnwl*Vn;DOXLOYG>*%KHku-<1;mzQdX;;dBoOD_syWaCtqXdpb0y02=5 zXy~izb-5fv{fUdOZtn)|a!Cc?>DjDe*|1KvT6=l@c60aA2fZJ~Kh{SOMbYs0no6nEXjb$80E?!LJ^%m! literal 0 HcmV?d00001 diff --git a/tests/e2e/test_model_alias_routing.py b/tests/e2e/test_model_alias_routing.py index 6539deb6..c285bda8 100644 --- a/tests/e2e/test_model_alias_routing.py +++ b/tests/e2e/test_model_alias_routing.py @@ -499,3 +499,138 @@ def test_anthropic_client_with_coding_model_alias_and_tools(): # Should get either text response or tool use blocks for coding assistance assert text_content or len(tool_use_blocks) > 0 + + +@pytest.mark.flaky(retries=0) # Disable retries to see the actual failure +def test_anthropic_client_with_coding_model_alias_and_tools_streaming(): + """Test Anthropic client using 'coding-model' alias (maps to Bedrock) with coding question and tools - streaming""" + logger.info( + "Testing Anthropic client with 'coding-model' alias -> Bedrock with tools (streaming)" + ) + + base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "") + client = anthropic.Anthropic(api_key="test-key", base_url=base_url) + + text_chunks = [] + tool_use_blocks = [] + all_events = [] # Capture all events for debugging + + try: + with client.messages.stream( + model="coding-model", # This should resolve to us.amazon.nova-premier-v1:0 + max_tokens=1000, + messages=[ + { + "role": "user", + "content": "I need to write a Python function that calculates the factorial of a number. Can you help me write and run it?", + } + ], + tools=[ + { + "name": "run_python_code", + "description": "Execute Python code and return the result", + "input_schema": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Python code to execute", + } + }, + "required": ["code"], + }, + } + ], + tool_choice={"type": "auto"}, + ) as stream: + for event in stream: + # Extract index if available + index = getattr(event, "index", None) + + # Log and capture all events for debugging + all_events.append( + {"type": event.type, "index": index, "event": str(event)[:200]} + ) + logger.info(f"Event #{len(all_events)}: {event.type} [index={index}]") + + # Collect text deltas + if event.type == "content_block_delta" and hasattr(event, "delta"): + if event.delta.type == "text_delta": + text_chunks.append(event.delta.text) + + # Collect tool use blocks + if event.type == "content_block_start" and hasattr( + event, "content_block" + ): + if event.content_block.type == "tool_use": + tool_use_blocks.append(event.content_block) + + final_message = stream.get_final_message() + except Exception as e: + logger.error(f"Exception during streaming: {type(e).__name__}: {e}") + logger.error(f"Events received before error: {len(all_events)}") + logger.error(f"Text chunks collected: {len(text_chunks)}") + logger.error(f"Tool use blocks collected: {len(tool_use_blocks)}") + logger.error("\nLast 20 events before crash:") + for evt in all_events[-20:]: + logger.error(f" {evt['type']:30s} index={evt['index']}") + raise + + full_text = "".join(text_chunks) + logger.info(f"Streaming response from coding-model with tools: {full_text}") + logger.info(f"Total events received: {len(all_events)}") + logger.info( + f"Text chunks: {len(text_chunks)}, Tool use blocks: {len(tool_use_blocks)}" + ) + + # Should get either text response or tool use blocks for coding assistance + # Modified assertion to be more lenient and provide better error messages + assert ( + full_text or len(tool_use_blocks) > 0 + ), f"Expected text or tool use. Got text_len={len(full_text)}, tools={len(tool_use_blocks)}, events={len(all_events)}" + + # Verify final message structure + assert final_message is not None, "Final message should not be None" + assert ( + final_message.content and len(final_message.content) > 0 + ), f"Final message should have content. Got: {final_message.content if final_message else 'None'}" + + +def test_anthropic_client_streaming_with_bedrock(): + """Test Anthropic client using 'coding-model' alias (maps to Bedrock) with streaming""" + logger.info( + "Testing Anthropic client with 'coding-model' alias -> Bedrock (streaming)" + ) + + base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "") + client = anthropic.Anthropic(api_key="test-key", base_url=base_url) + + text_chunks = [] + + with client.messages.stream( + model="coding-model", # This should resolve to us.amazon.nova-premier-v1:0 + max_tokens=500, + messages=[ + { + "role": "user", + "content": "Write a short 4-line sonnet about coding.", + } + ], + ) as stream: + for event in stream: + # Collect text deltas + if event.type == "content_block_delta" and hasattr(event, "delta"): + if event.delta.type == "text_delta": + text_chunks.append(event.delta.text) + + final_message = stream.get_final_message() + + full_text = "".join(text_chunks) + logger.info(f"Response: {full_text}") + + # Should get a text response + assert len(full_text) > 0, "Expected text response from streaming" + + # Verify final message structure + assert final_message is not None + assert final_message.content and len(final_message.content) > 0