Add support for Amazon Bedrock Converse and ConverseStream (#588)

* first commit to get Bedrock Converse API working. Next commit support for streaming and binary frames

* adding translation from BedrockBinaryFrameDecoder to AnthropicMessagesEvent

* Claude Code works with Amazon Bedrock

* added tests for openai streaming from bedrock

* PR comments fixed

* adding support for bedrock in docs as supported provider

* cargo fmt

* reverted to chatgpt models for claude code routing

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-288.local>
Co-authored-by: Adil Hafeez <adil.hafeez@gmail.com>
This commit is contained in:
Salman Paracha 2025-10-22 11:31:21 -07:00 committed by GitHub
parent ba826b1961
commit 9407ae6af7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
35 changed files with 7362 additions and 1493 deletions

View file

@ -403,3 +403,381 @@ def test_anthropic_thinking_mode_streaming():
final_block_types = [blk.type for blk in final.content]
assert "text" in final_block_types
assert "thinking" in final_block_types
def test_openai_client_with_coding_model_alias_and_tools():
    """Test OpenAI client using 'coding-model' alias (maps to Bedrock) with coding question and tools"""
    logger.info("Testing OpenAI client with 'coding-model' alias -> Bedrock with tools")

    # Gateway root is the endpoint minus the chat-completions path; the OpenAI
    # SDK appends its own route under /v1.
    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    oai_client = openai.OpenAI(
        api_key="test-key",
        base_url=f"{gateway_root}/v1",
    )

    # Single tool offered to the model: a Python code runner.
    coding_tool = {
        "type": "function",
        "function": {
            "name": "run_python_code",
            "description": "Execute Python code and return the result",
            "parameters": {
                "type": "object",
                "properties": {
                    "code": {
                        "type": "string",
                        "description": "Python code to execute",
                    }
                },
                "required": ["code"],
            },
        },
    }

    completion = oai_client.chat.completions.create(
        model="coding-model",  # This should resolve to us.amazon.nova-premier-v1:0
        max_tokens=1000,
        messages=[
            {
                "role": "user",
                "content": "I need to write a Python function that calculates the factorial of a number. Can you help me write and run it?",
            }
        ],
        tools=[coding_tool],
        tool_choice="auto",
    )

    choice = completion.choices[0]
    # Should get either text response or tool calls for coding assistance
    assert choice.message.content is not None or (
        choice.message.tool_calls is not None and len(choice.message.tool_calls) > 0
    )
def test_anthropic_client_with_coding_model_alias_and_tools():
    """Test Anthropic client using 'coding-model' alias (maps to Bedrock) with coding question and tools"""
    logger.info(
        "Testing Anthropic client with 'coding-model' alias -> Bedrock with tools"
    )

    # Gateway root is the endpoint minus the chat-completions path.
    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    anthropic_client = anthropic.Anthropic(api_key="test-key", base_url=gateway_root)

    # Anthropic-native tool definition (input_schema rather than OpenAI's
    # function.parameters wrapper).
    coding_tool = {
        "name": "run_python_code",
        "description": "Execute Python code and return the result",
        "input_schema": {
            "type": "object",
            "properties": {
                "code": {
                    "type": "string",
                    "description": "Python code to execute",
                }
            },
            "required": ["code"],
        },
    }

    message = anthropic_client.messages.create(
        model="coding-model",  # This should resolve to us.amazon.nova-premier-v1:0
        max_tokens=1000,
        messages=[
            {
                "role": "user",
                "content": "I need to write a Python function that calculates the factorial of a number. Can you help me write and run it?",
            }
        ],
        tools=[coding_tool],
        tool_choice={"type": "auto"},
    )

    # Separate the response into its text and tool_use parts.
    text_parts = [blk.text for blk in message.content if blk.type == "text"]
    text_content = "".join(text_parts)
    tool_use_blocks = [blk for blk in message.content if blk.type == "tool_use"]

    logger.info(f"Response from coding-model alias via Anthropic: {text_content}")
    logger.info(f"Tool use blocks: {len(tool_use_blocks)}")

    # Should get either text response or tool use blocks for coding assistance
    assert text_content or len(tool_use_blocks) > 0
@pytest.mark.flaky(retries=0)  # Disable retries to see the actual failure
def test_anthropic_client_with_coding_model_alias_and_tools_streaming():
    """Test Anthropic client using 'coding-model' alias (maps to Bedrock) with coding question and tools - streaming"""
    logger.info(
        "Testing Anthropic client with 'coding-model' alias -> Bedrock with tools (streaming)"
    )
    # Gateway root is the endpoint minus the chat-completions path; the
    # Anthropic SDK appends its own messages route.
    base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    client = anthropic.Anthropic(api_key="test-key", base_url=base_url)
    text_chunks = []  # accumulated text_delta fragments
    tool_use_blocks = []  # content_block_start blocks of type tool_use
    all_events = []  # Capture all events for debugging
    try:
        with client.messages.stream(
            model="coding-model",  # This should resolve to us.amazon.nova-premier-v1:0
            max_tokens=1000,
            messages=[
                {
                    "role": "user",
                    "content": "I need to write a Python function that calculates the factorial of a number. Can you help me write and run it?",
                }
            ],
            tools=[
                {
                    "name": "run_python_code",
                    "description": "Execute Python code and return the result",
                    "input_schema": {
                        "type": "object",
                        "properties": {
                            "code": {
                                "type": "string",
                                "description": "Python code to execute",
                            }
                        },
                        "required": ["code"],
                    },
                }
            ],
            tool_choice={"type": "auto"},
        ) as stream:
            for event in stream:
                # Extract index if available
                index = getattr(event, "index", None)
                # Log and capture all events for debugging
                all_events.append(
                    {"type": event.type, "index": index, "event": str(event)[:200]}
                )
                logger.info(f"Event #{len(all_events)}: {event.type} [index={index}]")
                # Collect text deltas
                if event.type == "content_block_delta" and hasattr(event, "delta"):
                    if event.delta.type == "text_delta":
                        text_chunks.append(event.delta.text)
                # Collect tool use blocks
                if event.type == "content_block_start" and hasattr(
                    event, "content_block"
                ):
                    if event.content_block.type == "tool_use":
                        tool_use_blocks.append(event.content_block)
            final_message = stream.get_final_message()
    except Exception as e:
        # Dump everything gathered so far so a mid-stream failure is
        # diagnosable from logs, then re-raise to fail the test.
        logger.error(f"Exception during streaming: {type(e).__name__}: {e}")
        logger.error(f"Events received before error: {len(all_events)}")
        logger.error(f"Text chunks collected: {len(text_chunks)}")
        logger.error(f"Tool use blocks collected: {len(tool_use_blocks)}")
        logger.error("\nLast 20 events before crash:")
        for evt in all_events[-20:]:
            logger.error(f" {evt['type']:30s} index={evt['index']}")
        raise
    full_text = "".join(text_chunks)
    logger.info(f"Streaming response from coding-model with tools: {full_text}")
    logger.info(f"Total events received: {len(all_events)}")
    logger.info(
        f"Text chunks: {len(text_chunks)}, Tool use blocks: {len(tool_use_blocks)}"
    )
    # Should get either text response or tool use blocks for coding assistance
    # Modified assertion to be more lenient and provide better error messages
    assert (
        full_text or len(tool_use_blocks) > 0
    ), f"Expected text or tool use. Got text_len={len(full_text)}, tools={len(tool_use_blocks)}, events={len(all_events)}"
    # Verify final message structure
    assert final_message is not None, "Final message should not be None"
    assert (
        final_message.content and len(final_message.content) > 0
    ), f"Final message should have content. Got: {final_message.content if final_message else 'None'}"
def test_anthropic_client_streaming_with_bedrock():
    """Test Anthropic client using 'coding-model' alias (maps to Bedrock) with streaming"""
    logger.info(
        "Testing Anthropic client with 'coding-model' alias -> Bedrock (streaming)"
    )

    # Gateway root is the endpoint minus the chat-completions path.
    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    anthropic_client = anthropic.Anthropic(api_key="test-key", base_url=gateway_root)

    collected_text = []
    with anthropic_client.messages.stream(
        model="coding-model",  # This should resolve to us.amazon.nova-premier-v1:0
        max_tokens=500,
        messages=[
            {
                "role": "user",
                "content": "Write a short 4-line sonnet about coding.",
            }
        ],
    ) as stream:
        for event in stream:
            # Collect text deltas only; ignore all other event types.
            if event.type != "content_block_delta" or not hasattr(event, "delta"):
                continue
            if event.delta.type == "text_delta":
                collected_text.append(event.delta.text)
        final_message = stream.get_final_message()

    full_text = "".join(collected_text)
    logger.info(f"Response: {full_text}")

    # Should get a text response
    assert len(full_text) > 0, "Expected text response from streaming"
    # Verify final message structure
    assert final_message is not None
    assert final_message.content and len(final_message.content) > 0
def test_openai_client_streaming_with_bedrock():
    """Test OpenAI client using 'coding-model' alias (maps to Bedrock) with streaming"""
    logger.info(
        "Testing OpenAI client with 'coding-model' alias -> Bedrock (streaming)"
    )

    # Gateway root is the endpoint minus the chat-completions path; the OpenAI
    # SDK appends its own route under /v1.
    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    oai_client = openai.OpenAI(
        api_key="test-key",
        base_url=f"{gateway_root}/v1",
    )

    response_stream = oai_client.chat.completions.create(
        model="coding-model",  # This should resolve to us.amazon.nova-premier-v1:0
        max_tokens=500,
        messages=[
            {
                "role": "user",
                "content": "Write a short 4-line sonnet about coding.",
            }
        ],
        stream=True,
    )

    # Accumulate every non-empty content delta from the stream.
    pieces = []
    for chunk in response_stream:
        if not chunk.choices:
            continue
        fragment = chunk.choices[0].delta.content
        if fragment:
            pieces.append(fragment)

    full_content = "".join(pieces)
    logger.info(f"Streaming response from coding-model: {full_content}")

    # Should get a text response
    assert len(full_content) > 0, "Expected text response from streaming"
def test_openai_client_streaming_with_bedrock_and_tools():
    """Test OpenAI client using 'coding-model' alias (maps to Bedrock) with streaming and tools"""
    logger.info(
        "Testing OpenAI client with 'coding-model' alias -> Bedrock with tools (streaming)"
    )

    # Gateway root is the endpoint minus the chat-completions path.
    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    oai_client = openai.OpenAI(
        api_key="test-key",
        base_url=f"{gateway_root}/v1",
    )

    coding_tool = {
        "type": "function",
        "function": {
            "name": "run_python_code",
            "description": "Execute Python code and return the result",
            "parameters": {
                "type": "object",
                "properties": {
                    "code": {
                        "type": "string",
                        "description": "Python code to execute",
                    }
                },
                "required": ["code"],
            },
        },
    }

    stream = oai_client.chat.completions.create(
        model="coding-model",  # This should resolve to us.amazon.nova-premier-v1:0
        max_tokens=1000,
        messages=[
            {
                "role": "user",
                "content": "I need to write a Python function that calculates the factorial of a number. Can you help me write and run it?. You should use the tool to run the code.",
            }
        ],
        tools=[coding_tool],
        tool_choice="auto",
        stream=True,
    )

    content_chunks = []
    tool_calls = []

    def _merge_tool_call(fragment):
        """Grow the accumulator to cover fragment.index, then merge its partial fields."""
        while len(tool_calls) <= fragment.index:
            tool_calls.append(
                {
                    "id": "",
                    "type": "function",
                    "function": {"name": "", "arguments": ""},
                }
            )
        entry = tool_calls[fragment.index]
        if fragment.id:
            entry["id"] = fragment.id
        if fragment.function:
            if fragment.function.name:
                entry["function"]["name"] = fragment.function.name
            if fragment.function.arguments:
                entry["function"]["arguments"] += fragment.function.arguments

    chunk_count = 0
    for chunk in stream:
        chunk_count += 1
        if not (chunk.choices and len(chunk.choices) > 0):
            continue
        delta = chunk.choices[0].delta
        # Log what we see in each chunk
        has_content = delta.content is not None
        has_tool_calls = delta.tool_calls is not None
        # Log every 50th chunk or any chunk with tool calls
        if chunk_count % 50 == 0 or has_tool_calls:
            logger.info(
                f"Chunk {chunk_count}: content={has_content}, tool_calls={has_tool_calls}"
            )
            if has_tool_calls:
                logger.info(f" Tool calls in chunk: {delta.tool_calls}")
        # Collect text content
        if delta.content:
            content_chunks.append(delta.content)
        # Collect tool calls, merging fragments by their index
        if delta.tool_calls:
            for fragment in delta.tool_calls:
                _merge_tool_call(fragment)

    full_content = "".join(content_chunks)
    logger.info(f"Streaming response from coding-model with tools: {full_content}")
    logger.info(f"Tool calls collected: {len(tool_calls)}")
    if tool_calls:
        for i, tc in enumerate(tool_calls):
            logger.info(f" Tool call {i}: {tc['function']['name']}")

    # Should get either text response or tool calls for coding assistance
    assert (
        full_content or len(tool_calls) > 0
    ), f"Expected text or tool calls. Got text_len={len(full_content)}, tools={len(tool_calls)}"