mirror of
https://github.com/katanemo/plano.git
synced 2026-04-26 17:26:26 +02:00
Add support for Amazon Bedrock Converse and ConverseStream (#588)
* first commit to get Bedrock Converse API working. Next commit support for streaming and binary frames * adding translation from BedrockBinaryFrameDecoder to AnthropicMessagesEvent * Claude Code works with Amazon Bedrock * added tests for openai streaming from bedrock * PR comments fixed * adding support for bedrock in docs as supported provider * cargo fmt * reverted to chatgpt models for claude code routing --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-288.local> Co-authored-by: Adil Hafeez <adil.hafeez@gmail.com>
This commit is contained in:
parent
ba826b1961
commit
9407ae6af7
35 changed files with 7362 additions and 1493 deletions
|
|
@ -403,3 +403,381 @@ def test_anthropic_thinking_mode_streaming():
|
|||
final_block_types = [blk.type for blk in final.content]
|
||||
assert "text" in final_block_types
|
||||
assert "thinking" in final_block_types
|
||||
|
||||
|
||||
def test_openai_client_with_coding_model_alias_and_tools():
    """Test OpenAI client using 'coding-model' alias (maps to Bedrock) with coding question and tools"""
    logger.info("Testing OpenAI client with 'coding-model' alias -> Bedrock with tools")

    # The gateway root is the endpoint with the chat-completions path stripped off.
    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    oai_client = openai.OpenAI(
        api_key="test-key",
        base_url=f"{gateway_root}/v1",
    )

    # Single function tool the model may choose to call for code execution.
    code_runner_tool = {
        "type": "function",
        "function": {
            "name": "run_python_code",
            "description": "Execute Python code and return the result",
            "parameters": {
                "type": "object",
                "properties": {
                    "code": {
                        "type": "string",
                        "description": "Python code to execute",
                    }
                },
                "required": ["code"],
            },
        },
    }

    completion = oai_client.chat.completions.create(
        model="coding-model",  # This should resolve to us.amazon.nova-premier-v1:0
        max_tokens=1000,
        messages=[
            {
                "role": "user",
                "content": "I need to write a Python function that calculates the factorial of a number. Can you help me write and run it?",
            }
        ],
        tools=[code_runner_tool],
        tool_choice="auto",
    )

    message = completion.choices[0].message
    # Coding assistance may arrive as plain text, as tool calls, or both —
    # accept either form.
    assert message.content is not None or (
        message.tool_calls is not None and len(message.tool_calls) > 0
    )
|
||||
|
||||
|
||||
def test_anthropic_client_with_coding_model_alias_and_tools():
    """Test Anthropic client using 'coding-model' alias (maps to Bedrock) with coding question and tools"""
    logger.info(
        "Testing Anthropic client with 'coding-model' alias -> Bedrock with tools"
    )

    # The gateway root is the endpoint with the chat-completions path stripped off.
    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    claude = anthropic.Anthropic(api_key="test-key", base_url=gateway_root)

    # Single tool (Anthropic schema) the model may choose for code execution.
    code_runner_tool = {
        "name": "run_python_code",
        "description": "Execute Python code and return the result",
        "input_schema": {
            "type": "object",
            "properties": {
                "code": {
                    "type": "string",
                    "description": "Python code to execute",
                }
            },
            "required": ["code"],
        },
    }

    message = claude.messages.create(
        model="coding-model",  # This should resolve to us.amazon.nova-premier-v1:0
        max_tokens=1000,
        messages=[
            {
                "role": "user",
                "content": "I need to write a Python function that calculates the factorial of a number. Can you help me write and run it?",
            }
        ],
        tools=[code_runner_tool],
        tool_choice={"type": "auto"},
    )

    # Split the content blocks into text and tool-use in a single pass.
    text_content = ""
    tool_use_blocks = []
    for block in message.content:
        if block.type == "text":
            text_content += block.text
        elif block.type == "tool_use":
            tool_use_blocks.append(block)

    logger.info(f"Response from coding-model alias via Anthropic: {text_content}")
    logger.info(f"Tool use blocks: {len(tool_use_blocks)}")

    # Should get either text response or tool use blocks for coding assistance
    assert text_content or len(tool_use_blocks) > 0
|
||||
|
||||
|
||||
@pytest.mark.flaky(retries=0)  # Disable retries to see the actual failure
def test_anthropic_client_with_coding_model_alias_and_tools_streaming():
    """Test Anthropic client using 'coding-model' alias (maps to Bedrock) with coding question and tools - streaming

    On a mid-stream failure, the except branch dumps the captured event log
    (last 20 events) before re-raising, so the root cause is visible in CI.
    """
    logger.info(
        "Testing Anthropic client with 'coding-model' alias -> Bedrock with tools (streaming)"
    )

    base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    client = anthropic.Anthropic(api_key="test-key", base_url=base_url)

    text_chunks = []       # text_delta fragments, joined at the end
    tool_use_blocks = []   # content_block_start blocks of type tool_use
    all_events = []  # Capture all events for debugging

    # The whole stream is wrapped so a mid-stream exception still produces
    # a diagnostic dump of everything collected so far.
    try:
        with client.messages.stream(
            model="coding-model",  # This should resolve to us.amazon.nova-premier-v1:0
            max_tokens=1000,
            messages=[
                {
                    "role": "user",
                    "content": "I need to write a Python function that calculates the factorial of a number. Can you help me write and run it?",
                }
            ],
            tools=[
                {
                    "name": "run_python_code",
                    "description": "Execute Python code and return the result",
                    "input_schema": {
                        "type": "object",
                        "properties": {
                            "code": {
                                "type": "string",
                                "description": "Python code to execute",
                            }
                        },
                        "required": ["code"],
                    },
                }
            ],
            tool_choice={"type": "auto"},
        ) as stream:
            for event in stream:
                # Extract index if available (not every event type carries one)
                index = getattr(event, "index", None)

                # Log and capture all events for debugging
                all_events.append(
                    {"type": event.type, "index": index, "event": str(event)[:200]}
                )
                logger.info(f"Event #{len(all_events)}: {event.type} [index={index}]")

                # Collect text deltas
                if event.type == "content_block_delta" and hasattr(event, "delta"):
                    if event.delta.type == "text_delta":
                        text_chunks.append(event.delta.text)

                # Collect tool use blocks
                if event.type == "content_block_start" and hasattr(
                    event, "content_block"
                ):
                    if event.content_block.type == "tool_use":
                        tool_use_blocks.append(event.content_block)

            # Assembled final message for the structural assertions below.
            final_message = stream.get_final_message()
    except Exception as e:
        # Dump everything gathered before the failure, then propagate it.
        logger.error(f"Exception during streaming: {type(e).__name__}: {e}")
        logger.error(f"Events received before error: {len(all_events)}")
        logger.error(f"Text chunks collected: {len(text_chunks)}")
        logger.error(f"Tool use blocks collected: {len(tool_use_blocks)}")
        logger.error("\nLast 20 events before crash:")
        for evt in all_events[-20:]:
            logger.error(f"  {evt['type']:30s} index={evt['index']}")
        raise

    full_text = "".join(text_chunks)
    logger.info(f"Streaming response from coding-model with tools: {full_text}")
    logger.info(f"Total events received: {len(all_events)}")
    logger.info(
        f"Text chunks: {len(text_chunks)}, Tool use blocks: {len(tool_use_blocks)}"
    )

    # Should get either text response or tool use blocks for coding assistance
    # Modified assertion to be more lenient and provide better error messages
    assert (
        full_text or len(tool_use_blocks) > 0
    ), f"Expected text or tool use. Got text_len={len(full_text)}, tools={len(tool_use_blocks)}, events={len(all_events)}"

    # Verify final message structure
    assert final_message is not None, "Final message should not be None"
    assert (
        final_message.content and len(final_message.content) > 0
    ), f"Final message should have content. Got: {final_message.content if final_message else 'None'}"
|
||||
|
||||
|
||||
def test_anthropic_client_streaming_with_bedrock():
    """Test Anthropic client using 'coding-model' alias (maps to Bedrock) with streaming"""
    logger.info(
        "Testing Anthropic client with 'coding-model' alias -> Bedrock (streaming)"
    )

    # The gateway root is the endpoint with the chat-completions path stripped off.
    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    claude = anthropic.Anthropic(api_key="test-key", base_url=gateway_root)

    text_chunks = []

    with claude.messages.stream(
        model="coding-model",  # This should resolve to us.amazon.nova-premier-v1:0
        max_tokens=500,
        messages=[
            {
                "role": "user",
                "content": "Write a short 4-line sonnet about coding.",
            }
        ],
    ) as stream:
        for event in stream:
            # Only text deltas contribute to the reply body.
            if (
                event.type == "content_block_delta"
                and hasattr(event, "delta")
                and event.delta.type == "text_delta"
            ):
                text_chunks.append(event.delta.text)

        # Assembled final message for the structural assertions below.
        final_message = stream.get_final_message()

    full_text = "".join(text_chunks)
    logger.info(f"Response: {full_text}")

    # Should get a text response
    assert len(full_text) > 0, "Expected text response from streaming"

    # Verify final message structure
    assert final_message is not None
    assert final_message.content and len(final_message.content) > 0
|
||||
|
||||
|
||||
def test_openai_client_streaming_with_bedrock():
    """Test OpenAI client using 'coding-model' alias (maps to Bedrock) with streaming"""
    logger.info(
        "Testing OpenAI client with 'coding-model' alias -> Bedrock (streaming)"
    )

    # The gateway root is the endpoint with the chat-completions path stripped off.
    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    oai_client = openai.OpenAI(
        api_key="test-key",
        base_url=f"{gateway_root}/v1",
    )

    stream = oai_client.chat.completions.create(
        model="coding-model",  # This should resolve to us.amazon.nova-premier-v1:0
        max_tokens=500,
        messages=[
            {
                "role": "user",
                "content": "Write a short 4-line sonnet about coding.",
            }
        ],
        stream=True,
    )

    # Keep only chunks that actually carry text content.
    content_chunks = [
        chunk.choices[0].delta.content
        for chunk in stream
        if chunk.choices and chunk.choices[0].delta.content
    ]

    full_content = "".join(content_chunks)
    logger.info(f"Streaming response from coding-model: {full_content}")

    # Should get a text response
    assert len(full_content) > 0, "Expected text response from streaming"
|
||||
|
||||
|
||||
def test_openai_client_streaming_with_bedrock_and_tools():
    """Test OpenAI client using 'coding-model' alias (maps to Bedrock) with streaming and tools"""
    logger.info(
        "Testing OpenAI client with 'coding-model' alias -> Bedrock with tools (streaming)"
    )

    # The gateway root is the endpoint with the chat-completions path stripped off.
    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    oai_client = openai.OpenAI(
        api_key="test-key",
        base_url=f"{gateway_root}/v1",
    )

    # Single function tool the model is nudged to call for code execution.
    code_runner_tool = {
        "type": "function",
        "function": {
            "name": "run_python_code",
            "description": "Execute Python code and return the result",
            "parameters": {
                "type": "object",
                "properties": {
                    "code": {
                        "type": "string",
                        "description": "Python code to execute",
                    }
                },
                "required": ["code"],
            },
        },
    }

    stream = oai_client.chat.completions.create(
        model="coding-model",  # This should resolve to us.amazon.nova-premier-v1:0
        max_tokens=1000,
        messages=[
            {
                "role": "user",
                "content": "I need to write a Python function that calculates the factorial of a number. Can you help me write and run it?. You should use the tool to run the code.",
            }
        ],
        tools=[code_runner_tool],
        tool_choice="auto",
        stream=True,
    )

    content_chunks = []
    tool_calls = []  # accumulated per-index tool-call dicts
    chunk_count = 0

    for chunk in stream:
        chunk_count += 1
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta

        has_content = delta.content is not None
        has_tool_calls = delta.tool_calls is not None

        # Log every 50th chunk or any chunk with tool calls
        if has_tool_calls or chunk_count % 50 == 0:
            logger.info(
                f"Chunk {chunk_count}: content={has_content}, tool_calls={has_tool_calls}"
            )
            if has_tool_calls:
                logger.info(f"  Tool calls in chunk: {delta.tool_calls}")

        # Collect text content
        if delta.content:
            content_chunks.append(delta.content)

        # Collect tool calls: fragments arrive incrementally and are merged
        # into the slot matching their index.
        if delta.tool_calls:
            for fragment in delta.tool_calls:
                # Grow the accumulator until the fragment's slot exists.
                while fragment.index >= len(tool_calls):
                    tool_calls.append(
                        {
                            "id": "",
                            "type": "function",
                            "function": {"name": "", "arguments": ""},
                        }
                    )

                slot = tool_calls[fragment.index]
                if fragment.id:
                    slot["id"] = fragment.id
                if fragment.function:
                    if fragment.function.name:
                        slot["function"]["name"] = fragment.function.name
                    if fragment.function.arguments:
                        slot["function"]["arguments"] += fragment.function.arguments

    full_content = "".join(content_chunks)
    logger.info(f"Streaming response from coding-model with tools: {full_content}")
    logger.info(f"Tool calls collected: {len(tool_calls)}")

    if tool_calls:
        for i, tc in enumerate(tool_calls):
            logger.info(f"  Tool call {i}: {tc['function']['name']}")

    # Should get either text response or tool calls for coding assistance
    assert (
        full_content or len(tool_calls) > 0
    ), f"Expected text or tool calls. Got text_len={len(full_content)}, tools={len(tool_calls)}"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue