mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
enable state management for v1/responses (#631)
* first commit with tests to enable state management via memory * fixed logs to follow the conversational flow a bit better * added support for supabase * added the state_storage_v1_responses flag, and use that to store state appropriately * cleaned up logs and fixed issue with connectivity for llm gateway in weather forecast demo * fixed mixed inputs from openai v1/responses api (#632) * fixed mixed inputs from openai v1/responses api * removing tracing from model-alias-routing * handling additional input types from openai responses --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local> * resolving PR comments --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
This commit is contained in:
parent
33e90dd338
commit
d5a273f740
26 changed files with 2687 additions and 76 deletions
25
tests/e2e/arch_config_memory_state_v1_responses.yaml
Normal file
25
tests/e2e/arch_config_memory_state_v1_responses.yaml
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
version: v0.1

listeners:
  egress_traffic:
    address: 0.0.0.0
    port: 12000
    message_format: openai
    timeout: 30s

llm_providers:
  # OpenAI Models
  - model: openai/gpt-5-mini-2025-08-07
    access_key: $OPENAI_API_KEY
    default: true

  # Anthropic Models
  - model: anthropic/claude-sonnet-4-20250514
    access_key: $ANTHROPIC_API_KEY

# State storage configuration for v1/responses API
# Manages conversation state for multi-turn conversations
state_storage:
  # Type: memory | postgres
  type: memory
|
@ -69,6 +69,14 @@ log running e2e tests for openai responses api client
|
|||
log ========================================
poetry run pytest test_openai_responses_api_client.py

log startup arch gateway with state storage for openai responses api client demo
archgw down
archgw up arch_config_memory_state_v1_responses.yaml

log running e2e tests for openai responses api client with state
log ========================================
poetry run pytest test_openai_responses_api_client_with_state.py

log shutting down the weather_forecast demo
log =======================================
cd ../../demos/samples_python/weather_forecast
|
|
|
|||
218
tests/e2e/test_openai_responses_api_client_with_state.py
Normal file
218
tests/e2e/test_openai_responses_api_client_with_state.py
Normal file
|
|
@ -0,0 +1,218 @@
|
|||
import openai
import pytest
import os
import logging
import sys

# Set up logging so test progress is visible on stdout when run under pytest.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)],
)
logger = logging.getLogger(__name__)

# Gateway endpoint under test; override via the LLM_GATEWAY_ENDPOINT env var.
# Tests strip the "/v1/chat/completions" suffix to derive the base URL.
LLM_GATEWAY_ENDPOINT = os.getenv(
    "LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1/chat/completions"
)
||||
def test_conversation_state_management_two_turn():
    """
    Test conversation state management across two turns:
    1. Send initial message to non-OpenAI model via v1/responses
    2. Capture response_id from first response
    3. Send second message with previous_response_id
    4. Verify model receives both messages in correct order
    """
    # Derive the gateway base URL from the chat-completions endpoint.
    base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    client = openai.OpenAI(api_key="test-key", base_url=f"{base_url}/v1")

    logger.info("\n" + "=" * 80)
    logger.info("TEST: Conversation State Management - Two Turn Flow")
    logger.info("=" * 80)

    # Turn 1: Send initial message to Anthropic (non-OpenAI model)
    logger.info("\n[TURN 1] Sending initial message...")
    resp1 = client.responses.create(
        model="claude-sonnet-4-20250514",
        input="My name is Alice and I like pizza.",
    )

    # Extract response_id from first response
    response_id_1 = resp1.id
    logger.info(f"[TURN 1] Received response_id: {response_id_1}")
    logger.info(f"[TURN 1] Model response: {resp1.output_text}")

    assert response_id_1 is not None, "First response should have an id"
    assert len(resp1.output_text) > 0, "First response should have content"

    # Turn 2: Send follow-up message with previous_response_id.
    # Ask the model to list all messages to verify state was combined.
    logger.info(
        f"\n[TURN 2] Sending follow-up with previous_response_id={response_id_1}"
    )
    resp2 = client.responses.create(
        model="claude-sonnet-4-20250514",
        input="Please list all the messages you have received in our conversation, numbering each one.",
        previous_response_id=response_id_1,
    )

    response_id_2 = resp2.id
    logger.info(f"[TURN 2] Received response_id: {response_id_2}")
    logger.info(f"[TURN 2] Model response: {resp2.output_text}")

    assert response_id_2 is not None, "Second response should have an id"
    assert response_id_2 != response_id_1, "Second response should have different id"

    # Verify the model received the conversation history. Different models
    # phrase this differently, so accept any marker that references turn-1
    # content ("alice", "pizza") or a message count / ordinal ("two", "2",
    # "first", "second").
    response_lower = resp2.output_text.lower()
    conversation_markers = ("alice", "pizza", "two", "2", "first", "second")
    has_conversation_context = any(
        marker in response_lower for marker in conversation_markers
    )

    logger.info(
        f"\n[VALIDATION] Conversation context preserved: {has_conversation_context}"
    )

    print(f"\n{'='*80}")
    print("Conversation State Test Results:")
    print(f"Turn 1 Response ID: {response_id_1}")
    print(f"Turn 2 Response ID: {response_id_2}")
    print(f"Turn 1 Output: {resp1.output_text[:100]}...")
    print(f"Turn 2 Output: {resp2.output_text}")
    print(f"Conversation Context Preserved: {has_conversation_context}")
    print(f"{'='*80}\n")

    assert has_conversation_context, (
        f"Model should have received conversation history. "
        f"Response: {resp2.output_text}"
    )
||||
def test_conversation_state_management_two_turn_streaming():
    """
    Test conversation state management across two turns with streaming:
    1. Send initial streaming message to non-OpenAI model via v1/responses
    2. Capture response_id from first response
    3. Send second streaming message with previous_response_id
    4. Verify model receives both messages in correct order
    """

    def drain_stream(stream):
        """Consume a v1/responses event stream; return (joined_text, response_id).

        Text is accumulated from response.output_text.delta events; the
        response id is taken from the terminal response.completed event
        (None if no such event arrives).
        """
        chunks = []
        response_id = None
        for event in stream:
            event_type = getattr(event, "type", None)
            if event_type == "response.output_text.delta" and getattr(
                event, "delta", None
            ):
                chunks.append(event.delta)
            if event_type == "response.completed" and getattr(
                event, "response", None
            ):
                response_id = event.response.id
        return "".join(chunks), response_id

    # Derive the gateway base URL from the chat-completions endpoint.
    base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    client = openai.OpenAI(api_key="test-key", base_url=f"{base_url}/v1")

    logger.info("\n" + "=" * 80)
    logger.info("TEST: Conversation State Management - Two Turn Streaming Flow")
    logger.info("=" * 80)

    # Turn 1: Send initial streaming message to Anthropic (non-OpenAI model)
    logger.info("\n[TURN 1] Sending initial streaming message...")
    stream1 = client.responses.create(
        model="claude-sonnet-4-20250514",
        input="My name is Alice and I like pizza.",
        stream=True,
    )
    output_1, response_id_1 = drain_stream(stream1)
    logger.info(f"[TURN 1] Received response_id: {response_id_1}")
    logger.info(f"[TURN 1] Model response: {output_1}")

    assert response_id_1 is not None, "First response should have an id"
    assert len(output_1) > 0, "First response should have content"

    # Turn 2: Send follow-up streaming message with previous_response_id
    logger.info(
        f"\n[TURN 2] Sending follow-up streaming request with previous_response_id={response_id_1}"
    )
    stream2 = client.responses.create(
        model="claude-sonnet-4-20250514",
        input="Please list all the messages you have received in our conversation, numbering each one.",
        previous_response_id=response_id_1,
        stream=True,
    )
    output_2, response_id_2 = drain_stream(stream2)
    logger.info(f"[TURN 2] Received response_id: {response_id_2}")
    logger.info(f"[TURN 2] Model response: {output_2}")

    assert response_id_2 is not None, "Second response should have an id"
    assert response_id_2 != response_id_1, "Second response should have different id"

    # Verify the model received the conversation history. Different models
    # phrase this differently, so accept any marker that references turn-1
    # content ("alice", "pizza") or a message count / ordinal ("two", "2",
    # "first", "second").
    response_lower = output_2.lower()
    conversation_markers = ("alice", "pizza", "two", "2", "first", "second")
    has_conversation_context = any(
        marker in response_lower for marker in conversation_markers
    )

    logger.info(
        f"\n[VALIDATION] Conversation context preserved: {has_conversation_context}"
    )

    print(f"\n{'='*80}")
    print("Streaming Conversation State Test Results:")
    print(f"Turn 1 Response ID: {response_id_1}")
    print(f"Turn 2 Response ID: {response_id_2}")
    print(f"Turn 1 Output: {output_1[:100]}...")
    print(f"Turn 2 Output: {output_2}")
    print(f"Conversation Context Preserved: {has_conversation_context}")
    print(f"{'='*80}\n")

    assert has_conversation_context, (
        f"Model should have received conversation history. " f"Response: {output_2}"
    )
Loading…
Add table
Add a link
Reference in a new issue