mirror of
https://github.com/katanemo/plano.git
synced 2026-06-29 15:49:40 +02:00
Cleaned up logs and fixed a connectivity issue with the LLM gateway in the weather forecast demo
This commit is contained in:
parent
1212b526b8
commit
e49ff4bbf4
8 changed files with 260 additions and 220 deletions
|
|
@ -3,9 +3,9 @@ use brightstaff::handlers::llm::llm_chat;
|
||||||
use brightstaff::handlers::models::list_models;
|
use brightstaff::handlers::models::list_models;
|
||||||
use brightstaff::handlers::function_calling::{function_calling_chat_handler};
|
use brightstaff::handlers::function_calling::{function_calling_chat_handler};
|
||||||
use brightstaff::router::llm_router::RouterService;
|
use brightstaff::router::llm_router::RouterService;
|
||||||
use brightstaff::state::memory::MemoryConversationalStorage;
|
|
||||||
use brightstaff::state::StateStorage;
|
use brightstaff::state::StateStorage;
|
||||||
use brightstaff::state::supabase::SupabaseConversationalStorage;
|
use brightstaff::state::postgresql::PostgreSQLConversationStorage;
|
||||||
|
use brightstaff::state::memory::MemoryConversationalStorage;
|
||||||
use brightstaff::utils::tracing::init_tracer;
|
use brightstaff::utils::tracing::init_tracer;
|
||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
use common::configuration::Configuration;
|
use common::configuration::Configuration;
|
||||||
|
|
@ -123,7 +123,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||||
debug!("Postgres connection string (full): {}", connection_string);
|
debug!("Postgres connection string (full): {}", connection_string);
|
||||||
info!("Initializing conversation state storage: Postgres");
|
info!("Initializing conversation state storage: Postgres");
|
||||||
Arc::new(
|
Arc::new(
|
||||||
SupabaseConversationalStorage::new(connection_string.clone())
|
PostgreSQLConversationStorage::new(connection_string.clone())
|
||||||
.await
|
.await
|
||||||
.expect("Failed to initialize Postgres state storage"),
|
.expect("Failed to initialize Postgres state storage"),
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ use tracing::{debug};
|
||||||
|
|
||||||
pub mod memory;
|
pub mod memory;
|
||||||
pub mod response_state_processor;
|
pub mod response_state_processor;
|
||||||
pub mod supabase;
|
pub mod postgresql;
|
||||||
|
|
||||||
/// Represents the conversational state for a v1/responses request
|
/// Represents the conversational state for a v1/responses request
|
||||||
/// Contains the complete input/output history that can be restored
|
/// Contains the complete input/output history that can be restored
|
||||||
|
|
|
||||||
|
|
@ -8,12 +8,12 @@ use tracing::{debug, info, warn};
|
||||||
|
|
||||||
/// Supabase/PostgreSQL storage backend for conversation state
|
/// Supabase/PostgreSQL storage backend for conversation state
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct SupabaseConversationalStorage {
|
pub struct PostgreSQLConversationStorage {
|
||||||
client: Arc<Client>,
|
client: Arc<Client>,
|
||||||
table_verified: Arc<OnceCell<()>>,
|
table_verified: Arc<OnceCell<()>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SupabaseConversationalStorage {
|
impl PostgreSQLConversationStorage {
|
||||||
/// Creates a new Supabase storage instance with the given connection string
|
/// Creates a new PostgreSQL storage instance with the given connection string
|
||||||
pub async fn new(connection_string: String) -> Result<Self, StateStorageError> {
|
pub async fn new(connection_string: String) -> Result<Self, StateStorageError> {
|
||||||
let (client, connection) = tokio_postgres::connect(&connection_string, NoTls)
|
let (client, connection) = tokio_postgres::connect(&connection_string, NoTls)
|
||||||
|
|
@ -76,7 +76,7 @@ impl SupabaseConversationalStorage {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl StateStorage for SupabaseConversationalStorage {
|
impl StateStorage for PostgreSQLConversationStorage {
|
||||||
async fn put(&self, state: OpenAIConversationState) -> Result<(), StateStorageError> {
|
async fn put(&self, state: OpenAIConversationState) -> Result<(), StateStorageError> {
|
||||||
self.ensure_ready().await?;
|
self.ensure_ready().await?;
|
||||||
|
|
||||||
|
|
@ -251,9 +251,9 @@ mod tests {
|
||||||
// Set TEST_DATABASE_URL environment variable to run integration tests
|
// Set TEST_DATABASE_URL environment variable to run integration tests
|
||||||
// Example: TEST_DATABASE_URL=postgresql://user:pass@localhost/test_db
|
// Example: TEST_DATABASE_URL=postgresql://user:pass@localhost/test_db
|
||||||
|
|
||||||
async fn get_test_storage() -> Option<SupabaseConversationalStorage> {
|
async fn get_test_storage() -> Option<PostgreSQLConversationStorage> {
|
||||||
if let Ok(db_url) = std::env::var("TEST_DATABASE_URL") {
|
if let Ok(db_url) = std::env::var("TEST_DATABASE_URL") {
|
||||||
match SupabaseConversationalStorage::new(db_url).await {
|
match PostgreSQLConversationStorage::new(db_url).await {
|
||||||
Ok(storage) => Some(storage),
|
Ok(storage) => Some(storage),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
eprintln!("Failed to create test storage: {}", e);
|
eprintln!("Failed to create test storage: {}", e);
|
||||||
|
|
@ -92,13 +92,3 @@ model_aliases:
|
||||||
|
|
||||||
tracing:
|
tracing:
|
||||||
random_sampling: 100
|
random_sampling: 100
|
||||||
|
|
||||||
state_storage:
|
|
||||||
# Type: memory | postgres
|
|
||||||
type: postgres
|
|
||||||
|
|
||||||
# Connection string for postgres type
|
|
||||||
# Environment variables are supported using $VAR_NAME or ${VAR_NAME} syntax
|
|
||||||
# Variables MUST be set before running config validation/rendering
|
|
||||||
# Example with environment variable substitution:
|
|
||||||
connection_string: "postgresql://postgres.saueycoonskiktmozyvp:$DB_PASSWORD@aws-0-us-west-2.pooler.supabase.com:5432/postgres"
|
|
||||||
|
|
|
||||||
25
tests/e2e/arch_config_memory_state_v1_responses.yaml
Normal file
25
tests/e2e/arch_config_memory_state_v1_responses.yaml
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
version: v0.1
|
||||||
|
|
||||||
|
listeners:
|
||||||
|
egress_traffic:
|
||||||
|
address: 0.0.0.0
|
||||||
|
port: 12000
|
||||||
|
message_format: openai
|
||||||
|
timeout: 30s
|
||||||
|
|
||||||
|
llm_providers:
|
||||||
|
|
||||||
|
# OpenAI Models
|
||||||
|
- model: openai/gpt-5-mini-2025-08-07
|
||||||
|
access_key: $OPENAI_API_KEY
|
||||||
|
default: true
|
||||||
|
|
||||||
|
# Anthropic Models
|
||||||
|
- model: anthropic/claude-sonnet-4-20250514
|
||||||
|
access_key: $ANTHROPIC_API_KEY
|
||||||
|
|
||||||
|
# State storage configuration for v1/responses API
|
||||||
|
# Manages conversation state for multi-turn conversations
|
||||||
|
state_storage_v1_responses:
|
||||||
|
# Type: memory | postgres
|
||||||
|
type: memory
|
||||||
|
|
@ -69,6 +69,14 @@ log running e2e tests for openai responses api client
|
||||||
log ========================================
|
log ========================================
|
||||||
poetry run pytest test_openai_responses_api_client.py
|
poetry run pytest test_openai_responses_api_client.py
|
||||||
|
|
||||||
|
log startup arch gateway with state storage for openai responses api client demo
|
||||||
|
archgw down
|
||||||
|
archgw up arch_config_memory_state_v1_responses.yaml
|
||||||
|
|
||||||
|
log running e2e tests for openai responses api client
|
||||||
|
log ========================================
|
||||||
|
poetry run pytest test_openai_responses_api_client_with_state.py
|
||||||
|
|
||||||
log shutting down the weather_forecast demo
|
log shutting down the weather_forecast demo
|
||||||
log =======================================
|
log =======================================
|
||||||
cd ../../demos/samples_python/weather_forecast
|
cd ../../demos/samples_python/weather_forecast
|
||||||
|
|
|
||||||
|
|
@ -628,204 +628,3 @@ def test_openai_responses_api_streaming_with_tools_upstream_anthropic():
|
||||||
assert (
|
assert (
|
||||||
full_text or tool_calls
|
full_text or tool_calls
|
||||||
), "Expected streamed text or tool call argument deltas from Responses tools stream"
|
), "Expected streamed text or tool call argument deltas from Responses tools stream"
|
||||||
|
|
||||||
|
|
||||||
def test_conversation_state_management_two_turn():
|
|
||||||
"""
|
|
||||||
Test conversation state management across two turns:
|
|
||||||
1. Send initial message to non-OpenAI model via v1/responses
|
|
||||||
2. Capture response_id from first response
|
|
||||||
3. Send second message with previous_response_id
|
|
||||||
4. Verify model receives both messages in correct order
|
|
||||||
"""
|
|
||||||
base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
|
|
||||||
client = openai.OpenAI(api_key="test-key", base_url=f"{base_url}/v1")
|
|
||||||
|
|
||||||
logger.info("\n" + "=" * 80)
|
|
||||||
logger.info("TEST: Conversation State Management - Two Turn Flow")
|
|
||||||
logger.info("=" * 80)
|
|
||||||
|
|
||||||
# Turn 1: Send initial message to Anthropic (non-OpenAI model)
|
|
||||||
logger.info("\n[TURN 1] Sending initial message...")
|
|
||||||
resp1 = client.responses.create(
|
|
||||||
model="claude-sonnet-4-20250514",
|
|
||||||
input="My name is Alice and I like pizza.",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Extract response_id from first response
|
|
||||||
response_id_1 = resp1.id
|
|
||||||
logger.info(f"[TURN 1] Received response_id: {response_id_1}")
|
|
||||||
logger.info(f"[TURN 1] Model response: {resp1.output_text}")
|
|
||||||
|
|
||||||
assert response_id_1 is not None, "First response should have an id"
|
|
||||||
assert len(resp1.output_text) > 0, "First response should have content"
|
|
||||||
|
|
||||||
# Turn 2: Send follow-up message with previous_response_id
|
|
||||||
# Ask the model to list all messages to verify state was combined
|
|
||||||
logger.info(
|
|
||||||
f"\n[TURN 2] Sending follow-up with previous_response_id={response_id_1}"
|
|
||||||
)
|
|
||||||
resp2 = client.responses.create(
|
|
||||||
model="claude-sonnet-4-20250514",
|
|
||||||
input="Please list all the messages you have received in our conversation, numbering each one.",
|
|
||||||
previous_response_id=response_id_1,
|
|
||||||
)
|
|
||||||
|
|
||||||
response_id_2 = resp2.id
|
|
||||||
logger.info(f"[TURN 2] Received response_id: {response_id_2}")
|
|
||||||
logger.info(f"[TURN 2] Model response: {resp2.output_text}")
|
|
||||||
|
|
||||||
assert response_id_2 is not None, "Second response should have an id"
|
|
||||||
assert response_id_2 != response_id_1, "Second response should have different id"
|
|
||||||
|
|
||||||
# Verify the model received the conversation history
|
|
||||||
# The response should reference both the initial message and the follow-up
|
|
||||||
response_lower = resp2.output_text.lower()
|
|
||||||
|
|
||||||
# Check if the model acknowledges receiving multiple messages
|
|
||||||
# Different models might format this differently, so we check for various indicators
|
|
||||||
has_conversation_context = (
|
|
||||||
"alice" in response_lower
|
|
||||||
or "pizza" in response_lower # References the name from turn 1
|
|
||||||
or "two" in response_lower # References the preference from turn 1
|
|
||||||
or "2" in response_lower # Mentions number of messages
|
|
||||||
or "first" in response_lower # Numeric indicator
|
|
||||||
or "second" # References first message
|
|
||||||
in response_lower # References second message
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
f"\n[VALIDATION] Conversation context preserved: {has_conversation_context}"
|
|
||||||
)
|
|
||||||
logger.info(
|
|
||||||
f"[VALIDATION] Response contains conversation markers: {has_conversation_context}"
|
|
||||||
)
|
|
||||||
|
|
||||||
print(f"\n{'='*80}")
|
|
||||||
print("Conversation State Test Results:")
|
|
||||||
print(f"Turn 1 Response ID: {response_id_1}")
|
|
||||||
print(f"Turn 2 Response ID: {response_id_2}")
|
|
||||||
print(f"Turn 1 Output: {resp1.output_text[:100]}...")
|
|
||||||
print(f"Turn 2 Output: {resp2.output_text}")
|
|
||||||
print(f"Conversation Context Preserved: {has_conversation_context}")
|
|
||||||
print(f"{'='*80}\n")
|
|
||||||
|
|
||||||
assert has_conversation_context, (
|
|
||||||
f"Model should have received conversation history. "
|
|
||||||
f"Response: {resp2.output_text}"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_conversation_state_management_two_turn_streaming():
|
|
||||||
"""
|
|
||||||
Test conversation state management across two turns with streaming:
|
|
||||||
1. Send initial streaming message to non-OpenAI model via v1/responses
|
|
||||||
2. Capture response_id from first response
|
|
||||||
3. Send second streaming message with previous_response_id
|
|
||||||
4. Verify model receives both messages in correct order
|
|
||||||
"""
|
|
||||||
base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
|
|
||||||
client = openai.OpenAI(api_key="test-key", base_url=f"{base_url}/v1")
|
|
||||||
|
|
||||||
logger.info("\n" + "=" * 80)
|
|
||||||
logger.info("TEST: Conversation State Management - Two Turn Streaming Flow")
|
|
||||||
logger.info("=" * 80)
|
|
||||||
|
|
||||||
# Turn 1: Send initial streaming message to Anthropic (non-OpenAI model)
|
|
||||||
logger.info("\n[TURN 1] Sending initial streaming message...")
|
|
||||||
stream1 = client.responses.create(
|
|
||||||
model="claude-sonnet-4-20250514",
|
|
||||||
input="My name is Alice and I like pizza.",
|
|
||||||
stream=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Collect streamed content and capture response_id
|
|
||||||
text_chunks_1 = []
|
|
||||||
response_id_1 = None
|
|
||||||
|
|
||||||
for event in stream1:
|
|
||||||
if getattr(event, "type", None) == "response.output_text.delta" and getattr(
|
|
||||||
event, "delta", None
|
|
||||||
):
|
|
||||||
text_chunks_1.append(event.delta)
|
|
||||||
|
|
||||||
# Capture response_id from response.completed event
|
|
||||||
if getattr(event, "type", None) == "response.completed" and getattr(
|
|
||||||
event, "response", None
|
|
||||||
):
|
|
||||||
response_id_1 = event.response.id
|
|
||||||
|
|
||||||
output_1 = "".join(text_chunks_1)
|
|
||||||
logger.info(f"[TURN 1] Received response_id: {response_id_1}")
|
|
||||||
logger.info(f"[TURN 1] Model response: {output_1}")
|
|
||||||
|
|
||||||
assert response_id_1 is not None, "First response should have an id"
|
|
||||||
assert len(output_1) > 0, "First response should have content"
|
|
||||||
|
|
||||||
# Turn 2: Send follow-up streaming message with previous_response_id
|
|
||||||
logger.info(
|
|
||||||
f"\n[TURN 2] Sending follow-up streaming request with previous_response_id={response_id_1}"
|
|
||||||
)
|
|
||||||
stream2 = client.responses.create(
|
|
||||||
model="claude-sonnet-4-20250514",
|
|
||||||
input="Please list all the messages you have received in our conversation, numbering each one.",
|
|
||||||
previous_response_id=response_id_1,
|
|
||||||
stream=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Collect streamed content from second response
|
|
||||||
text_chunks_2 = []
|
|
||||||
response_id_2 = None
|
|
||||||
|
|
||||||
for event in stream2:
|
|
||||||
if getattr(event, "type", None) == "response.output_text.delta" and getattr(
|
|
||||||
event, "delta", None
|
|
||||||
):
|
|
||||||
text_chunks_2.append(event.delta)
|
|
||||||
|
|
||||||
# Capture response_id from response.completed event
|
|
||||||
if getattr(event, "type", None) == "response.completed" and getattr(
|
|
||||||
event, "response", None
|
|
||||||
):
|
|
||||||
response_id_2 = event.response.id
|
|
||||||
|
|
||||||
output_2 = "".join(text_chunks_2)
|
|
||||||
logger.info(f"[TURN 2] Received response_id: {response_id_2}")
|
|
||||||
logger.info(f"[TURN 2] Model response: {output_2}")
|
|
||||||
|
|
||||||
assert response_id_2 is not None, "Second response should have an id"
|
|
||||||
assert response_id_2 != response_id_1, "Second response should have different id"
|
|
||||||
|
|
||||||
# Verify the model received the conversation history
|
|
||||||
response_lower = output_2.lower()
|
|
||||||
|
|
||||||
# Check if the model acknowledges receiving multiple messages
|
|
||||||
has_conversation_context = (
|
|
||||||
"alice" in response_lower
|
|
||||||
or "pizza" in response_lower # References the name from turn 1
|
|
||||||
or "two" in response_lower # References the preference from turn 1
|
|
||||||
or "2" in response_lower # Mentions number of messages
|
|
||||||
or "first" in response_lower # Numeric indicator
|
|
||||||
or "second" # References first message
|
|
||||||
in response_lower # References second message
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
f"\n[VALIDATION] Conversation context preserved: {has_conversation_context}"
|
|
||||||
)
|
|
||||||
logger.info(
|
|
||||||
f"[VALIDATION] Response contains conversation markers: {has_conversation_context}"
|
|
||||||
)
|
|
||||||
|
|
||||||
print(f"\n{'='*80}")
|
|
||||||
print("Streaming Conversation State Test Results:")
|
|
||||||
print(f"Turn 1 Response ID: {response_id_1}")
|
|
||||||
print(f"Turn 2 Response ID: {response_id_2}")
|
|
||||||
print(f"Turn 1 Output: {output_1[:100]}...")
|
|
||||||
print(f"Turn 2 Output: {output_2}")
|
|
||||||
print(f"Conversation Context Preserved: {has_conversation_context}")
|
|
||||||
print(f"{'='*80}\n")
|
|
||||||
|
|
||||||
assert has_conversation_context, (
|
|
||||||
f"Model should have received conversation history. " f"Response: {output_2}"
|
|
||||||
)
|
|
||||||
|
|
|
||||||
218
tests/e2e/test_openai_responses_api_client_with_state.py
Normal file
218
tests/e2e/test_openai_responses_api_client_with_state.py
Normal file
|
|
@ -0,0 +1,218 @@
|
||||||
|
import openai
|
||||||
|
import pytest
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Set up logging
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
||||||
|
handlers=[logging.StreamHandler(sys.stdout)],
|
||||||
|
)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
LLM_GATEWAY_ENDPOINT = os.getenv(
|
||||||
|
"LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1/chat/completions"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_conversation_state_management_two_turn():
|
||||||
|
"""
|
||||||
|
Test conversation state management across two turns:
|
||||||
|
1. Send initial message to non-OpenAI model via v1/responses
|
||||||
|
2. Capture response_id from first response
|
||||||
|
3. Send second message with previous_response_id
|
||||||
|
4. Verify model receives both messages in correct order
|
||||||
|
"""
|
||||||
|
base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
|
||||||
|
client = openai.OpenAI(api_key="test-key", base_url=f"{base_url}/v1")
|
||||||
|
|
||||||
|
logger.info("\n" + "=" * 80)
|
||||||
|
logger.info("TEST: Conversation State Management - Two Turn Flow")
|
||||||
|
logger.info("=" * 80)
|
||||||
|
|
||||||
|
# Turn 1: Send initial message to Anthropic (non-OpenAI model)
|
||||||
|
logger.info("\n[TURN 1] Sending initial message...")
|
||||||
|
resp1 = client.responses.create(
|
||||||
|
model="claude-sonnet-4-20250514",
|
||||||
|
input="My name is Alice and I like pizza.",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract response_id from first response
|
||||||
|
response_id_1 = resp1.id
|
||||||
|
logger.info(f"[TURN 1] Received response_id: {response_id_1}")
|
||||||
|
logger.info(f"[TURN 1] Model response: {resp1.output_text}")
|
||||||
|
|
||||||
|
assert response_id_1 is not None, "First response should have an id"
|
||||||
|
assert len(resp1.output_text) > 0, "First response should have content"
|
||||||
|
|
||||||
|
# Turn 2: Send follow-up message with previous_response_id
|
||||||
|
# Ask the model to list all messages to verify state was combined
|
||||||
|
logger.info(
|
||||||
|
f"\n[TURN 2] Sending follow-up with previous_response_id={response_id_1}"
|
||||||
|
)
|
||||||
|
resp2 = client.responses.create(
|
||||||
|
model="claude-sonnet-4-20250514",
|
||||||
|
input="Please list all the messages you have received in our conversation, numbering each one.",
|
||||||
|
previous_response_id=response_id_1,
|
||||||
|
)
|
||||||
|
|
||||||
|
response_id_2 = resp2.id
|
||||||
|
logger.info(f"[TURN 2] Received response_id: {response_id_2}")
|
||||||
|
logger.info(f"[TURN 2] Model response: {resp2.output_text}")
|
||||||
|
|
||||||
|
assert response_id_2 is not None, "Second response should have an id"
|
||||||
|
assert response_id_2 != response_id_1, "Second response should have different id"
|
||||||
|
|
||||||
|
# Verify the model received the conversation history
|
||||||
|
# The response should reference both the initial message and the follow-up
|
||||||
|
response_lower = resp2.output_text.lower()
|
||||||
|
|
||||||
|
# Check if the model acknowledges receiving multiple messages
|
||||||
|
# Different models might format this differently, so we check for various indicators
|
||||||
|
has_conversation_context = (
|
||||||
|
"alice" in response_lower # References the name from turn 1
|
||||||
|
or "pizza" in response_lower # References the preference from turn 1
|
||||||
|
or "two" in response_lower # Mentions number of messages
|
||||||
|
or "2" in response_lower # Numeric indicator
|
||||||
|
or "first" in response_lower # References first message
|
||||||
|
or "second"
|
||||||
|
in response_lower # References second message
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"\n[VALIDATION] Conversation context preserved: {has_conversation_context}"
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"[VALIDATION] Response contains conversation markers: {has_conversation_context}"
|
||||||
|
)
|
||||||
|
|
||||||
|
print(f"\n{'='*80}")
|
||||||
|
print("Conversation State Test Results:")
|
||||||
|
print(f"Turn 1 Response ID: {response_id_1}")
|
||||||
|
print(f"Turn 2 Response ID: {response_id_2}")
|
||||||
|
print(f"Turn 1 Output: {resp1.output_text[:100]}...")
|
||||||
|
print(f"Turn 2 Output: {resp2.output_text}")
|
||||||
|
print(f"Conversation Context Preserved: {has_conversation_context}")
|
||||||
|
print(f"{'='*80}\n")
|
||||||
|
|
||||||
|
assert has_conversation_context, (
|
||||||
|
f"Model should have received conversation history. "
|
||||||
|
f"Response: {resp2.output_text}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_conversation_state_management_two_turn_streaming():
|
||||||
|
"""
|
||||||
|
Test conversation state management across two turns with streaming:
|
||||||
|
1. Send initial streaming message to non-OpenAI model via v1/responses
|
||||||
|
2. Capture response_id from first response
|
||||||
|
3. Send second streaming message with previous_response_id
|
||||||
|
4. Verify model receives both messages in correct order
|
||||||
|
"""
|
||||||
|
base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
|
||||||
|
client = openai.OpenAI(api_key="test-key", base_url=f"{base_url}/v1")
|
||||||
|
|
||||||
|
logger.info("\n" + "=" * 80)
|
||||||
|
logger.info("TEST: Conversation State Management - Two Turn Streaming Flow")
|
||||||
|
logger.info("=" * 80)
|
||||||
|
|
||||||
|
# Turn 1: Send initial streaming message to Anthropic (non-OpenAI model)
|
||||||
|
logger.info("\n[TURN 1] Sending initial streaming message...")
|
||||||
|
stream1 = client.responses.create(
|
||||||
|
model="claude-sonnet-4-20250514",
|
||||||
|
input="My name is Alice and I like pizza.",
|
||||||
|
stream=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Collect streamed content and capture response_id
|
||||||
|
text_chunks_1 = []
|
||||||
|
response_id_1 = None
|
||||||
|
|
||||||
|
for event in stream1:
|
||||||
|
if getattr(event, "type", None) == "response.output_text.delta" and getattr(
|
||||||
|
event, "delta", None
|
||||||
|
):
|
||||||
|
text_chunks_1.append(event.delta)
|
||||||
|
|
||||||
|
# Capture response_id from response.completed event
|
||||||
|
if getattr(event, "type", None) == "response.completed" and getattr(
|
||||||
|
event, "response", None
|
||||||
|
):
|
||||||
|
response_id_1 = event.response.id
|
||||||
|
|
||||||
|
output_1 = "".join(text_chunks_1)
|
||||||
|
logger.info(f"[TURN 1] Received response_id: {response_id_1}")
|
||||||
|
logger.info(f"[TURN 1] Model response: {output_1}")
|
||||||
|
|
||||||
|
assert response_id_1 is not None, "First response should have an id"
|
||||||
|
assert len(output_1) > 0, "First response should have content"
|
||||||
|
|
||||||
|
# Turn 2: Send follow-up streaming message with previous_response_id
|
||||||
|
logger.info(
|
||||||
|
f"\n[TURN 2] Sending follow-up streaming request with previous_response_id={response_id_1}"
|
||||||
|
)
|
||||||
|
stream2 = client.responses.create(
|
||||||
|
model="claude-sonnet-4-20250514",
|
||||||
|
input="Please list all the messages you have received in our conversation, numbering each one.",
|
||||||
|
previous_response_id=response_id_1,
|
||||||
|
stream=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Collect streamed content from second response
|
||||||
|
text_chunks_2 = []
|
||||||
|
response_id_2 = None
|
||||||
|
|
||||||
|
for event in stream2:
|
||||||
|
if getattr(event, "type", None) == "response.output_text.delta" and getattr(
|
||||||
|
event, "delta", None
|
||||||
|
):
|
||||||
|
text_chunks_2.append(event.delta)
|
||||||
|
|
||||||
|
# Capture response_id from response.completed event
|
||||||
|
if getattr(event, "type", None) == "response.completed" and getattr(
|
||||||
|
event, "response", None
|
||||||
|
):
|
||||||
|
response_id_2 = event.response.id
|
||||||
|
|
||||||
|
output_2 = "".join(text_chunks_2)
|
||||||
|
logger.info(f"[TURN 2] Received response_id: {response_id_2}")
|
||||||
|
logger.info(f"[TURN 2] Model response: {output_2}")
|
||||||
|
|
||||||
|
assert response_id_2 is not None, "Second response should have an id"
|
||||||
|
assert response_id_2 != response_id_1, "Second response should have different id"
|
||||||
|
|
||||||
|
# Verify the model received the conversation history
|
||||||
|
response_lower = output_2.lower()
|
||||||
|
|
||||||
|
# Check if the model acknowledges receiving multiple messages
|
||||||
|
has_conversation_context = (
|
||||||
|
"alice" in response_lower # References the name from turn 1
|
||||||
|
or "pizza" in response_lower # References the preference from turn 1
|
||||||
|
or "two" in response_lower # Mentions number of messages
|
||||||
|
or "2" in response_lower # Numeric indicator
|
||||||
|
or "first" in response_lower # References first message
|
||||||
|
or "second"
|
||||||
|
in response_lower # References second message
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"\n[VALIDATION] Conversation context preserved: {has_conversation_context}"
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"[VALIDATION] Response contains conversation markers: {has_conversation_context}"
|
||||||
|
)
|
||||||
|
|
||||||
|
print(f"\n{'='*80}")
|
||||||
|
print("Streaming Conversation State Test Results:")
|
||||||
|
print(f"Turn 1 Response ID: {response_id_1}")
|
||||||
|
print(f"Turn 2 Response ID: {response_id_2}")
|
||||||
|
print(f"Turn 1 Output: {output_1[:100]}...")
|
||||||
|
print(f"Turn 2 Output: {output_2}")
|
||||||
|
print(f"Conversation Context Preserved: {has_conversation_context}")
|
||||||
|
print(f"{'='*80}\n")
|
||||||
|
|
||||||
|
assert has_conversation_context, (
|
||||||
|
f"Model should have received conversation history. " f"Response: {output_2}"
|
||||||
|
)
|
||||||
Loading…
Add table
Add a link
Reference in a new issue