twilio: get greeting message from agents

This commit is contained in:
arkml 2025-04-01 00:17:19 +05:30
parent 98c48f6484
commit f7d3983945
3 changed files with 39 additions and 50 deletions

View file

@ -204,33 +204,38 @@ def handle_call(call_sid, workflow_id, project_id=None):
# Create TwiML response
response = VoiceResponse()
# Check if this is a new call
# Check if this is a new call (no turns yet)
if call_state.get('turn_count', 0) == 0:
# Initial greeting for new calls
greeting = "Hello! I'm your RowBoat assistant. How can I help you today?"
logger.info(f"New call, preparing greeting: {greeting}")
logger.info("First turn: generating AI greeting using an empty user input...")
# Generate greeting by calling process_conversation_turn with empty user input
try:
# Use streaming audio endpoint instead of generating files
# Include a unique ID to prevent caching
unique_id = str(uuid.uuid4())
# Use a relative URL - Twilio will use the same host as the webhook
audio_url = f"/stream-audio/{call_sid}/greeting/{unique_id}"
logger.info(f"Streaming greeting from relative URL: {audio_url}")
# Play the greeting via streaming
response.play(audio_url)
ai_greeting, updated_messages, updated_state = process_conversation_turn(
user_input="", # empty to signal "give me your greeting"
workflow_id=call_state['workflow_id'],
system_prompt=call_state['system_prompt'],
previous_messages=[],
previous_state=None,
project_id=call_state.get('project_id')
)
except Exception as e:
logger.error(f"Error with audio streaming for greeting: {str(e)}")
import traceback
logger.error(traceback.format_exc())
# Fallback to Twilio TTS
response.say(greeting, voice='alice')
logger.error(f"Error generating AI greeting: {str(e)}")
ai_greeting = "Hello, I encountered an issue creating a greeting. How can I help you?"
# Update call state
# Fallback: no changes to updated_messages/updated_state
updated_messages = []
updated_state = None
# Update call_state with AI greeting
call_state['messages'] = updated_messages
call_state['state'] = updated_state
call_state['conversation_history'].append({
'user': "", # empty user
'assistant': ai_greeting
})
call_state['turn_count'] = 1
# Save to MongoDB (primary source of truth)
# Save changes to MongoDB
try:
save_call_state(call_sid, call_state)
logger.info(f"Saved greeting state to MongoDB for {call_sid}")
@ -238,24 +243,24 @@ def handle_call(call_sid, workflow_id, project_id=None):
logger.error(f"Error saving greeting state to MongoDB: {str(e)}")
raise RuntimeError(f"Failed to save greeting state to MongoDB: {str(e)}")
# Update local memory cache
active_calls[call_sid] = call_state
# Instead of using both Gather and Record which compete for input,
# just use Gather for speech recognition, and rely on its SpeechResult
# This is more reliable than trying to use Record and Deepgram
# Play the greeting via streaming audio
unique_id = str(uuid.uuid4())
audio_url = f"/stream-audio/{call_sid}/greeting/{unique_id}"
logger.info(f"Will stream greeting from {audio_url}")
response.play(audio_url)
# Gather user input next
gather = Gather(
input='speech',
action=f'/process_speech?call_sid={call_sid}',
speech_timeout='auto',
language='en-US',
enhanced=True, # Enable enhanced speech recognition
speechModel='phone_call' # Optimize for phone calls
enhanced=True,
speechModel='phone_call'
)
response.append(gather)
# If no input detected, redirect to twiml endpoint
# Call state will be retrieved from MongoDB
response.redirect('/twiml')
logger.info(f"Returning response: {str(response)}")
@ -492,10 +497,7 @@ def stream_audio(call_sid, text_type, unique_id):
# Determine what text to synthesize
text_to_speak = ""
if text_type == "greeting":
# Use default greeting
text_to_speak = "Hello! I'm your RowBoat assistant. How can I help you today?"
elif text_type == "response":
if text_type == "greeting" or text_type == "response":
# Get the text from call state (try MongoDB first, then memory)
call_state = None

View file

@ -55,8 +55,9 @@ def process_conversation_turn(
if not messages or not any(msg.role == 'system' for msg in messages):
messages.append(SystemMessage(role='system', content=system_prompt))
# Add the user's new message
messages.append(UserMessage(role='user', content=user_input))
# Add the user's new
if user_input:
messages.append(UserMessage(role='user', content=user_input))
# Process the conversation using the stateless API
logger.info(f"Sending to RowBoat API with {len(messages)} messages")
@ -84,20 +85,6 @@ def process_conversation_turn(
# Update messages list with the new responses
final_messages = messages + response_messages
# dump_data = {
# 'messages': [msg.model_dump() for msg in messages],
# 'response_messages': [msg.model_dump() for msg in response_messages],
# 'state': new_state
# }
# # Write messages to a debug file for inspection
# fname = f'debug_dump_{time.time()}.json'
# try:
# with open(fname, 'w') as f:
# json.dump(dump_data, f, indent=2)
# logger.info(f"Wrote debug info to {fname}")
# except Exception as e:
# logger.error(f"Failed to write message debug file: {str(e)}")
logger.info(f"Got response from RowBoat API: {assistant_response[:100]}...")
return assistant_response, final_messages, new_state

View file

@ -184,7 +184,7 @@ services:
context: ./apps/twilio_handler
dockerfile: Dockerfile
ports:
- "3009:3009"
- "4010:4010"
environment:
- ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY}
- ROWBOAT_API_HOST=http://rowboat:3000