diff --git a/apps/twilio_handler/app.py b/apps/twilio_handler/app.py index 2b94605b..c0cf7f39 100644 --- a/apps/twilio_handler/app.py +++ b/apps/twilio_handler/app.py @@ -204,33 +204,38 @@ def handle_call(call_sid, workflow_id, project_id=None): # Create TwiML response response = VoiceResponse() - # Check if this is a new call +# Check if this is a new call (no turns yet) if call_state.get('turn_count', 0) == 0: - # Initial greeting for new calls - greeting = "Hello! I'm your RowBoat assistant. How can I help you today?" - logger.info(f"New call, preparing greeting: {greeting}") + logger.info("First turn: generating AI greeting using an empty user input...") + # Generate greeting by calling process_conversation_turn with empty user input try: - # Use streaming audio endpoint instead of generating files - # Include a unique ID to prevent caching - unique_id = str(uuid.uuid4()) - # Use a relative URL - Twilio will use the same host as the webhook - audio_url = f"/stream-audio/{call_sid}/greeting/{unique_id}" - logger.info(f"Streaming greeting from relative URL: {audio_url}") - - # Play the greeting via streaming - response.play(audio_url) + ai_greeting, updated_messages, updated_state = process_conversation_turn( + user_input="", # empty to signal "give me your greeting" + workflow_id=call_state['workflow_id'], + system_prompt=call_state['system_prompt'], + previous_messages=[], + previous_state=None, + project_id=call_state.get('project_id') + ) except Exception as e: - logger.error(f"Error with audio streaming for greeting: {str(e)}") - import traceback - logger.error(traceback.format_exc()) - # Fallback to Twilio TTS - response.say(greeting, voice='alice') + logger.error(f"Error generating AI greeting: {str(e)}") + ai_greeting = "Hello, I encountered an issue creating a greeting. How can I help you?" - # Update call state + # Fallback: no changes to updated_messages/updated_state + updated_messages = [] + updated_state = None + + # Update call_state with AI greeting + call_state['messages'] = updated_messages + call_state['state'] = updated_state + call_state['conversation_history'].append({ + 'user': "", # empty user + 'assistant': ai_greeting + }) call_state['turn_count'] = 1 - # Save to MongoDB (primary source of truth) + # Save changes to MongoDB try: save_call_state(call_sid, call_state) logger.info(f"Saved greeting state to MongoDB for {call_sid}") @@ -238,24 +243,24 @@ def handle_call(call_sid, workflow_id, project_id=None): logger.error(f"Error saving greeting state to MongoDB: {str(e)}") raise RuntimeError(f"Failed to save greeting state to MongoDB: {str(e)}") - # Update local memory cache active_calls[call_sid] = call_state - # Instead of using both Gather and Record which compete for input, - # just use Gather for speech recognition, and rely on its SpeechResult - # This is more reliable than trying to use Record and Deepgram + # Play the greeting via streaming audio + unique_id = str(uuid.uuid4()) + audio_url = f"/stream-audio/{call_sid}/greeting/{unique_id}" + logger.info(f"Will stream greeting from {audio_url}") + response.play(audio_url) + + # Gather user input next gather = Gather( input='speech', action=f'/process_speech?call_sid={call_sid}', speech_timeout='auto', language='en-US', - enhanced=True, # Enable enhanced speech recognition - speechModel='phone_call' # Optimize for phone calls + enhanced=True, + speechModel='phone_call' ) response.append(gather) - - # If no input detected, redirect to twiml endpoint - # Call state will be retrieved from MongoDB response.redirect('/twiml') logger.info(f"Returning response: {str(response)}") @@ -492,10 +497,7 @@ def stream_audio(call_sid, text_type, unique_id): # Determine what text to synthesize text_to_speak = "" - if text_type == "greeting": - # Use default greeting - text_to_speak = "Hello! I'm your RowBoat assistant. How can I help you today?" - elif text_type == "response": + if text_type == "greeting" or text_type == "response": # Get the text from call state (try MongoDB first, then memory) call_state = None diff --git a/apps/twilio_handler/twilio_api.py b/apps/twilio_handler/twilio_api.py index fa69e07a..b233fb65 100644 --- a/apps/twilio_handler/twilio_api.py +++ b/apps/twilio_handler/twilio_api.py @@ -55,8 +55,9 @@ def process_conversation_turn( if not messages or not any(msg.role == 'system' for msg in messages): messages.append(SystemMessage(role='system', content=system_prompt)) - # Add the user's new message - messages.append(UserMessage(role='user', content=user_input)) + # Add the user's new + if user_input: + messages.append(UserMessage(role='user', content=user_input)) # Process the conversation using the stateless API logger.info(f"Sending to RowBoat API with {len(messages)} messages") @@ -84,20 +85,6 @@ def process_conversation_turn( # Update messages list with the new responses final_messages = messages + response_messages - # dump_data = { - # 'messages': [msg.model_dump() for msg in messages], - # 'response_messages': [msg.model_dump() for msg in response_messages], - # 'state': new_state - # } - # # Write messages to a debug file for inspection - # fname = f'debug_dump_{time.time()}.json' - # try: - # with open(fname, 'w') as f: - # json.dump(dump_data, f, indent=2) - # logger.info(f"Wrote debug info to {fname}") - # except Exception as e: - # logger.error(f"Failed to write message debug file: {str(e)}") - logger.info(f"Got response from RowBoat API: {assistant_response[:100]}...") return assistant_response, final_messages, new_state diff --git a/docker-compose.yml b/docker-compose.yml index cfff9405..a3cc5d7a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -184,7 +184,7 @@ services: context: ./apps/twilio_handler dockerfile: Dockerfile ports: - - "3009:3009" + - "4010:4010" environment: - ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY} - ROWBOAT_API_HOST=http://rowboat:3000