From 11e033c72d01d71b209fe0a47620fd7441fdadb4 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Mon, 12 Jan 2026 12:47:32 +0530 Subject: [PATCH] fix: formatting fix and fix #79 Improve Safari Permissions UX --- api/routes/telephony.py | 2 +- api/services/configuration/check_validity.py | 2 +- api/services/configuration/registry.py | 4 +- api/services/pipecat/run_pipeline.py | 2 +- api/utils/telephony_helper.py | 3 +- .../run/[runId]/components/AudioControls.tsx | 80 ++++++++++++++++--- .../run/[runId]/hooks/useDeviceInputs.tsx | 3 - 7 files changed, 78 insertions(+), 18 deletions(-) diff --git a/api/routes/telephony.py b/api/routes/telephony.py index cb9f756..cd7eb35 100644 --- a/api/routes/telephony.py +++ b/api/routes/telephony.py @@ -10,7 +10,6 @@ from typing import Optional from fastapi import APIRouter, Depends, Header, HTTPException, Request, WebSocket from loguru import logger -from pipecat.utils.context import set_current_run_id from pydantic import BaseModel from sqlalchemy import text from sqlalchemy.future import select @@ -37,6 +36,7 @@ from api.utils.telephony_helper import ( parse_webhook_request, ) from api.utils.tunnel import TunnelURLProvider +from pipecat.utils.context import set_current_run_id router = APIRouter(prefix="/telephony") diff --git a/api/services/configuration/check_validity.py b/api/services/configuration/check_validity.py index 4c3f9cf..4d0bc6e 100644 --- a/api/services/configuration/check_validity.py +++ b/api/services/configuration/check_validity.py @@ -138,6 +138,6 @@ class UserConfigurationValidator: def _check_sarvam_api_key(self, model: str, api_key: str) -> bool: return True - + def _check_speechmatics_api_key(self, model: str, api_key: str) -> bool: return True diff --git a/api/services/configuration/registry.py b/api/services/configuration/registry.py index 6809060..f2c2a2b 100644 --- a/api/services/configuration/registry.py +++ b/api/services/configuration/registry.py @@ -408,7 +408,9 @@ SPEECHMATICS_STT_LANGUAGES = [ @register_stt class SpeechmaticsSTTConfiguration(BaseSTTConfiguration): provider: Literal[ServiceProviders.SPEECHMATICS] = ServiceProviders.SPEECHMATICS - model: str = Field(default="enhanced", description="Operating point: standard or enhanced") + model: str = Field( + default="enhanced", description="Operating point: standard or enhanced" + ) language: str = Field( default="en", json_schema_extra={"examples": SPEECHMATICS_STT_LANGUAGES} ) diff --git a/api/services/pipecat/run_pipeline.py b/api/services/pipecat/run_pipeline.py index a516de0..c33d121 100644 --- a/api/services/pipecat/run_pipeline.py +++ b/api/services/pipecat/run_pipeline.py @@ -567,7 +567,7 @@ async def _run_pipeline( # Now set the task on the engine engine.set_task(task) - + # Initialize the engine to set the initial context await engine.initialize() diff --git a/api/utils/telephony_helper.py b/api/utils/telephony_helper.py index 6ad917e..ee51e46 100644 --- a/api/utils/telephony_helper.py +++ b/api/utils/telephony_helper.py @@ -3,11 +3,12 @@ Telephony helper utilities. Common functions used across telephony operations. """ -from dograh.api.constants import COUNTRY_CODES from fastapi import Request from loguru import logger from starlette.responses import HTMLResponse +from api.constants import COUNTRY_CODES + def numbers_match( incoming_number: str, diff --git a/ui/src/app/workflow/[workflowId]/run/[runId]/components/AudioControls.tsx b/ui/src/app/workflow/[workflowId]/run/[runId]/components/AudioControls.tsx index b6dbce3..a2f5c34 100644 --- a/ui/src/app/workflow/[workflowId]/run/[runId]/components/AudioControls.tsx +++ b/ui/src/app/workflow/[workflowId]/run/[runId]/components/AudioControls.tsx @@ -1,5 +1,5 @@ -import { Mic, Phone, PhoneOff } from "lucide-react"; -import { useEffect } from "react"; +import { Loader2, Mic, Phone, PhoneOff } from "lucide-react"; +import { useEffect, useState } from "react"; import { Button } from "@/components/ui/button"; @@ -28,23 +28,33 @@ export const AudioControls = ({ isStarting, getAudioInputDevices }: AudioControlsProps) => { - // Check if we have valid audio devices (permissions granted) + const [isRequestingPermission, setIsRequestingPermission] = useState(false); + const [permissionDenied, setPermissionDenied] = useState(false); + // Browsers only provide device labels after permission is granted const hasValidDevices = audioInputs.length > 0 && audioInputs.some(device => device.label && device.label.trim() !== ''); const requestAudioPermissions = async () => { + setIsRequestingPermission(true); + try { - // Request audio permissions - this triggers the browser permission prompt const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); - // Stop the stream immediately - we just needed to trigger the permission prompt stream.getTracks().forEach(track => track.stop()); - // Refresh the device list now that we have permissions await getAudioInputDevices(); } catch (error) { - console.error('Failed to request audio permissions:', error); + if (error instanceof Error && error.name === 'NotAllowedError') { + setPermissionDenied(true); + } + } finally { + setIsRequestingPermission(false); } }; + const handleTryAgain = () => { + setPermissionDenied(false); + requestAudioPermissions(); + }; + // Handle auto-selection of first device if none selected useEffect(() => { if (hasValidDevices && !selectedAudioInput) { @@ -60,18 +70,68 @@ export const AudioControls = ({ } if (!hasValidDevices) { + // Show permission denied UI + if (permissionDenied) { + return ( +
+
+ +
+
+

Microphone access denied

+

+ To use the voice agent, you need to allow microphone access. + Please enable it in your browser settings and try again. +

+
+ +
+ ); + } + + // Show initial permission request UI return (

Audio permissions required

-

Click below to grant microphone access

+

+ {isRequestingPermission + ? "Please allow microphone access in the browser dialog" + : "Click below to grant microphone access"} +

); diff --git a/ui/src/app/workflow/[workflowId]/run/[runId]/hooks/useDeviceInputs.tsx b/ui/src/app/workflow/[workflowId]/run/[runId]/hooks/useDeviceInputs.tsx index c3dfd4e..a7cb232 100644 --- a/ui/src/app/workflow/[workflowId]/run/[runId]/hooks/useDeviceInputs.tsx +++ b/ui/src/app/workflow/[workflowId]/run/[runId]/hooks/useDeviceInputs.tsx @@ -1,7 +1,5 @@ import { useCallback, useEffect, useState } from "react"; -import logger from '@/lib/logger'; - export const useDeviceInputs = () => { const [audioInputs, setAudioInputs] = useState([]); const [selectedAudioInput, setSelectedAudioInput] = useState(''); @@ -19,7 +17,6 @@ export const useDeviceInputs = () => { } } catch (error) { setPermissionError('Could not enumerate devices'); - logger.error(`Error enumerating devices: ${error}`); } }, []);