From 11e033c72d01d71b209fe0a47620fd7441fdadb4 Mon Sep 17 00:00:00 2001
From: Abhishek Kumar <abhishek@a6k.me>
Date: Mon, 12 Jan 2026 12:47:32 +0530
Subject: [PATCH] fix: formatting fix and fix #79

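Improve the Safari microphone-permission UX: disable the button and show a
spinner while the browser permission prompt is open, and show a
"Microphone access denied" screen with a "Try Again" button when access is
refused.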
---
 api/routes/telephony.py                       |  2 +-
 api/services/configuration/check_validity.py  |  2 +-
 api/services/configuration/registry.py        |  4 +-
 api/services/pipecat/run_pipeline.py          |  2 +-
 api/utils/telephony_helper.py                 |  3 +-
 .../run/[runId]/components/AudioControls.tsx  | 80 ++++++++++++++++---
 .../run/[runId]/hooks/useDeviceInputs.tsx     |  3 -
 7 files changed, 78 insertions(+), 18 deletions(-)

diff --git a/api/routes/telephony.py b/api/routes/telephony.py
index cb9f756..cd7eb35 100644
--- a/api/routes/telephony.py
+++ b/api/routes/telephony.py
@@ -10,7 +10,6 @@ from typing import Optional
 
 from fastapi import APIRouter, Depends, Header, HTTPException, Request, WebSocket
 from loguru import logger
-from pipecat.utils.context import set_current_run_id
 from pydantic import BaseModel
 from sqlalchemy import text
 from sqlalchemy.future import select
@@ -37,6 +36,7 @@ from api.utils.telephony_helper import (
     parse_webhook_request,
 )
 from api.utils.tunnel import TunnelURLProvider
+from pipecat.utils.context import set_current_run_id
 
 router = APIRouter(prefix="/telephony")
 
diff --git a/api/services/configuration/check_validity.py b/api/services/configuration/check_validity.py
index 4c3f9cf..4d0bc6e 100644
--- a/api/services/configuration/check_validity.py
+++ b/api/services/configuration/check_validity.py
@@ -138,6 +138,6 @@ class UserConfigurationValidator:
 
     def _check_sarvam_api_key(self, model: str, api_key: str) -> bool:
         return True
-    
+
     def _check_speechmatics_api_key(self, model: str, api_key: str) -> bool:
         return True
diff --git a/api/services/configuration/registry.py b/api/services/configuration/registry.py
index 6809060..f2c2a2b 100644
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@@ -408,7 +408,9 @@ SPEECHMATICS_STT_LANGUAGES = [
 @register_stt
 class SpeechmaticsSTTConfiguration(BaseSTTConfiguration):
     provider: Literal[ServiceProviders.SPEECHMATICS] = ServiceProviders.SPEECHMATICS
-    model: str = Field(default="enhanced", description="Operating point: standard or enhanced")
+    model: str = Field(
+        default="enhanced", description="Operating point: standard or enhanced"
+    )
     language: str = Field(
         default="en", json_schema_extra={"examples": SPEECHMATICS_STT_LANGUAGES}
     )
diff --git a/api/services/pipecat/run_pipeline.py b/api/services/pipecat/run_pipeline.py
index a516de0..c33d121 100644
--- a/api/services/pipecat/run_pipeline.py
+++ b/api/services/pipecat/run_pipeline.py
@@ -567,7 +567,7 @@ async def _run_pipeline(
 
     # Now set the task on the engine
     engine.set_task(task)
-    
+
     # Initialize the engine to set the initial context
     await engine.initialize()
 
diff --git a/api/utils/telephony_helper.py b/api/utils/telephony_helper.py
index 6ad917e..ee51e46 100644
--- a/api/utils/telephony_helper.py
+++ b/api/utils/telephony_helper.py
@@ -3,11 +3,12 @@ Telephony helper utilities.
 Common functions used across telephony operations.
 """
 
-from dograh.api.constants import COUNTRY_CODES
 from fastapi import Request
 from loguru import logger
 from starlette.responses import HTMLResponse
 
+from api.constants import COUNTRY_CODES
+
 
 def numbers_match(
     incoming_number: str,
diff --git a/ui/src/app/workflow/[workflowId]/run/[runId]/components/AudioControls.tsx b/ui/src/app/workflow/[workflowId]/run/[runId]/components/AudioControls.tsx
index b6dbce3..a2f5c34 100644
--- a/ui/src/app/workflow/[workflowId]/run/[runId]/components/AudioControls.tsx
+++ b/ui/src/app/workflow/[workflowId]/run/[runId]/components/AudioControls.tsx
@@ -1,5 +1,5 @@
-import { Mic, Phone, PhoneOff } from "lucide-react";
-import { useEffect } from "react";
+import { Loader2, Mic, Phone, PhoneOff } from "lucide-react";
+import { useEffect, useState } from "react";
 
 import { Button } from "@/components/ui/button";
 
@@ -28,23 +28,33 @@ export const AudioControls = ({
     isStarting,
     getAudioInputDevices
 }: AudioControlsProps) => {
-    // Check if we have valid audio devices (permissions granted)
+    const [isRequestingPermission, setIsRequestingPermission] = useState(false);
+    const [permissionDenied, setPermissionDenied] = useState(false);
+
     // Browsers only provide device labels after permission is granted
     const hasValidDevices = audioInputs.length > 0 && audioInputs.some(device => device.label && device.label.trim() !== '');
 
     const requestAudioPermissions = async () => {
+        setIsRequestingPermission(true);
+
         try {
-            // Request audio permissions - this triggers the browser permission prompt
             const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-            // Stop the stream immediately - we just needed to trigger the permission prompt
             stream.getTracks().forEach(track => track.stop());
-            // Refresh the device list now that we have permissions
             await getAudioInputDevices();
         } catch (error) {
-            console.error('Failed to request audio permissions:', error);
+            const denied = error instanceof Error && error.name === 'NotAllowedError';
+            if (!denied) console.error('Failed to request audio permissions:', error); // don't swallow unexpected failures
+            setPermissionDenied(denied);
+        } finally {
+            setIsRequestingPermission(false);
         }
     };
 
+    const handleTryAgain = () => {
+        setPermissionDenied(false);
+        void requestAudioPermissions(); // fire-and-forget: the callee handles its own errors
+    };
+
     // Handle auto-selection of first device if none selected
     useEffect(() => {
         if (hasValidDevices && !selectedAudioInput) {
@@ -60,18 +70,68 @@ export const AudioControls = ({
     }
 
     if (!hasValidDevices) {
+        // Show permission denied UI
+        if (permissionDenied) {
+            return (
+                <div className="flex flex-col items-center justify-center space-y-4 p-8">
+                    <div className="h-12 w-12 bg-destructive/10 rounded-full flex items-center justify-center">
+                        <Mic className="h-6 w-6 text-destructive" />
+                    </div>
+                    <div className="text-center space-y-2">
+                        <p className="text-foreground font-medium">Microphone access denied</p>
+                        <p className="text-sm text-muted-foreground max-w-md">
+                            To use the voice agent, you need to allow microphone access.
+                            Please enable it in your browser settings and try again.
+                        </p>
+                    </div>
+                    <Button
+                        onClick={handleTryAgain}
+                        size="lg"
+                        disabled={isRequestingPermission}
+                    >
+                        {isRequestingPermission ? (
+                            <>
+                                <Loader2 className="h-5 w-5 mr-2 animate-spin" />
+                                Waiting for permission...
+                            </>
+                        ) : (
+                            <>
+                                <Mic className="h-5 w-5 mr-2" />
+                                Try Again
+                            </>
+                        )}
+                    </Button>
+                </div>
+            );
+        }
+
+        // Show initial permission request UI
         return (
             <div className="flex flex-col items-center justify-center space-y-4 p-8">
                 <div className="text-center space-y-2">
                     <p className="text-foreground font-medium">Audio permissions required</p>
-                    <p className="text-sm text-muted-foreground">Click below to grant microphone access</p>
+                    <p className="text-sm text-muted-foreground">
+                        {isRequestingPermission
+                            ? "Please allow microphone access in the browser dialog"
+                            : "Click below to grant microphone access"}
+                    </p>
                 </div>
                 <Button
                     onClick={requestAudioPermissions}
                     size="lg"
+                    disabled={isRequestingPermission}
                 >
-                    <Mic className="h-5 w-5 mr-2" />
-                    Grant Audio Permissions
+                    {isRequestingPermission ? (
+                        <>
+                            <Loader2 className="h-5 w-5 mr-2 animate-spin" />
+                            Waiting for permission...
+                        </>
+                    ) : (
+                        <>
+                            <Mic className="h-5 w-5 mr-2" />
+                            Grant Audio Permissions
+                        </>
+                    )}
                 </Button>
             </div>
         );
diff --git a/ui/src/app/workflow/[workflowId]/run/[runId]/hooks/useDeviceInputs.tsx b/ui/src/app/workflow/[workflowId]/run/[runId]/hooks/useDeviceInputs.tsx
index c3dfd4e..a7cb232 100644
--- a/ui/src/app/workflow/[workflowId]/run/[runId]/hooks/useDeviceInputs.tsx
+++ b/ui/src/app/workflow/[workflowId]/run/[runId]/hooks/useDeviceInputs.tsx
@@ -1,7 +1,5 @@
 import { useCallback, useEffect, useState } from "react";
 
-import logger from '@/lib/logger';
-
 export const useDeviceInputs = () => {
     const [audioInputs, setAudioInputs] = useState<MediaDeviceInfo[]>([]);
     const [selectedAudioInput, setSelectedAudioInput] = useState('');
@@ -19,7 +17,6 @@ export const useDeviceInputs = () => {
             }
         } catch (error) {
             setPermissionError('Could not enumerate devices');
-            logger.error(`Error enumerating devices: ${error}`);
         }
     }, []);