fix: formatting fix and fix #79

Improve Safari Permissions UX
2026-07-22 11:51:04 +02:00 · 2026-01-12 12:47:32 +05:30 · 2026-01-12 12:47:32 +05:30 · 11e033c72d
commit 11e033c72d
parent 97fbd9b37b
7 changed files with 78 additions and 18 deletions
--- a/api/routes/telephony.py
+++ b/api/routes/telephony.py
@@ -10,7 +10,6 @@ from typing import Optional

 from fastapi import APIRouter, Depends, Header, HTTPException, Request, WebSocket
 from loguru import logger
-from pipecat.utils.context import set_current_run_id
 from pydantic import BaseModel
 from sqlalchemy import text
 from sqlalchemy.future import select
@@ -37,6 +36,7 @@ from api.utils.telephony_helper import (
    parse_webhook_request,
 )
 from api.utils.tunnel import TunnelURLProvider
+from pipecat.utils.context import set_current_run_id

 router = APIRouter(prefix="/telephony")

--- a/api/services/configuration/check_validity.py
+++ b/api/services/configuration/check_validity.py
@@ -138,6 +138,6 @@ class UserConfigurationValidator:

    def _check_sarvam_api_key(self, model: str, api_key: str) -> bool:
        return True
-    
+
    def _check_speechmatics_api_key(self, model: str, api_key: str) -> bool:
        return True
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@@ -408,7 +408,9 @@ SPEECHMATICS_STT_LANGUAGES = [
 @register_stt
 class SpeechmaticsSTTConfiguration(BaseSTTConfiguration):
    provider: Literal[ServiceProviders.SPEECHMATICS] = ServiceProviders.SPEECHMATICS
-    model: str = Field(default="enhanced", description="Operating point: standard or enhanced")
+    model: str = Field(
+        default="enhanced", description="Operating point: standard or enhanced"
+    )
    language: str = Field(
        default="en", json_schema_extra={"examples": SPEECHMATICS_STT_LANGUAGES}
    )
--- a/api/services/pipecat/run_pipeline.py
+++ b/api/services/pipecat/run_pipeline.py
@@ -567,7 +567,7 @@ async def _run_pipeline(

    # Now set the task on the engine
    engine.set_task(task)
-    
+
    # Initialize the engine to set the initial context
    await engine.initialize()

--- a/api/utils/telephony_helper.py
+++ b/api/utils/telephony_helper.py
@@ -3,11 +3,12 @@ Telephony helper utilities.
 Common functions used across telephony operations.
 """

-from dograh.api.constants import COUNTRY_CODES
 from fastapi import Request
 from loguru import logger
 from starlette.responses import HTMLResponse

+from api.constants import COUNTRY_CODES
+

 def numbers_match(
    incoming_number: str,
--- a/ui/src/app/workflow/[workflowId]/run/[runId]/components/AudioControls.tsx
+++ b/ui/src/app/workflow/[workflowId]/run/[runId]/components/AudioControls.tsx
@@ -1,5 +1,5 @@
-import { Mic, Phone, PhoneOff } from "lucide-react";
-import { useEffect } from "react";
+import { Loader2, Mic, Phone, PhoneOff } from "lucide-react";
+import { useEffect, useState } from "react";

 import { Button } from "@/components/ui/button";

@@ -28,23 +28,33 @@ export const AudioControls = ({
    isStarting,
    getAudioInputDevices
 }: AudioControlsProps) => {
-    // Check if we have valid audio devices (permissions granted)
+    const [isRequestingPermission, setIsRequestingPermission] = useState(false);
+    const [permissionDenied, setPermissionDenied] = useState(false);
+
    // Browsers only provide device labels after permission is granted
    const hasValidDevices = audioInputs.length > 0 && audioInputs.some(device => device.label && device.label.trim() !== '');

    const requestAudioPermissions = async () => {
+        setIsRequestingPermission(true);
+
        try {
-            // Request audio permissions - this triggers the browser permission prompt
            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-            // Stop the stream immediately - we just needed to trigger the permission prompt
            stream.getTracks().forEach(track => track.stop());
-            // Refresh the device list now that we have permissions
            await getAudioInputDevices();
        } catch (error) {
-            console.error('Failed to request audio permissions:', error);
+            if (error instanceof Error && error.name === 'NotAllowedError') {
+                setPermissionDenied(true);
+            }
+        } finally {
+            setIsRequestingPermission(false);
        }
    };

+    const handleTryAgain = () => {
+        setPermissionDenied(false);
+        requestAudioPermissions();
+    };
+
    // Handle auto-selection of first device if none selected
    useEffect(() => {
        if (hasValidDevices && !selectedAudioInput) {
@@ -60,18 +70,68 @@ export const AudioControls = ({
    }

    if (!hasValidDevices) {
+        // Show permission denied UI
+        if (permissionDenied) {
+            return (
+                <div className="flex flex-col items-center justify-center space-y-4 p-8">
+                    <div className="h-12 w-12 bg-destructive/10 rounded-full flex items-center justify-center">
+                        <Mic className="h-6 w-6 text-destructive" />
+                    </div>
+                    <div className="text-center space-y-2">
+                        <p className="text-foreground font-medium">Microphone access denied</p>
+                        <p className="text-sm text-muted-foreground max-w-md">
+                            To use the voice agent, you need to allow microphone access.
+                            Please enable it in your browser settings and try again.
+                        </p>
+                    </div>
+                    <Button
+                        onClick={handleTryAgain}
+                        size="lg"
+                        disabled={isRequestingPermission}
+                    >
+                        {isRequestingPermission ? (
+                            <>
+                                <Loader2 className="h-5 w-5 mr-2 animate-spin" />
+                                Waiting for permission...
+                            </>
+                        ) : (
+                            <>
+                                <Mic className="h-5 w-5 mr-2" />
+                                Try Again
+                            </>
+                        )}
+                    </Button>
+                </div>
+            );
+        }
+
+        // Show initial permission request UI
        return (
            <div className="flex flex-col items-center justify-center space-y-4 p-8">
                <div className="text-center space-y-2">
                    <p className="text-foreground font-medium">Audio permissions required</p>
-                    <p className="text-sm text-muted-foreground">Click below to grant microphone access</p>
+                    <p className="text-sm text-muted-foreground">
+                        {isRequestingPermission
+                            ? "Please allow microphone access in the browser dialog"
+                            : "Click below to grant microphone access"}
+                    </p>
                </div>
                <Button
                    onClick={requestAudioPermissions}
                    size="lg"
+                    disabled={isRequestingPermission}
                >
-                    <Mic className="h-5 w-5 mr-2" />
-                    Grant Audio Permissions
+                    {isRequestingPermission ? (
+                        <>
+                            <Loader2 className="h-5 w-5 mr-2 animate-spin" />
+                            Waiting for permission...
+                        </>
+                    ) : (
+                        <>
+                            <Mic className="h-5 w-5 mr-2" />
+                            Grant Audio Permissions
+                        </>
+                    )}
                </Button>
            </div>
        );
--- a/ui/src/app/workflow/[workflowId]/run/[runId]/hooks/useDeviceInputs.tsx
+++ b/ui/src/app/workflow/[workflowId]/run/[runId]/hooks/useDeviceInputs.tsx
@@ -1,7 +1,5 @@
 import { useCallback, useEffect, useState } from "react";

-import logger from '@/lib/logger';
-
 export const useDeviceInputs = () => {
    const [audioInputs, setAudioInputs] = useState<MediaDeviceInfo[]>([]);
    const [selectedAudioInput, setSelectedAudioInput] = useState('');
@@ -19,7 +17,6 @@ export const useDeviceInputs = () => {
            }
        } catch (error) {
            setPermissionError('Could not enumerate devices');
-            logger.error(`Error enumerating devices: ${error}`);
        }
    }, []);