diff --git a/apps/x/apps/main/src/ipc.ts b/apps/x/apps/main/src/ipc.ts
index 52d314f5..364442be 100644
--- a/apps/x/apps/main/src/ipc.ts
+++ b/apps/x/apps/main/src/ipc.ts
@@ -944,6 +944,24 @@ export function setupIpcHandlers() {
     'voice:synthesize': async (_event, args) => {
       return voice.synthesizeSpeech(args.text);
     },
+    'voice:ensureMicAccess': async () => {
+      if (process.platform !== 'darwin') return { granted: true };
+      const status = systemPreferences.getMediaAccessStatus('microphone');
+      console.log('[voice] Microphone permission status:', status);
+      if (status === 'granted') return { granted: true };
+      // 'not-determined' shows the native TCC prompt and resolves once the
+      // user responds; 'denied'/'restricted' resolve false without prompting.
+      // Awaiting this here means the triggering mic click proceeds to
+      // getUserMedia only after permission is settled — fixing the first
+      // click silently failing while the prompt was still up.
+      try {
+        const granted = await systemPreferences.askForMediaAccess('microphone');
+        console.log('[voice] Microphone permission after prompt:', granted);
+        return { granted };
+      } catch {
+        return { granted: false };
+      }
+    },
     // Live-note handlers
     'live-note:run': async (_event, args) => {
       const result = await runLiveNoteAgent(args.filePath, 'manual', args.context);
diff --git a/apps/x/apps/renderer/src/hooks/useVoiceMode.ts b/apps/x/apps/renderer/src/hooks/useVoiceMode.ts
index b5e1187d..fd13c758 100644
--- a/apps/x/apps/renderer/src/hooks/useVoiceMode.ts
+++ b/apps/x/apps/renderer/src/hooks/useVoiceMode.ts
@@ -151,6 +151,20 @@ export function useVoiceMode() {
     analytics.voiceInputStarted();
     posthog.people.set_once({ has_used_voice: true });
 
+    // Settle the OS-level microphone permission before capturing. On the
+    // first-ever use (macOS) the permission is 'not-determined'; calling
+    // getUserMedia directly would reject while the native prompt is up,
+    // making the first mic click silently do nothing. Resolving it here
+    // lets this same click proceed once the user grants access.
+    const mic = await window.ipc
+      .invoke('voice:ensureMicAccess', null)
+      .catch(() => ({ granted: true }));
+    if (!mic.granted) {
+      console.error('Microphone access denied');
+      stopAudioCapture();
+      return;
+    }
+
     // Kick off mic + WebSocket in parallel, don't await WebSocket
     const [stream] = await Promise.all([
       navigator.mediaDevices.getUserMedia({ audio: true }).catch((err) => {
@@ -161,7 +175,10 @@ export function useVoiceMode() {
     ]);
 
     if (!stream) {
-      setState('idle');
+      // connectWs() may have already opened a socket — tear everything
+      // down (close WS, reset buffers, state) rather than only resetting
+      // state, which would leak the socket into the next attempt.
+      stopAudioCapture();
       return;
     }
 
@@ -192,7 +209,7 @@ export function useVoiceMode() {
 
     source.connect(processor);
     processor.connect(audioCtx.destination);
-  }, [state, connectWs]);
+  }, [state, connectWs, stopAudioCapture]);
 
   /** Stop recording and return the full transcript (finalized + any current interim) */
   const submit = useCallback((): string => {
diff --git a/apps/x/packages/shared/src/ipc.ts b/apps/x/packages/shared/src/ipc.ts
index e694be2f..c2e5e8e7 100644
--- a/apps/x/packages/shared/src/ipc.ts
+++ b/apps/x/packages/shared/src/ipc.ts
@@ -702,6 +702,16 @@ const ipcSchemas = {
       mimeType: z.string(),
     }),
   },
+  // Ensures the OS-level microphone permission is settled before capturing.
+  // On first-ever use (macOS) the permission is 'not-determined'; resolving
+  // the native prompt up front prevents the in-flight getUserMedia from
+  // rejecting on the first mic click.
+  'voice:ensureMicAccess': {
+    req: z.null(),
+    res: z.object({
+      granted: z.boolean(),
+    }),
+  },
   'meeting:checkScreenPermission': {
     req: z.null(),
     res: z.object({