Make voice input faster: cache auth details and buffer audio while the WebSocket connects

2026-05-06 13:52:44 +02:00 · 2026-03-26 22:23:02 +05:30 · 2026-03-26 22:23:02 +05:30 · 473ffa5d4a
commit 473ffa5d4a
parent eb34873c32
2 changed files with 74 additions and 48 deletions
--- a/apps/x/apps/renderer/src/App.tsx
+++ b/apps/x/apps/renderer/src/App.tsx
@ -703,13 +703,18 @@ function App() {
      window.ipc.invoke('oauth:getState', null),
    ]).then(([config, oauthState]) => {
      const rowboatConnected = oauthState.config?.rowboat?.connected ?? false
-      setVoiceAvailable(!!config.deepgram || rowboatConnected)
+      const hasVoice = !!config.deepgram || rowboatConnected
      setVoiceAvailable(hasVoice)
      setTtsAvailable(!!config.elevenlabs || rowboatConnected)
      // Pre-cache auth details so mic click skips IPC round-trips
      if (hasVoice) {
        voice.warmup()
      }
    }).catch(() => {
      setVoiceAvailable(false)
      setTtsAvailable(false)
    })
-  }, [])
+  }, [voice])
  useEffect(() => {
    refreshVoiceAvailability()
--- a/apps/x/apps/renderer/src/hooks/useVoiceMode.ts
+++ b/apps/x/apps/renderer/src/hooks/useVoiceMode.ts
@ -16,6 +16,9 @@ const DEEPGRAM_PARAMS = new URLSearchParams({
 });
 const DEEPGRAM_LISTEN_URL = `wss://api.deepgram.com/v1/listen?${DEEPGRAM_PARAMS.toString()}`;
 // Cache auth details so we don't need IPC round-trips on every mic click
 let cachedAuth: { type: 'rowboat'; url: string; token: string } | { type: 'local'; apiKey: string } | null = null;
 export function useVoiceMode() {
    const { refresh: refreshRowboatAccount } = useRowboatAccount();
    const [state, setState] = useState<VoiceState>('idle');
@ -26,32 +29,54 @@ export function useVoiceMode() {
    const audioCtxRef = useRef<AudioContext | null>(null);
    const transcriptBufferRef = useRef('');
    const interimRef = useRef('');
    // Buffer audio chunks captured before the WebSocket is ready
    const audioBufferRef = useRef<ArrayBuffer[]>([]);
-    // Connect (or reconnect) the Deepgram WebSocket.
+    // Refresh cached auth details (called on warmup; on mic click only when nothing is cached yet)
-    // Refreshes Rowboat account before connect so access token is current.
+    const refreshAuth = useCallback(async () => {
    const connectWs = useCallback(async () => {
        if (wsRef.current && (wsRef.current.readyState === WebSocket.OPEN || wsRef.current.readyState === WebSocket.CONNECTING)) return;
        let ws: WebSocket;
        const account = await refreshRowboatAccount();
        if (
            account?.signedIn &&
            account.accessToken &&
            account.config?.websocketApiUrl
        ) {
-            const listenUrl = buildDeepgramListenUrl(account.config.websocketApiUrl, DEEPGRAM_PARAMS);
+            cachedAuth = { type: 'rowboat', url: account.config.websocketApiUrl, token: account.accessToken };
            ws = new WebSocket(listenUrl, ['bearer', account.accessToken]);
        } else {
            // Fall back to local API key (passed as subprotocol)
            const config = await window.ipc.invoke('voice:getConfig', null);
-            if (!config?.deepgram) return;
+            if (config?.deepgram) {
-            ws = new WebSocket(DEEPGRAM_LISTEN_URL, ['token', config.deepgram.apiKey]);
+                cachedAuth = { type: 'local', apiKey: config.deepgram.apiKey };
            }
        }
    }, [refreshRowboatAccount]);
    // Create and connect a Deepgram WebSocket using cached auth.
    // Starts the connection and returns without waiting for it to open (it may first await an auth refresh if nothing is cached).
    const connectWs = useCallback(async () => {
        if (wsRef.current && (wsRef.current.readyState === WebSocket.OPEN || wsRef.current.readyState === WebSocket.CONNECTING)) return;
        // Refresh auth if we don't have it cached yet
        if (!cachedAuth) {
            await refreshAuth();
        }
        if (!cachedAuth) return;
        let ws: WebSocket;
        if (cachedAuth.type === 'rowboat') {
            const listenUrl = buildDeepgramListenUrl(cachedAuth.url, DEEPGRAM_PARAMS);
            ws = new WebSocket(listenUrl, ['bearer', cachedAuth.token]);
        } else {
            ws = new WebSocket(DEEPGRAM_LISTEN_URL, ['token', cachedAuth.apiKey]);
        }
        wsRef.current = ws;
        ws.onopen = () => {
            console.log('[voice] WebSocket connected');
            // Flush any buffered audio captured while we were connecting
            const buffered = audioBufferRef.current;
            audioBufferRef.current = [];
            for (const chunk of buffered) {
                ws.send(chunk);
            }
        };
        ws.onmessage = (event) => {
@ -73,13 +98,15 @@ export function useVoiceMode() {
        ws.onerror = () => {
            console.error('[voice] WebSocket error');
            // Auth may be stale — clear cache so next attempt refreshes
            cachedAuth = null;
        };
        ws.onclose = () => {
            console.log('[voice] WebSocket closed');
            wsRef.current = null;
        };
-    }, [refreshRowboatAccount]);
+    }, [refreshAuth]);
    // Stop audio capture and close WS
    const stopAudioCapture = useCallback(() => {
@ -100,6 +127,7 @@ export function useVoiceMode() {
            wsRef.current.close();
            wsRef.current = null;
        }
        audioBufferRef.current = [];
        setInterimText('');
        transcriptBufferRef.current = '';
        interimRef.current = '';
@ -112,45 +140,28 @@ export function useVoiceMode() {
        transcriptBufferRef.current = '';
        interimRef.current = '';
        setInterimText('');
        audioBufferRef.current = [];
-        // If WS isn't connected, connect and wait for it
+        // Show listening immediately — don't wait for WebSocket
        if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) {
            setState('connecting');
            connectWs();
            // Wait for WS to be ready (up to 5 seconds)
            const wsOk = await new Promise<boolean>((resolve) => {
                const checkInterval = setInterval(() => {
                    if (wsRef.current?.readyState === WebSocket.OPEN) {
                        clearInterval(checkInterval);
                        resolve(true);
                    }
                }, 50);
                setTimeout(() => {
                    clearInterval(checkInterval);
                    resolve(false);
                }, 5000);
            });
            if (!wsOk) {
                setState('idle');
                return;
            }
        }
        setState('listening');
-        // Start mic
+        // Kick off mic + WebSocket setup in parallel; don't wait for the socket to open
-        let stream: MediaStream | null = null;
+        const [stream] = await Promise.all([
-        try {
+            navigator.mediaDevices.getUserMedia({ audio: true }).catch((err) => {
-            stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+                console.error('Microphone access denied:', err);
-        } catch (err) {
+                return null;
-            console.error('Microphone access denied:', err);
+            }),
            connectWs(),
        ]);
        if (!stream) {
            setState('idle');
            return;
        }
        mediaStreamRef.current = stream;
-        // Start audio capture
+        // Start audio capture immediately — buffer if WS isn't open yet
        const audioCtx = new AudioContext({ sampleRate: 16000 });
        audioCtxRef.current = audioCtx;
        const source = audioCtx.createMediaStreamSource(stream);
@ -158,14 +169,19 @@ export function useVoiceMode() {
        processorRef.current = processor;
        processor.onaudioprocess = (e) => {
            if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) return;
            const float32 = e.inputBuffer.getChannelData(0);
            const int16 = new Int16Array(float32.length);
            for (let i = 0; i < float32.length; i++) {
                const s = Math.max(-1, Math.min(1, float32[i]));
                int16[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
            }
-            wsRef.current.send(int16.buffer);
+            const buffer = int16.buffer;
            if (wsRef.current?.readyState === WebSocket.OPEN) {
                wsRef.current.send(buffer);
            } else {
                // WebSocket still connecting — buffer the audio
                audioBufferRef.current.push(buffer);
            }
        };
        source.connect(processor);
@ -188,5 +204,10 @@ export function useVoiceMode() {
        stopAudioCapture();
    }, [stopAudioCapture]);
-    return { state, interimText, start, submit, cancel };
+    /** Pre-cache auth details so mic click skips IPC round-trips */
    const warmup = useCallback(() => {
        refreshAuth().catch(() => {});
    }, [refreshAuth]);
    return { state, interimText, start, submit, cancel, warmup };
 }