faster voice responses

This commit is contained in:
Arjun 2026-03-13 11:32:10 +05:30
parent 47d5118448
commit 779ad51f9f
2 changed files with 60 additions and 24 deletions

View file

@ -2,11 +2,46 @@ import { useCallback, useRef, useState } from 'react';
// Lifecycle states of the TTS pipeline: 'synthesizing' while waiting on the
// backend, 'speaking' while audio is audibly playing, 'idle' otherwise.
export type TTSState = 'idle' | 'synthesizing' | 'speaking';
// Result of a synthesis round-trip: the audio encoded as a base64 `data:` URL
// that can be handed directly to an HTMLAudioElement.
interface SynthesizedAudio {
dataUrl: string;
}
/**
 * Ask the main process (over the `voice:synthesize` IPC channel) to turn
 * `text` into speech, and package the reply as a `data:` URL that an
 * HTMLAudioElement can play without further decoding.
 *
 * Rejections from the IPC bridge propagate to the caller unchanged.
 */
async function synthesize(text: string): Promise<SynthesizedAudio> {
  const result: { audioBase64: string; mimeType: string } =
    await window.ipc.invoke('voice:synthesize', { text });
  return { dataUrl: `data:${result.mimeType};base64,${result.audioBase64}` };
}
/**
 * Play a `data:` URL through a freshly created HTMLAudioElement.
 *
 * The element is published into `audioRef` before playback starts so an
 * external cancel() can pause it. The returned promise resolves when the
 * clip finishes, and rejects if the element reports a media error or if
 * `play()` itself is refused (e.g. by autoplay policy).
 */
function playAudio(
  dataUrl: string,
  audioRef: React.MutableRefObject<HTMLAudioElement | null>
): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const element = new Audio(dataUrl);
    // Expose the element so callers can cancel mid-playback.
    audioRef.current = element;

    element.onended = () => {
      console.log('[tts] audio ended');
      resolve();
    };
    element.onerror = (e) => {
      console.error('[tts] audio error:', e);
      reject(new Error('Audio playback failed'));
    };

    element
      .play()
      .then(() => console.log('[tts] audio playing'))
      .catch((err) => {
        console.error('[tts] play() rejected:', err);
        reject(err);
      });
  });
}
export function useVoiceTTS() {
const [state, setState] = useState<TTSState>('idle');
const audioRef = useRef<HTMLAudioElement | null>(null);
const queueRef = useRef<string[]>([]);
const processingRef = useRef(false);
// Pre-fetched audio ready to play immediately
const prefetchedRef = useRef<Promise<SynthesizedAudio> | null>(null);
const processQueue = useCallback(async () => {
if (processingRef.current) return;
@ -16,38 +51,38 @@ export function useVoiceTTS() {
const text = queueRef.current.shift()!;
if (!text.trim()) continue;
setState('synthesizing');
console.log('[tts] synthesizing:', text.substring(0, 80));
try {
const result = await window.ipc.invoke('voice:synthesize', { text });
console.log('[tts] got audio, mimeType:', result.mimeType, 'base64 length:', result.audioBase64.length);
// Use pre-fetched result if available, otherwise synthesize now
let audioPromise: Promise<SynthesizedAudio>;
if (prefetchedRef.current) {
console.log('[tts] using pre-fetched audio');
audioPromise = prefetchedRef.current;
prefetchedRef.current = null;
} else {
setState('synthesizing');
console.log('[tts] synthesizing:', text.substring(0, 80));
audioPromise = synthesize(text);
}
const audio = await audioPromise;
setState('speaking');
await new Promise<void>((resolve, reject) => {
const dataUrl = `data:${result.mimeType};base64,${result.audioBase64}`;
const audio = new Audio(dataUrl);
audioRef.current = audio;
audio.onended = () => {
console.log('[tts] audio ended');
resolve();
};
audio.onerror = (e) => {
console.error('[tts] audio error:', e);
reject(new Error('Audio playback failed'));
};
audio.play().then(() => {
console.log('[tts] audio playing');
}).catch((err) => {
console.error('[tts] play() rejected:', err);
reject(err);
});
});
// Kick off pre-fetch for next chunk while this one plays
const nextText = queueRef.current[0];
if (nextText?.trim()) {
console.log('[tts] pre-fetching next:', nextText.substring(0, 80));
prefetchedRef.current = synthesize(nextText);
}
await playAudio(audio.dataUrl, audioRef);
} catch (err) {
console.error('[tts] error:', err);
prefetchedRef.current = null;
}
}
audioRef.current = null;
prefetchedRef.current = null;
processingRef.current = false;
setState('idle');
}, []);
@ -60,6 +95,7 @@ export function useVoiceTTS() {
const cancel = useCallback(() => {
queueRef.current = [];
prefetchedRef.current = null;
if (audioRef.current) {
audioRef.current.pause();
audioRef.current = null;

View file

@ -49,7 +49,7 @@ export async function synthesizeSpeech(text: string): Promise<{ audioBase64: str
},
body: JSON.stringify({
text,
model_id: 'eleven_multilingual_v2',
model_id: 'eleven_flash_v2_5',
voice_settings: {
stability: 0.5,
similarity_boost: 0.75,