mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-05-03 20:32:39 +02:00
faster voice responses
This commit is contained in:
parent
47d5118448
commit
779ad51f9f
2 changed files with 60 additions and 24 deletions
|
|
@ -2,11 +2,46 @@ import { useCallback, useRef, useState } from 'react';
|
|||
|
||||
export type TTSState = 'idle' | 'synthesizing' | 'speaking';
|
||||
|
||||
interface SynthesizedAudio {
|
||||
dataUrl: string;
|
||||
}
|
||||
|
||||
/**
 * Requests speech audio for `text` over the `voice:synthesize` IPC channel and
 * wraps the reply as a data URL.
 *
 * @param text - Sent as the `text` field of the IPC payload.
 * @returns A promise for an object whose `dataUrl` is built from the reply's
 *   `mimeType` and `audioBase64` fields.
 */
function synthesize(text: string): Promise<SynthesizedAudio> {
    // Converts the raw IPC reply into the shape the playback code consumes.
    const toSynthesizedAudio = (reply: { audioBase64: string; mimeType: string }): SynthesizedAudio => {
        const { mimeType, audioBase64 } = reply;
        return { dataUrl: `data:${mimeType};base64,${audioBase64}` };
    };

    const pendingReply = window.ipc.invoke('voice:synthesize', { text });
    return pendingReply.then(toSynthesizedAudio);
}
|
||||
|
||||
/**
 * Plays the audio at `dataUrl` and reports completion through a promise.
 *
 * A new `Audio` element is created for the URL and stored in `audioRef.current`
 * before playback starts.
 *
 * Note: the only settle paths are the `ended` event, the `error` event, and a
 * rejected `play()` call; nothing in this function settles the promise when the
 * element is merely paused.
 *
 * @param dataUrl - Source passed to the `Audio` constructor.
 * @param audioRef - Ref that receives the created audio element.
 * @returns A promise that resolves when the `ended` event fires, and rejects
 *   with `Error('Audio playback failed')` on an `error` event or with the
 *   rejection reason of `play()`.
 */
function playAudio(dataUrl: string, audioRef: React.MutableRefObject<HTMLAudioElement | null>): Promise<void> {
    return new Promise<void>((done, fail) => {
        const player = new Audio(dataUrl);
        audioRef.current = player;

        const handleEnded = (): void => {
            console.log('[tts] audio ended');
            done();
        };
        const handleError = (event: Event | string): void => {
            console.error('[tts] audio error:', event);
            fail(new Error('Audio playback failed'));
        };
        const handlePlayStarted = (): void => {
            console.log('[tts] audio playing');
        };
        const handlePlayRejected = (reason: unknown): void => {
            console.error('[tts] play() rejected:', reason);
            fail(reason);
        };

        player.onended = handleEnded;
        player.onerror = handleError;

        player.play().then(handlePlayStarted).catch(handlePlayRejected);
    });
}
|
||||
|
||||
export function useVoiceTTS() {
|
||||
const [state, setState] = useState<TTSState>('idle');
|
||||
const audioRef = useRef<HTMLAudioElement | null>(null);
|
||||
const queueRef = useRef<string[]>([]);
|
||||
const processingRef = useRef(false);
|
||||
// Pre-fetched audio ready to play immediately
|
||||
const prefetchedRef = useRef<Promise<SynthesizedAudio> | null>(null);
|
||||
|
||||
const processQueue = useCallback(async () => {
|
||||
if (processingRef.current) return;
|
||||
|
|
@ -16,38 +51,38 @@ export function useVoiceTTS() {
|
|||
const text = queueRef.current.shift()!;
|
||||
if (!text.trim()) continue;
|
||||
|
||||
setState('synthesizing');
|
||||
console.log('[tts] synthesizing:', text.substring(0, 80));
|
||||
try {
|
||||
const result = await window.ipc.invoke('voice:synthesize', { text });
|
||||
console.log('[tts] got audio, mimeType:', result.mimeType, 'base64 length:', result.audioBase64.length);
|
||||
// Use pre-fetched result if available, otherwise synthesize now
|
||||
let audioPromise: Promise<SynthesizedAudio>;
|
||||
if (prefetchedRef.current) {
|
||||
console.log('[tts] using pre-fetched audio');
|
||||
audioPromise = prefetchedRef.current;
|
||||
prefetchedRef.current = null;
|
||||
} else {
|
||||
setState('synthesizing');
|
||||
console.log('[tts] synthesizing:', text.substring(0, 80));
|
||||
audioPromise = synthesize(text);
|
||||
}
|
||||
|
||||
const audio = await audioPromise;
|
||||
setState('speaking');
|
||||
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
const dataUrl = `data:${result.mimeType};base64,${result.audioBase64}`;
|
||||
const audio = new Audio(dataUrl);
|
||||
audioRef.current = audio;
|
||||
audio.onended = () => {
|
||||
console.log('[tts] audio ended');
|
||||
resolve();
|
||||
};
|
||||
audio.onerror = (e) => {
|
||||
console.error('[tts] audio error:', e);
|
||||
reject(new Error('Audio playback failed'));
|
||||
};
|
||||
audio.play().then(() => {
|
||||
console.log('[tts] audio playing');
|
||||
}).catch((err) => {
|
||||
console.error('[tts] play() rejected:', err);
|
||||
reject(err);
|
||||
});
|
||||
});
|
||||
// Kick off pre-fetch for next chunk while this one plays
|
||||
const nextText = queueRef.current[0];
|
||||
if (nextText?.trim()) {
|
||||
console.log('[tts] pre-fetching next:', nextText.substring(0, 80));
|
||||
prefetchedRef.current = synthesize(nextText);
|
||||
}
|
||||
|
||||
await playAudio(audio.dataUrl, audioRef);
|
||||
} catch (err) {
|
||||
console.error('[tts] error:', err);
|
||||
prefetchedRef.current = null;
|
||||
}
|
||||
}
|
||||
|
||||
audioRef.current = null;
|
||||
prefetchedRef.current = null;
|
||||
processingRef.current = false;
|
||||
setState('idle');
|
||||
}, []);
|
||||
|
|
@ -60,6 +95,7 @@ export function useVoiceTTS() {
|
|||
|
||||
const cancel = useCallback(() => {
|
||||
queueRef.current = [];
|
||||
prefetchedRef.current = null;
|
||||
if (audioRef.current) {
|
||||
audioRef.current.pause();
|
||||
audioRef.current = null;
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ export async function synthesizeSpeech(text: string): Promise<{ audioBase64: str
|
|||
},
|
||||
body: JSON.stringify({
|
||||
text,
|
||||
model_id: 'eleven_multilingual_v2',
|
||||
model_id: 'eleven_flash_v2_5',
|
||||
voice_settings: {
|
||||
stability: 0.5,
|
||||
similarity_boost: 0.75,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue