mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-06-09 19:45:17 +02:00
feat: make meeting recording auto-stop when the meeting ends
Detect silence from the raw mic and system audio (with detection armed at recording start), add a quiet-meeting stop nudge, shorten the silence window once past the calendar end time, and stop instantly when the shared call window closes.
This commit is contained in:
parent
e2178c1488
commit
077c47f5ad
1 changed files with 109 additions and 25 deletions
|
|
@ -1,4 +1,5 @@
|
|||
import { useCallback, useRef, useState } from 'react';
|
||||
import { toast } from 'sonner';
|
||||
import { buildDeepgramListenUrl } from '@/lib/deepgram-listen-url';
|
||||
import { useRowboatAccount } from '@/hooks/useRowboatAccount';
|
||||
|
||||
|
|
@ -21,8 +22,23 @@ const DEEPGRAM_LISTEN_URL = `wss://api.deepgram.com/v1/listen?${DEEPGRAM_PARAMS.
|
|||
// RMS threshold: system audio above this = "active" (speakers playing)
|
||||
const SYSTEM_AUDIO_GATE_THRESHOLD = 0.005;
|
||||
|
||||
// Auto-stop after 2 minutes of silence (no transcript from Deepgram)
|
||||
const SILENCE_AUTO_STOP_MS = 2 * 60 * 1000;
|
||||
// RMS threshold for "someone is talking" on either channel. Drives silence
|
||||
// detection — kept a touch above the gate threshold so faint room noise on the
|
||||
// mic doesn't read as speech and keep a finished recording alive.
|
||||
const SPEECH_RMS_THRESHOLD = 0.01;
|
||||
|
||||
// Silence handling. "Silence" = no audio above SPEECH_RMS_THRESHOLD on EITHER
|
||||
// the mic or the system-audio channel (i.e. nobody — local or remote — talking).
|
||||
// - After SILENCE_NUDGE_MS we ask the user (toast) whether to stop.
|
||||
// - After SILENCE_BACKSTOP_MS we stop unconditionally.
|
||||
// - Once past the linked calendar event's end time we use the shorter
|
||||
// POST_CALENDAR_END_SILENCE_MS, since a lull after the scheduled end is a
|
||||
// strong signal the meeting is actually over.
|
||||
const SILENCE_NUDGE_MS = 2 * 60 * 1000;
|
||||
const SILENCE_BACKSTOP_MS = 5 * 60 * 1000;
|
||||
const POST_CALENDAR_END_SILENCE_MS = 2 * 60 * 1000;
|
||||
// How often the silence checker runs.
|
||||
const SILENCE_CHECK_INTERVAL_MS = 5 * 1000;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Headphone detection
|
||||
|
|
@ -119,7 +135,13 @@ export function useMeetingTranscription(onAutoStop?: () => void) {
|
|||
const interimRef = useRef<Map<number, { speaker: string; text: string }>>(new Map());
|
||||
const notePathRef = useRef<string>('');
|
||||
const writeTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
|
||||
const silenceTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
|
||||
// Silence detection: timestamp of the last speech-level audio on either
|
||||
// channel, plus the interval that checks it. calendarEndMsRef holds the
|
||||
// linked event's end time (null if none).
|
||||
const lastAudioActivityRef = useRef<number>(0);
|
||||
const silenceCheckRef = useRef<ReturnType<typeof setInterval> | null>(null);
|
||||
const calendarEndMsRef = useRef<number | null>(null);
|
||||
const nudgeToastIdRef = useRef<string | number | null>(null);
|
||||
const onAutoStopRef = useRef(onAutoStop);
|
||||
onAutoStopRef.current = onAutoStop;
|
||||
const dateRef = useRef<string>('');
|
||||
|
|
@ -161,9 +183,13 @@ export function useMeetingTranscription(onAutoStop?: () => void) {
|
|||
clearTimeout(writeTimerRef.current);
|
||||
writeTimerRef.current = null;
|
||||
}
|
||||
if (silenceTimerRef.current) {
|
||||
clearTimeout(silenceTimerRef.current);
|
||||
silenceTimerRef.current = null;
|
||||
if (silenceCheckRef.current) {
|
||||
clearInterval(silenceCheckRef.current);
|
||||
silenceCheckRef.current = null;
|
||||
}
|
||||
if (nudgeToastIdRef.current !== null) {
|
||||
toast.dismiss(nudgeToastIdRef.current);
|
||||
nudgeToastIdRef.current = null;
|
||||
}
|
||||
if (processorRef.current) {
|
||||
processorRef.current.disconnect();
|
||||
|
|
@ -279,13 +305,6 @@ export function useMeetingTranscription(onAutoStop?: () => void) {
|
|||
const transcript = data.channel.alternatives[0].transcript;
|
||||
if (!transcript) return;
|
||||
|
||||
// Reset silence auto-stop timer on any transcript
|
||||
if (silenceTimerRef.current) clearTimeout(silenceTimerRef.current);
|
||||
silenceTimerRef.current = setTimeout(() => {
|
||||
console.log('[meeting] 2 minutes of silence — auto-stopping');
|
||||
onAutoStopRef.current?.();
|
||||
}, SILENCE_AUTO_STOP_MS);
|
||||
|
||||
const channelIndex = data.channel_index?.[0] ?? 0;
|
||||
const isMic = channelIndex === 0;
|
||||
|
||||
|
|
@ -325,6 +344,17 @@ export function useMeetingTranscription(onAutoStop?: () => void) {
|
|||
const systemStream = systemResult.value;
|
||||
systemStreamRef.current = systemStream;
|
||||
|
||||
// If the shared source goes away (user closes the call window / clicks
|
||||
// "Stop sharing"), the track fires "ended" — treat that as the meeting
|
||||
// ending and stop. Our own cleanup() calls track.stop(), which does NOT
|
||||
// fire "ended", so this won't double-trigger on a manual stop.
|
||||
systemStream.getAudioTracks().forEach(track => {
|
||||
track.addEventListener('ended', () => {
|
||||
console.log('[meeting] system-audio track ended (shared source closed) — auto-stopping');
|
||||
onAutoStopRef.current?.();
|
||||
});
|
||||
});
|
||||
|
||||
// ----- Audio pipeline -----
|
||||
const audioCtx = new AudioContext({ sampleRate: 16000 });
|
||||
audioCtxRef.current = audioCtx;
|
||||
|
|
@ -345,24 +375,33 @@ export function useMeetingTranscription(onAutoStop?: () => void) {
|
|||
const micRaw = e.inputBuffer.getChannelData(0);
|
||||
const sysRaw = e.inputBuffer.getChannelData(1);
|
||||
|
||||
// RMS of each channel, computed once per frame and reused for
|
||||
// silence detection and gating the mic in speaker mode.
|
||||
let micSum = 0;
|
||||
for (let i = 0; i < micRaw.length; i++) micSum += micRaw[i] * micRaw[i];
|
||||
const micRms = Math.sqrt(micSum / micRaw.length);
|
||||
let sysSum = 0;
|
||||
for (let i = 0; i < sysRaw.length; i++) sysSum += sysRaw[i] * sysRaw[i];
|
||||
const sysRms = Math.sqrt(sysSum / sysRaw.length);
|
||||
|
||||
// Reset the silence clock whenever EITHER channel has speech-level
|
||||
// audio. Uses the raw mic (pre-gating) so the user's own voice counts
|
||||
// even in speaker mode where the outgoing mic gets muted.
|
||||
if (micRms > SPEECH_RMS_THRESHOLD || sysRms > SPEECH_RMS_THRESHOLD) {
|
||||
lastAudioActivityRef.current = Date.now();
|
||||
}
|
||||
|
||||
// Mode 1 (headphones): pass both streams through unmodified
|
||||
// Mode 2 (speakers): gate/mute mic when system audio is active
|
||||
let micOut: Float32Array;
|
||||
if (usingHeadphones) {
|
||||
micOut = micRaw;
|
||||
} else if (sysRms > SYSTEM_AUDIO_GATE_THRESHOLD) {
|
||||
// System audio is playing — mute mic to prevent bleed
|
||||
micOut = new Float32Array(micRaw.length); // all zeros
|
||||
} else {
|
||||
// Compute system audio RMS to detect activity
|
||||
let sysSum = 0;
|
||||
for (let i = 0; i < sysRaw.length; i++) sysSum += sysRaw[i] * sysRaw[i];
|
||||
const sysRms = Math.sqrt(sysSum / sysRaw.length);
|
||||
|
||||
if (sysRms > SYSTEM_AUDIO_GATE_THRESHOLD) {
|
||||
// System audio is playing — mute mic to prevent bleed
|
||||
micOut = new Float32Array(micRaw.length); // all zeros
|
||||
} else {
|
||||
// System audio is silent — pass mic through
|
||||
micOut = micRaw;
|
||||
}
|
||||
// System audio is silent — pass mic through
|
||||
micOut = micRaw;
|
||||
}
|
||||
|
||||
// Interleave mic (ch0) + system audio (ch1) into stereo int16 PCM
|
||||
|
|
@ -391,6 +430,12 @@ export function useMeetingTranscription(onAutoStop?: () => void) {
|
|||
const notePath = `knowledge/Meetings/rowboat/${dateFolder}/${filename}.md`;
|
||||
notePathRef.current = notePath;
|
||||
calendarEventRef.current = calendarEvent;
|
||||
|
||||
// Parse the linked event's end time (timed events only) so the silence
|
||||
// window can shorten once the meeting is past its scheduled end.
|
||||
const calEndMs = calendarEvent?.end?.dateTime ? Date.parse(calendarEvent.end.dateTime) : NaN;
|
||||
calendarEndMsRef.current = Number.isFinite(calEndMs) ? calEndMs : null;
|
||||
|
||||
const initialContent = formatTranscript([], dateStr, calendarEvent);
|
||||
await window.ipc.invoke('workspace:writeFile', {
|
||||
path: notePath,
|
||||
|
|
@ -398,6 +443,45 @@ export function useMeetingTranscription(onAutoStop?: () => void) {
|
|||
opts: { encoding: 'utf8', mkdirp: true },
|
||||
});
|
||||
|
||||
// Arm silence detection. Initialise the activity clock to "now" so the
|
||||
// checker is live from the very start of recording — a session that
|
||||
// never captures any audio still auto-stops at the backstop instead of
|
||||
// running forever.
|
||||
lastAudioActivityRef.current = Date.now();
|
||||
if (silenceCheckRef.current) clearInterval(silenceCheckRef.current);
|
||||
silenceCheckRef.current = setInterval(() => {
|
||||
const silentMs = Date.now() - lastAudioActivityRef.current;
|
||||
const endMs = calendarEndMsRef.current;
|
||||
const pastCalendarEnd = endMs != null && Date.now() > endMs;
|
||||
const hardStopMs = pastCalendarEnd ? POST_CALENDAR_END_SILENCE_MS : SILENCE_BACKSTOP_MS;
|
||||
|
||||
if (silentMs >= hardStopMs) {
|
||||
console.log(`[meeting] ${Math.round(silentMs / 1000)}s of silence${pastCalendarEnd ? ' (past scheduled end)' : ''} — auto-stopping`);
|
||||
onAutoStopRef.current?.();
|
||||
return;
|
||||
}
|
||||
|
||||
if (silentMs >= SILENCE_NUDGE_MS) {
|
||||
// Ask once; the toast persists until dismissed or acted on. Past
|
||||
// the scheduled end we skip straight to the hard stop above, so
|
||||
// the nudge only ever shows for an in-progress meeting.
|
||||
if (nudgeToastIdRef.current === null) {
|
||||
nudgeToastIdRef.current = toast('Still in a meeting?', {
|
||||
description: "It's been quiet for a couple of minutes.",
|
||||
duration: Infinity,
|
||||
action: {
|
||||
label: 'Stop recording',
|
||||
onClick: () => { onAutoStopRef.current?.(); },
|
||||
},
|
||||
});
|
||||
}
|
||||
} else if (nudgeToastIdRef.current !== null) {
|
||||
// Audio resumed before the backstop — retract the nudge.
|
||||
toast.dismiss(nudgeToastIdRef.current);
|
||||
nudgeToastIdRef.current = null;
|
||||
}
|
||||
}, SILENCE_CHECK_INTERVAL_MS);
|
||||
|
||||
setState('recording');
|
||||
return notePath;
|
||||
}, [state, cleanup, scheduleDebouncedWrite, refreshRowboatAccount]);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue