Mirror of https://github.com/rowboatlabs/rowboat.git (synced 2026-05-01 19:32:40 +02:00)
meeting transcription first commit

This commit is contained in: parent 128f433e5c, commit ca9d5761d3

5 changed files with 341 additions and 4 deletions
apps/x/apps/main/entitlements.plist (new file, 10 additions)

@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+  <key>com.apple.security.device.audio-input</key>
+  <true/>
+  <key>com.apple.security.device.screen-capture</key>
+  <true/>
+</dict>
+</plist>
@@ -13,6 +13,10 @@ module.exports = {
   appCategoryType: 'public.app-category.productivity',
   osxSign: {
     batchCodesignCalls: true,
+    optionsForFile: () => ({
+      entitlements: path.join(__dirname, 'entitlements.plist'),
+      'entitlements-inherit': path.join(__dirname, 'entitlements.plist'),
+    }),
   },
   osxNotarize: {
     appleId: process.env.APPLE_ID,
@@ -1,4 +1,4 @@
-import { app, BrowserWindow, protocol, net, shell, session } from "electron";
+import { app, BrowserWindow, desktopCapturer, protocol, net, shell, session } from "electron";
 import path from "node:path";
 import {
   setupIpcHandlers,
@@ -92,15 +92,27 @@ function createWindow() {
     },
   });
 
-  // Grant microphone permission for voice mode
+  // Grant microphone and display-capture permissions
   session.defaultSession.setPermissionRequestHandler((_webContents, permission, callback) => {
-    if (permission === 'media') {
+    if (permission === 'media' || permission === 'display-capture') {
       callback(true);
     } else {
       callback(false);
     }
   });
 
+  // Auto-approve display media requests and route system audio as loopback.
+  // Electron requires a video source in the callback even if we only want audio.
+  // We pass the first available screen source; the renderer discards the video track.
+  session.defaultSession.setDisplayMediaRequestHandler(async (_request, callback) => {
+    const sources = await desktopCapturer.getSources({ types: ['screen'] });
+    if (sources.length === 0) {
+      callback({});
+      return;
+    }
+    callback({ video: sources[0], audio: 'loopback' });
+  });
+
   // Show window when content is ready to prevent blank screen
   win.once("ready-to-show", () => {
     win.maximize();
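For context, the handler above is what services a plain getDisplayMedia call from the renderer; a minimal sketch of that counterpart, assuming nothing beyond standard web APIs (the real call lives in the useMeetingTranscription hook added below):

  // Renderer side (sketch): Electron's handler supplies a screen source plus loopback audio.
  const stream = await navigator.mediaDevices.getDisplayMedia({ audio: true, video: true });
  stream.getVideoTracks().forEach((t) => t.stop()); // the mandatory video track is discarded
  const systemAudio = new MediaStream(stream.getAudioTracks()); // 'systemAudio' is an illustrative name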
@@ -5,7 +5,7 @@ import { RunEvent, ListRunsResponse } from '@x/shared/src/runs.js';
 import type { LanguageModelUsage, ToolUIPart } from 'ai';
 import './App.css'
 import z from 'zod';
-import { CheckIcon, LoaderIcon, PanelLeftIcon, Maximize2, Minimize2, ChevronLeftIcon, ChevronRightIcon, SquarePen, SearchIcon, HistoryIcon } from 'lucide-react';
+import { CheckIcon, LoaderIcon, PanelLeftIcon, Maximize2, Minimize2, ChevronLeftIcon, ChevronRightIcon, SquarePen, SearchIcon, HistoryIcon, RadioIcon, SquareIcon } from 'lucide-react';
 import { cn } from '@/lib/utils';
 import { MarkdownEditor } from './components/markdown-editor';
 import { ChatSidebar } from './components/chat-sidebar';
@@ -78,6 +78,7 @@ import { AgentScheduleState } from '@x/shared/dist/agent-schedule-state.js'
 import { toast } from "sonner"
 import { useVoiceMode } from '@/hooks/useVoiceMode'
 import { useVoiceTTS } from '@/hooks/useVoiceTTS'
+import { useMeetingTranscription, type MeetingTranscriptionState } from '@/hooks/useMeetingTranscription'
 
 type DirEntry = z.infer<typeof workspace.DirEntry>
 type RunEventType = z.infer<typeof RunEvent>
@@ -383,6 +384,8 @@ function FixedSidebarToggle({
   canNavigateForward,
   onNewChat,
   onOpenSearch,
+  meetingState,
+  onToggleMeeting,
   leftInsetPx,
 }: {
   onNavigateBack: () => void
@@ -391,6 +394,8 @@ function FixedSidebarToggle({
   canNavigateForward: boolean
   onNewChat: () => void
   onOpenSearch: () => void
+  meetingState: MeetingTranscriptionState
+  onToggleMeeting: () => void
   leftInsetPx: number
 }) {
   const { toggleSidebar, state } = useSidebar()
@@ -426,6 +431,25 @@ function FixedSidebarToggle({
       >
         <SearchIcon className="size-5" />
       </button>
+      <button
+        type="button"
+        onClick={onToggleMeeting}
+        disabled={meetingState === 'connecting' || meetingState === 'stopping'}
+        className={cn(
+          "flex h-8 w-8 items-center justify-center rounded-md transition-colors disabled:opacity-50 disabled:pointer-events-none",
+          meetingState === 'recording'
+            ? "text-red-500 hover:bg-accent"
+            : "text-muted-foreground hover:bg-accent hover:text-foreground"
+        )}
+        style={{ marginLeft: TITLEBAR_BUTTON_GAP_PX }}
+        aria-label={meetingState === 'recording' ? "Stop meeting transcription" : "Start meeting transcription"}
+      >
+        {meetingState === 'recording' ? (
+          <SquareIcon className="size-4 animate-pulse" />
+        ) : (
+          <RadioIcon className="size-5" />
+        )}
+      </button>
       {/* Back / Forward navigation */}
       {isCollapsed && (
         <>
@@ -619,6 +643,11 @@ function App() {
   const voiceRef = useRef(voice)
   voiceRef.current = voice
 
+  const handleToggleMeetingRef = useRef<(() => void) | undefined>(undefined)
+  const meetingTranscription = useMeetingTranscription(() => {
+    handleToggleMeetingRef.current?.()
+  })
+
   // Check if voice is available on mount and when OAuth state changes
   const refreshVoiceAvailability = useCallback(() => {
     Promise.all([
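A note on the ref indirection above: the hook must be called before handleToggleMeeting can exist, since the handler uses the hook's return value, so the callback reads the current handler through a ref rather than capturing it directly. A minimal sketch of the pattern, using the diff's own names:

  // The callback stays stable while always invoking the latest handler.
  const handleToggleMeetingRef = useRef<(() => void) | undefined>(undefined)
  const meetingTranscription = useMeetingTranscription(() => {
    handleToggleMeetingRef.current?.()
  })
  // once defined: handleToggleMeetingRef.current = handleToggleMeeting (assignment not shown in this diff)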
@@ -3314,6 +3343,17 @@ function App() {
     navigateToFile(notePath)
   }, [loadDirectory, navigateToFile, fileTabs])
 
+  const handleToggleMeeting = useCallback(async () => {
+    if (meetingTranscription.state === 'recording') {
+      await meetingTranscription.stop()
+    } else if (meetingTranscription.state === 'idle') {
+      const notePath = await meetingTranscription.start()
+      if (notePath) {
+        await handleVoiceNoteCreated(notePath)
+      }
+    }
+  }, [meetingTranscription, handleVoiceNoteCreated])
+
   const ensureWikiFile = useCallback(async (wikiPath: string) => {
     const resolvedPath = toKnowledgePath(wikiPath)
     if (!resolvedPath) return null
@@ -4175,6 +4215,8 @@ function App() {
         canNavigateForward={canNavigateForward}
         onNewChat={handleNewChatTab}
         onOpenSearch={() => setIsSearchOpen(true)}
+        meetingState={meetingTranscription.state}
+        onToggleMeeting={() => { void handleToggleMeeting() }}
         leftInsetPx={isMac ? MACOS_TRAFFIC_LIGHTS_RESERVED_PX : 0}
       />
     </SidebarProvider>
apps/x/apps/renderer/src/hooks/useMeetingTranscription.ts (new file, 269 additions)

@@ -0,0 +1,269 @@
+import { useCallback, useRef, useState } from 'react';
+
+export type MeetingTranscriptionState = 'idle' | 'connecting' | 'recording' | 'stopping';
+
+const DEEPGRAM_PARAMS = new URLSearchParams({
+  model: 'nova-3',
+  encoding: 'linear16',
+  sample_rate: '16000',
+  channels: '2',
+  multichannel: 'true',
+  interim_results: 'true',
+  smart_format: 'true',
+  punctuate: 'true',
+});
+const DEEPGRAM_LISTEN_URL = `wss://api.deepgram.com/v1/listen?${DEEPGRAM_PARAMS.toString()}`;
+
+interface TranscriptEntry {
+  speaker: string;
+  text: string;
+}
+
+function formatTranscript(entries: TranscriptEntry[], date: string): string {
+  const lines = [
+    '---',
+    'type: meeting',
+    'source: rowboat',
+    'title: Meeting Transcription',
+    `date: "${date}"`,
+    '---',
+    '',
+    '# Meeting Transcription',
+    '',
+  ];
+  for (const entry of entries) {
+    lines.push(`**${entry.speaker}:** ${entry.text}`);
+    lines.push('');
+  }
+  return lines.join('\n');
+}
+
+// The optional callback mirrors the App.tsx call site, which passes a toggle callback;
+// it is accepted so that call type-checks, but this initial version never invokes it.
+export function useMeetingTranscription(_onToggle?: () => void) {
+  const [state, setState] = useState<MeetingTranscriptionState>('idle');
+  const wsRef = useRef<WebSocket | null>(null);
+  const micStreamRef = useRef<MediaStream | null>(null);
+  const systemStreamRef = useRef<MediaStream | null>(null);
+  const processorRef = useRef<ScriptProcessorNode | null>(null);
+  const audioCtxRef = useRef<AudioContext | null>(null);
+  const transcriptRef = useRef<TranscriptEntry[]>([]);
+  const notePathRef = useRef<string>('');
+  const writeTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+  const dateRef = useRef<string>('');
+
+  const writeTranscriptToFile = useCallback(async () => {
+    if (!notePathRef.current || transcriptRef.current.length === 0) return;
+    const content = formatTranscript(transcriptRef.current, dateRef.current);
+    try {
+      await window.ipc.invoke('workspace:writeFile', {
+        path: notePathRef.current,
+        data: content,
+        opts: { encoding: 'utf8' },
+      });
+    } catch (err) {
+      console.error('[meeting] Failed to write transcript:', err);
+    }
+  }, []);
+
+  const scheduleDebouncedWrite = useCallback(() => {
+    if (writeTimerRef.current) clearTimeout(writeTimerRef.current);
+    writeTimerRef.current = setTimeout(() => {
+      void writeTranscriptToFile();
+    }, 5000);
+  }, [writeTranscriptToFile]);
+
+  const cleanup = useCallback(() => {
+    if (writeTimerRef.current) {
+      clearTimeout(writeTimerRef.current);
+      writeTimerRef.current = null;
+    }
+    if (processorRef.current) {
+      processorRef.current.disconnect();
+      processorRef.current = null;
+    }
+    if (audioCtxRef.current) {
+      audioCtxRef.current.close();
+      audioCtxRef.current = null;
+    }
+    if (micStreamRef.current) {
+      micStreamRef.current.getTracks().forEach(t => t.stop());
+      micStreamRef.current = null;
+    }
+    if (systemStreamRef.current) {
+      systemStreamRef.current.getTracks().forEach(t => t.stop());
+      systemStreamRef.current = null;
+    }
+    if (wsRef.current) {
+      wsRef.current.onclose = null;
+      wsRef.current.close();
+      wsRef.current = null;
+    }
+  }, []);
+
+  const start = useCallback(async (): Promise<string | null> => {
+    if (state !== 'idle') return null;
+    setState('connecting');
+
+    // Get Deepgram token
+    let ws: WebSocket;
+    try {
+      const result = await window.ipc.invoke('voice:getDeepgramToken', null);
+      if (result) {
+        console.log('[meeting] Using proxy token');
+        ws = new WebSocket(DEEPGRAM_LISTEN_URL, ['bearer', result.token]);
+      } else {
+        const config = await window.ipc.invoke('voice:getConfig', null);
+        if (!config?.deepgram) {
+          console.error('[meeting] No Deepgram config available');
+          setState('idle');
+          return null;
+        }
+        console.log('[meeting] Using API key');
+        ws = new WebSocket(DEEPGRAM_LISTEN_URL, ['token', config.deepgram.apiKey]);
+      }
+    } catch (err) {
+      console.error('[meeting] Failed to get Deepgram token:', err);
+      setState('idle');
+      return null;
+    }
+    wsRef.current = ws;
+
+    // Wait for WS open
+    const wsOk = await new Promise<boolean>((resolve) => {
+      ws.onopen = () => resolve(true);
+      ws.onerror = () => resolve(false);
+      setTimeout(() => resolve(false), 5000);
+    });
+    if (!wsOk) {
+      console.error('[meeting] WebSocket failed to connect');
+      cleanup();
+      setState('idle');
+      return null;
+    }
+    console.log('[meeting] WebSocket connected');
+
+    // Set up WS message handler
+    transcriptRef.current = [];
+    ws.onmessage = (event) => {
+      const data = JSON.parse(event.data);
+      if (!data.channel?.alternatives?.[0]) return;
+      const transcript = data.channel.alternatives[0].transcript;
+      if (!transcript || !data.is_final) return;
+
+      const channelIndex = data.channel_index?.[0] ?? 0;
+      const speaker = channelIndex === 0 ? 'You' : 'Speaker';
+
+      // Merge with last entry if same speaker
+      const entries = transcriptRef.current;
+      if (entries.length > 0 && entries[entries.length - 1].speaker === speaker) {
+        entries[entries.length - 1].text += ' ' + transcript;
+      } else {
+        entries.push({ speaker, text: transcript });
+      }
+      scheduleDebouncedWrite();
+    };
+
+    ws.onclose = () => {
+      console.log('[meeting] WebSocket closed');
+      wsRef.current = null;
+    };
+
+    // Get mic stream
+    let micStream: MediaStream;
+    try {
+      micStream = await navigator.mediaDevices.getUserMedia({ audio: true });
+    } catch (err) {
+      console.error('[meeting] Microphone access denied:', err);
+      cleanup();
+      setState('idle');
+      return null;
+    }
+    micStreamRef.current = micStream;
+
+    // Get system audio via getDisplayMedia
+    // The main process setDisplayMediaRequestHandler auto-approves with loopback audio
+    let systemStream: MediaStream;
+    try {
+      systemStream = await navigator.mediaDevices.getDisplayMedia({ audio: true, video: true });
+      // Stop any video tracks — we only need audio
+      systemStream.getVideoTracks().forEach(t => t.stop());
+    } catch (err) {
+      console.error('[meeting] System audio access denied:', err);
+      cleanup();
+      setState('idle');
+      return null;
+    }
+    if (systemStream.getAudioTracks().length === 0) {
+      console.error('[meeting] No audio track from getDisplayMedia');
+      systemStream.getTracks().forEach(t => t.stop());
+      cleanup();
+      setState('idle');
+      return null;
+    }
+    console.log('[meeting] System audio captured');
+    systemStreamRef.current = systemStream;
+
+    // Set up AudioContext with channel merger
+    const audioCtx = new AudioContext({ sampleRate: 16000 });
+    audioCtxRef.current = audioCtx;
+
+    const micSource = audioCtx.createMediaStreamSource(micStream);
+    const systemSource = audioCtx.createMediaStreamSource(systemStream);
+    const merger = audioCtx.createChannelMerger(2);
+
+    micSource.connect(merger, 0, 0); // mic → channel 0
+    systemSource.connect(merger, 0, 1); // system audio → channel 1
+
+    const processor = audioCtx.createScriptProcessor(4096, 2, 2);
+    processorRef.current = processor;
+
+    processor.onaudioprocess = (e) => {
+      if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) return;
+      const ch0 = e.inputBuffer.getChannelData(0);
+      const ch1 = e.inputBuffer.getChannelData(1);
+      // Interleave 2 channels into stereo int16 PCM
+      const int16 = new Int16Array(ch0.length * 2);
+      for (let i = 0; i < ch0.length; i++) {
+        const s0 = Math.max(-1, Math.min(1, ch0[i]));
+        const s1 = Math.max(-1, Math.min(1, ch1[i]));
+        int16[i * 2] = s0 < 0 ? s0 * 0x8000 : s0 * 0x7fff;
+        int16[i * 2 + 1] = s1 < 0 ? s1 * 0x8000 : s1 * 0x7fff;
+      }
+      wsRef.current.send(int16.buffer);
+    };
+
+    merger.connect(processor);
+    processor.connect(audioCtx.destination);
+
+    // Create the note file
+    const now = new Date();
+    const dateStr = now.toISOString();
+    dateRef.current = dateStr;
+    const timestamp = dateStr.replace(/:/g, '-').replace(/\.\d+Z$/, '');
+    const notePath = `knowledge/Meetings/rowboat/meeting-${timestamp}.md`;
+    notePathRef.current = notePath;
+
+    const initialContent = formatTranscript([], dateStr);
+    await window.ipc.invoke('workspace:writeFile', {
+      path: notePath,
+      data: initialContent,
+      opts: { encoding: 'utf8', mkdirp: true },
+    });
+
+    setState('recording');
+    return notePath;
+  }, [state, cleanup, scheduleDebouncedWrite]);
+
+  const stop = useCallback(async () => {
+    if (state !== 'recording') return;
+    setState('stopping');
+
+    cleanup();
+
+    // Write final transcript
+    await writeTranscriptToFile();
+
+    setState('idle');
+  }, [state, cleanup, writeTranscriptToFile]);
+
+  return { state, start, stop };
+}
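For reference, the fields read in ws.onmessage above match Deepgram's streaming "Results" messages. A trimmed, illustrative example of the shape being parsed (only the fields the hook touches are shown; values are made up):

  {
    "type": "Results",
    "channel_index": [0, 2],
    "is_final": true,
    "channel": {
      "alternatives": [
        { "transcript": "Okay, let's get started." }
      ]
    }
  }

With multichannel enabled, channel_index[0] identifies the source channel, which is why the hook maps 0 to "You" (the mic) and anything else to "Speaker" (system audio).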
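One detail worth spelling out in onaudioprocess: the asymmetric scaling maps the float range [-1, 1] onto the full signed 16-bit range without overflow. A quick worked check, following the code directly:

  // float sample → int16 (linear16), as computed in the hook
  // -1.0 * 0x8000 = -32768   (int16 minimum)
  //  1.0 * 0x7fff =  32767   (int16 maximum)
  //  0.5 * 0x7fff =  16383.5 (stored as 16383; Int16Array truncates toward zero)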
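Finally, the markdown note that formatTranscript produces looks like this (derived from the code above; the date and dialogue are made up):

  ---
  type: meeting
  source: rowboat
  title: Meeting Transcription
  date: "2026-05-01T17:32:40.000Z"
  ---

  # Meeting Transcription

  **You:** Okay, let's get started.

  **Speaker:** Sounds good, I'll share my screen.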