mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-04-25 00:16:29 +02:00
Ola (#438)
Native meeting transcription that captures mic and system audio, transcribes
live via Deepgram, and generates AI-powered meeting notes.
- Toggle button in toolbar to start/stop meeting transcription
- Dual-stream audio capture: mic (You) + system audio (They) via getDisplayMedia
loopback
- Multichannel Deepgram transcription with diarization for speaker
identification
- Headphone detection with mic gating when using speakers to prevent echo bleed
- Live transcript saved to knowledge/Meetings/rowboat/{date}/ as markdown
- Auto-stop after 2 minutes of silence
- LLM-generated meeting notes prepended above raw transcript on stop
- Calendar event matching: pulls nearby events from calendar_sync to identify
meeting title and participant names
- First-time permissions setup modal on macOS for Screen Recording
- Button only visible when Deepgram is available (logged in or API key
configured)
This commit is contained in:
parent
7966501a79
commit
c0138af3ab
8 changed files with 662 additions and 4 deletions
10
apps/x/apps/main/entitlements.plist
Normal file
10
apps/x/apps/main/entitlements.plist
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>com.apple.security.device.audio-input</key>
|
||||
<true/>
|
||||
<key>com.apple.security.device.screen-capture</key>
|
||||
<true/>
|
||||
</dict>
|
||||
</plist>
|
||||
|
|
@ -13,6 +13,10 @@ module.exports = {
|
|||
appCategoryType: 'public.app-category.productivity',
|
||||
osxSign: {
|
||||
batchCodesignCalls: true,
|
||||
optionsForFile: () => ({
|
||||
entitlements: path.join(__dirname, 'entitlements.plist'),
|
||||
'entitlements-inherit': path.join(__dirname, 'entitlements.plist'),
|
||||
}),
|
||||
},
|
||||
osxNotarize: {
|
||||
appleId: process.env.APPLE_ID,
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ import { search } from '@x/core/dist/search/search.js';
|
|||
import { versionHistory, voice } from '@x/core';
|
||||
import { classifySchedule, processRowboatInstruction } from '@x/core/dist/knowledge/inline_tasks.js';
|
||||
import { getBillingInfo } from '@x/core/dist/billing/billing.js';
|
||||
import { summarizeMeeting } from '@x/core/dist/knowledge/summarize_meeting.js';
|
||||
|
||||
/**
|
||||
* Convert markdown to a styled HTML document for PDF/DOCX export.
|
||||
|
|
@ -701,6 +702,10 @@ export function setupIpcHandlers() {
|
|||
|
||||
return { success: false, error: 'Unknown format' };
|
||||
},
|
||||
'meeting:summarize': async (_event, args) => {
|
||||
const notes = await summarizeMeeting(args.transcript, args.meetingStartTime);
|
||||
return { notes };
|
||||
},
|
||||
'inline-task:classifySchedule': async (_event, args) => {
|
||||
const schedule = await classifySchedule(args.instruction);
|
||||
return { schedule };
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { app, BrowserWindow, protocol, net, shell, session } from "electron";
|
||||
import { app, BrowserWindow, desktopCapturer, protocol, net, shell, session } from "electron";
|
||||
import path from "node:path";
|
||||
import {
|
||||
setupIpcHandlers,
|
||||
|
|
@ -92,15 +92,27 @@ function createWindow() {
|
|||
},
|
||||
});
|
||||
|
||||
// Grant microphone permission for voice mode
|
||||
// Grant microphone and display-capture permissions
|
||||
session.defaultSession.setPermissionRequestHandler((_webContents, permission, callback) => {
|
||||
if (permission === 'media') {
|
||||
if (permission === 'media' || permission === 'display-capture') {
|
||||
callback(true);
|
||||
} else {
|
||||
callback(false);
|
||||
}
|
||||
});
|
||||
|
||||
// Auto-approve display media requests and route system audio as loopback.
|
||||
// Electron requires a video source in the callback even if we only want audio.
|
||||
// We pass the first available screen source; the renderer discards the video track.
|
||||
session.defaultSession.setDisplayMediaRequestHandler(async (_request, callback) => {
|
||||
const sources = await desktopCapturer.getSources({ types: ['screen'] });
|
||||
if (sources.length === 0) {
|
||||
callback({});
|
||||
return;
|
||||
}
|
||||
callback({ video: sources[0], audio: 'loopback' });
|
||||
});
|
||||
|
||||
// Show window when content is ready to prevent blank screen
|
||||
win.once("ready-to-show", () => {
|
||||
win.maximize();
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import { RunEvent, ListRunsResponse } from '@x/shared/src/runs.js';
|
|||
import type { LanguageModelUsage, ToolUIPart } from 'ai';
|
||||
import './App.css'
|
||||
import z from 'zod';
|
||||
import { CheckIcon, LoaderIcon, PanelLeftIcon, Maximize2, Minimize2, ChevronLeftIcon, ChevronRightIcon, SquarePen, SearchIcon, HistoryIcon } from 'lucide-react';
|
||||
import { CheckIcon, LoaderIcon, PanelLeftIcon, Maximize2, Minimize2, ChevronLeftIcon, ChevronRightIcon, SquarePen, SearchIcon, HistoryIcon, RadioIcon, SquareIcon } from 'lucide-react';
|
||||
import { cn } from '@/lib/utils';
|
||||
import { MarkdownEditor } from './components/markdown-editor';
|
||||
import { ChatSidebar } from './components/chat-sidebar';
|
||||
|
|
@ -46,6 +46,8 @@ import {
|
|||
useSidebar,
|
||||
} from "@/components/ui/sidebar"
|
||||
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip"
|
||||
import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogDescription, DialogFooter } from "@/components/ui/dialog"
|
||||
import { Button } from "@/components/ui/button"
|
||||
import { Toaster } from "@/components/ui/sonner"
|
||||
import { stripKnowledgePrefix, toKnowledgePath, wikiLabel } from '@/lib/wiki-links'
|
||||
import { splitFrontmatter, joinFrontmatter } from '@/lib/frontmatter'
|
||||
|
|
@ -78,6 +80,7 @@ import { AgentScheduleState } from '@x/shared/dist/agent-schedule-state.js'
|
|||
import { toast } from "sonner"
|
||||
import { useVoiceMode } from '@/hooks/useVoiceMode'
|
||||
import { useVoiceTTS } from '@/hooks/useVoiceTTS'
|
||||
import { useMeetingTranscription, type MeetingTranscriptionState } from '@/hooks/useMeetingTranscription'
|
||||
|
||||
type DirEntry = z.infer<typeof workspace.DirEntry>
|
||||
type RunEventType = z.infer<typeof RunEvent>
|
||||
|
|
@ -383,6 +386,10 @@ function FixedSidebarToggle({
|
|||
canNavigateForward,
|
||||
onNewChat,
|
||||
onOpenSearch,
|
||||
meetingState,
|
||||
meetingSummarizing,
|
||||
meetingAvailable,
|
||||
onToggleMeeting,
|
||||
leftInsetPx,
|
||||
}: {
|
||||
onNavigateBack: () => void
|
||||
|
|
@ -391,6 +398,10 @@ function FixedSidebarToggle({
|
|||
canNavigateForward: boolean
|
||||
onNewChat: () => void
|
||||
onOpenSearch: () => void
|
||||
meetingState: MeetingTranscriptionState
|
||||
meetingSummarizing: boolean
|
||||
meetingAvailable: boolean
|
||||
onToggleMeeting: () => void
|
||||
leftInsetPx: number
|
||||
}) {
|
||||
const { toggleSidebar, state } = useSidebar()
|
||||
|
|
@ -426,6 +437,37 @@ function FixedSidebarToggle({
|
|||
>
|
||||
<SearchIcon className="size-5" />
|
||||
</button>
|
||||
{meetingAvailable && (
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<button
|
||||
type="button"
|
||||
onClick={onToggleMeeting}
|
||||
disabled={meetingState === 'connecting' || meetingState === 'stopping' || meetingSummarizing}
|
||||
className={cn(
|
||||
"flex h-8 w-8 items-center justify-center rounded-md transition-colors disabled:pointer-events-none",
|
||||
meetingSummarizing
|
||||
? "text-muted-foreground"
|
||||
: meetingState === 'recording'
|
||||
? "text-red-500 hover:bg-accent"
|
||||
: "text-muted-foreground hover:bg-accent hover:text-foreground"
|
||||
)}
|
||||
style={{ marginLeft: TITLEBAR_BUTTON_GAP_PX }}
|
||||
>
|
||||
{meetingSummarizing ? (
|
||||
<LoaderIcon className="size-4 animate-spin" />
|
||||
) : meetingState === 'recording' ? (
|
||||
<SquareIcon className="size-4 animate-pulse" />
|
||||
) : (
|
||||
<RadioIcon className="size-5" />
|
||||
)}
|
||||
</button>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent side="bottom">
|
||||
{meetingSummarizing ? 'Generating meeting notes...' : meetingState === 'recording' ? 'Stop meeting notes' : 'Take new meeting notes'}
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
)}
|
||||
{/* Back / Forward navigation */}
|
||||
{isCollapsed && (
|
||||
<>
|
||||
|
|
@ -619,6 +661,11 @@ function App() {
|
|||
const voiceRef = useRef(voice)
|
||||
voiceRef.current = voice
|
||||
|
||||
const handleToggleMeetingRef = useRef<(() => void) | undefined>(undefined)
|
||||
const meetingTranscription = useMeetingTranscription(() => {
|
||||
handleToggleMeetingRef.current?.()
|
||||
})
|
||||
|
||||
// Check if voice is available on mount and when OAuth state changes
|
||||
const refreshVoiceAvailability = useCallback(() => {
|
||||
Promise.all([
|
||||
|
|
@ -3314,6 +3361,73 @@ function App() {
|
|||
navigateToFile(notePath)
|
||||
}, [loadDirectory, navigateToFile, fileTabs])
|
||||
|
||||
// Path of the note file for the meeting currently being transcribed;
// cleared once post-meeting notes have been generated.
const meetingNotePathRef = useRef<string | null>(null)
// True while the post-meeting LLM summary is being generated (drives spinner + disabled button).
const [meetingSummarizing, setMeetingSummarizing] = useState(false)
// Controls the one-time macOS Screen Recording permissions dialog.
const [showMeetingPermissions, setShowMeetingPermissions] = useState(false)

// Invoked from the permissions dialog's "Continue" button: dismiss the dialog,
// persist the acknowledgement so the dialog is never shown again, then start
// transcription and open the live transcript note in the editor.
const startMeetingAfterPermissions = useCallback(async () => {
  setShowMeetingPermissions(false)
  localStorage.setItem('meeting-permissions-acknowledged', '1')
  const notePath = await meetingTranscription.start()
  // start() returns null on any failure (no token, mic/system-audio denied)
  if (notePath) {
    meetingNotePathRef.current = notePath
    await handleVoiceNoteCreated(notePath)
  }
}, [meetingTranscription, handleVoiceNoteCreated])
|
||||
|
||||
// Toolbar toggle: starts a new transcription session when idle, or stops the
// current one and rewrites the saved transcript with LLM-generated notes.
const handleToggleMeeting = useCallback(async () => {
  if (meetingTranscription.state === 'recording') {
    await meetingTranscription.stop()

    // Read the final transcript and generate meeting notes via LLM
    const notePath = meetingNotePathRef.current
    if (notePath) {
      setMeetingSummarizing(true)
      try {
        const result = await window.ipc.invoke('workspace:readFile', { path: notePath, encoding: 'utf8' })
        const fileContent = result.data
        // Skip summarization entirely for empty/whitespace-only transcripts
        if (fileContent && fileContent.trim()) {
          // Extract meeting start time from frontmatter for calendar matching
          const dateMatch = fileContent.match(/^date:\s*"(.+)"$/m)
          const meetingStartTime = dateMatch?.[1]
          const { notes } = await window.ipc.invoke('meeting:summarize', { transcript: fileContent, meetingStartTime })
          if (notes) {
            // Prepend meeting notes below the title but above the transcript
            const { raw: fm, body: transcriptBody } = splitFrontmatter(fileContent)
            // Strip the "# Meeting note" title from transcript body — we'll put it first
            const bodyWithoutTitle = transcriptBody.replace(/^#\s+Meeting note\s*\n*/, '')
            const newBody = '# Meeting note\n\n' + notes + '\n\n---\n\n## Raw transcript\n\n' + bodyWithoutTitle
            const newContent = fm ? `${fm}\n${newBody}` : newBody
            await window.ipc.invoke('workspace:writeFile', {
              path: notePath,
              data: newContent,
              opts: { encoding: 'utf8' },
            })
            // Refresh the file view
            await handleVoiceNoteCreated(notePath)
          }
        }
      } catch (err) {
        // Best-effort: a failed summary leaves the raw transcript intact on disk
        console.error('[meeting] Failed to generate meeting notes:', err)
      }
      setMeetingSummarizing(false)
      meetingNotePathRef.current = null
    }
  } else if (meetingTranscription.state === 'idle') {
    // Show permissions modal on first use (macOS only — Windows works out of the box)
    if (isMac && !localStorage.getItem('meeting-permissions-acknowledged')) {
      setShowMeetingPermissions(true)
      return
    }
    const notePath = await meetingTranscription.start()
    if (notePath) {
      meetingNotePathRef.current = notePath
      await handleVoiceNoteCreated(notePath)
    }
  }
  // Note: 'connecting'/'stopping' states fall through — the button is disabled then.
}, [meetingTranscription, handleVoiceNoteCreated])
// Keep a stable ref so the hook's silence auto-stop can call the latest toggle handler.
handleToggleMeetingRef.current = handleToggleMeeting
|
||||
|
||||
const ensureWikiFile = useCallback(async (wikiPath: string) => {
|
||||
const resolvedPath = toKnowledgePath(wikiPath)
|
||||
if (!resolvedPath) return null
|
||||
|
|
@ -4176,6 +4290,10 @@ function App() {
|
|||
canNavigateForward={canNavigateForward}
|
||||
onNewChat={handleNewChatTab}
|
||||
onOpenSearch={() => setIsSearchOpen(true)}
|
||||
meetingState={meetingTranscription.state}
|
||||
meetingSummarizing={meetingSummarizing}
|
||||
meetingAvailable={voiceAvailable}
|
||||
onToggleMeeting={() => { void handleToggleMeeting() }}
|
||||
leftInsetPx={isMac ? MACOS_TRAFFIC_LIGHTS_RESERVED_PX : 0}
|
||||
/>
|
||||
</SidebarProvider>
|
||||
|
|
@ -4192,6 +4310,29 @@ function App() {
|
|||
open={showOnboarding}
|
||||
onComplete={handleOnboardingComplete}
|
||||
/>
|
||||
<Dialog open={showMeetingPermissions} onOpenChange={setShowMeetingPermissions}>
|
||||
<DialogContent showCloseButton={false}>
|
||||
<DialogHeader>
|
||||
<DialogTitle>Meeting transcription setup</DialogTitle>
|
||||
<DialogDescription>
|
||||
Rowboat needs <strong>Screen Recording</strong> permission to capture meeting audio from other apps (Zoom, Meet, etc.).
|
||||
</DialogDescription>
|
||||
</DialogHeader>
|
||||
<div className="space-y-3 text-sm text-muted-foreground">
|
||||
<p>To enable this:</p>
|
||||
<ol className="list-decimal list-inside space-y-1.5">
|
||||
<li>Open <strong>System Settings</strong> → <strong>Privacy & Security</strong></li>
|
||||
<li>Click <strong>Screen Recording</strong></li>
|
||||
<li>Toggle on <strong>Rowboat</strong></li>
|
||||
<li>You may need to restart the app after granting permission</li>
|
||||
</ol>
|
||||
</div>
|
||||
<DialogFooter>
|
||||
<Button variant="outline" onClick={() => setShowMeetingPermissions(false)}>Cancel</Button>
|
||||
<Button onClick={() => { void startMeetingAfterPermissions() }}>Continue</Button>
|
||||
</DialogFooter>
|
||||
</DialogContent>
|
||||
</Dialog>
|
||||
</TooltipProvider>
|
||||
)
|
||||
}
|
||||
|
|
|
|||
374
apps/x/apps/renderer/src/hooks/useMeetingTranscription.ts
Normal file
374
apps/x/apps/renderer/src/hooks/useMeetingTranscription.ts
Normal file
|
|
@ -0,0 +1,374 @@
|
|||
import { useCallback, useRef, useState } from 'react';
|
||||
|
||||
// Session lifecycle, in the order states are entered:
// idle → connecting (token + WebSocket + media setup) → recording → stopping → idle.
export type MeetingTranscriptionState = 'idle' | 'connecting' | 'recording' | 'stopping';

// Deepgram live-listen query parameters: two 16 kHz linear16 PCM channels,
// transcribed independently (multichannel) so the mic ("You") and system
// audio ("They") can be attributed separately, with diarization on the
// system-audio channel and interim results for live display.
const DEEPGRAM_PARAMS = new URLSearchParams({
  model: 'nova-3',
  encoding: 'linear16',
  sample_rate: '16000',
  channels: '2',
  multichannel: 'true',
  diarize: 'true',
  interim_results: 'true',
  smart_format: 'true',
  punctuate: 'true',
  language: 'en',
});
const DEEPGRAM_LISTEN_URL = `wss://api.deepgram.com/v1/listen?${DEEPGRAM_PARAMS.toString()}`;

// RMS threshold: system audio above this = "active" (speakers playing)
const SYSTEM_AUDIO_GATE_THRESHOLD = 0.005;

// Auto-stop after 2 minutes of silence (no transcript from Deepgram)
const SILENCE_AUTO_STOP_MS = 2 * 60 * 1000;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Headphone detection
|
||||
// ---------------------------------------------------------------------------
|
||||
async function detectHeadphones(): Promise<boolean> {
|
||||
try {
|
||||
const devices = await navigator.mediaDevices.enumerateDevices();
|
||||
const outputs = devices.filter(d => d.kind === 'audiooutput');
|
||||
const defaultOutput = outputs.find(d => d.deviceId === 'default');
|
||||
const label = (defaultOutput?.label ?? '').toLowerCase();
|
||||
// Heuristic: built-in speakers won't match these patterns
|
||||
const headphonePatterns = ['headphone', 'airpod', 'earpod', 'earphone', 'earbud', 'bluetooth', 'bt_', 'jabra', 'bose', 'sony wh', 'sony wf'];
|
||||
return headphonePatterns.some(p => label.includes(p));
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Transcript formatting
|
||||
// ---------------------------------------------------------------------------
|
||||
interface TranscriptEntry {
|
||||
speaker: string;
|
||||
text: string;
|
||||
}
|
||||
|
||||
function formatTranscript(entries: TranscriptEntry[], date: string): string {
|
||||
const lines = [
|
||||
'---',
|
||||
'type: meeting',
|
||||
'source: rowboat',
|
||||
'title: Meeting note',
|
||||
`date: "${date}"`,
|
||||
'---',
|
||||
'',
|
||||
'# Meeting note',
|
||||
'',
|
||||
];
|
||||
for (let i = 0; i < entries.length; i++) {
|
||||
if (i > 0 && entries[i].speaker !== entries[i - 1].speaker) {
|
||||
lines.push('');
|
||||
}
|
||||
lines.push(`**${entries[i].speaker}:** ${entries[i].text}`);
|
||||
lines.push('');
|
||||
}
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Hook
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * React hook driving a live meeting-transcription session.
 *
 * Pipeline: mic (getUserMedia) and system audio (getDisplayMedia loopback)
 * are merged into a 2-channel 16 kHz stream, converted to interleaved int16
 * PCM, and streamed over a WebSocket to Deepgram. Transcripts are
 * accumulated in refs and debounce-written to a markdown note via IPC.
 *
 * @param onAutoStop called when SILENCE_AUTO_STOP_MS elapses with no
 *                   transcript, so the owner can run its normal stop flow.
 * @returns { state, start, stop } — start() resolves with the note path
 *          (or null on any failure); stop() flushes the final transcript.
 */
export function useMeetingTranscription(onAutoStop?: () => void) {
  const [state, setState] = useState<MeetingTranscriptionState>('idle');
  // All session resources live in refs so callbacks see current values
  // without re-subscribing, and cleanup() can tear everything down.
  const wsRef = useRef<WebSocket | null>(null);
  const micStreamRef = useRef<MediaStream | null>(null);
  const systemStreamRef = useRef<MediaStream | null>(null);
  const processorRef = useRef<ScriptProcessorNode | null>(null);
  const audioCtxRef = useRef<AudioContext | null>(null);
  // Finalized entries, in order.
  const transcriptRef = useRef<TranscriptEntry[]>([]);
  // Latest interim (non-final) result per Deepgram channel index.
  const interimRef = useRef<Map<number, { speaker: string; text: string }>>(new Map());
  const notePathRef = useRef<string>('');
  const writeTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  const silenceTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  // Ref indirection keeps the auto-stop timer pointed at the latest callback.
  const onAutoStopRef = useRef(onAutoStop);
  onAutoStopRef.current = onAutoStop;
  // ISO timestamp of when recording started; embedded in the note frontmatter.
  const dateRef = useRef<string>('');

  // Write the current transcript (finalized entries + pending interims,
  // merged into adjacent same-speaker entries) to the note file.
  const writeTranscriptToFile = useCallback(async () => {
    if (!notePathRef.current) return;
    const entries = [...transcriptRef.current];
    for (const interim of interimRef.current.values()) {
      if (!interim.text) continue;
      if (entries.length > 0 && entries[entries.length - 1].speaker === interim.speaker) {
        entries[entries.length - 1] = { speaker: interim.speaker, text: entries[entries.length - 1].text + ' ' + interim.text };
      } else {
        entries.push({ speaker: interim.speaker, text: interim.text });
      }
    }
    if (entries.length === 0) return;
    const content = formatTranscript(entries, dateRef.current);
    try {
      await window.ipc.invoke('workspace:writeFile', {
        path: notePathRef.current,
        data: content,
        opts: { encoding: 'utf8' },
      });
    } catch (err) {
      console.error('[meeting] Failed to write transcript:', err);
    }
  }, []);

  // Debounce disk writes to at most one per second of transcript activity.
  const scheduleDebouncedWrite = useCallback(() => {
    if (writeTimerRef.current) clearTimeout(writeTimerRef.current);
    writeTimerRef.current = setTimeout(() => {
      void writeTranscriptToFile();
    }, 1000);
  }, [writeTranscriptToFile]);

  // Tear down every session resource: timers, audio graph, media tracks,
  // and the WebSocket (onclose is detached first so closing is silent).
  const cleanup = useCallback(() => {
    if (writeTimerRef.current) {
      clearTimeout(writeTimerRef.current);
      writeTimerRef.current = null;
    }
    if (silenceTimerRef.current) {
      clearTimeout(silenceTimerRef.current);
      silenceTimerRef.current = null;
    }
    if (processorRef.current) {
      processorRef.current.disconnect();
      processorRef.current = null;
    }
    if (audioCtxRef.current) {
      audioCtxRef.current.close();
      audioCtxRef.current = null;
    }
    if (micStreamRef.current) {
      micStreamRef.current.getTracks().forEach(t => t.stop());
      micStreamRef.current = null;
    }
    if (systemStreamRef.current) {
      systemStreamRef.current.getTracks().forEach(t => t.stop());
      systemStreamRef.current = null;
    }
    if (wsRef.current) {
      wsRef.current.onclose = null;
      wsRef.current.close();
      wsRef.current = null;
    }
  }, []);

  // Start a session. Any failure along the way cleans up and returns null.
  const start = useCallback(async (): Promise<string | null> => {
    if (state !== 'idle') return null;
    setState('connecting');

    // Detect headphones vs speakers
    const usingHeadphones = await detectHeadphones();
    console.log(`[meeting] Audio output mode: ${usingHeadphones ? 'headphones' : 'speakers'}`);

    // Get Deepgram token
    // Prefer a short-lived proxy token (logged-in users); fall back to a
    // locally configured API key. Auth is carried in the WS subprotocol.
    let ws: WebSocket;
    try {
      const result = await window.ipc.invoke('voice:getDeepgramToken', null);
      if (result) {
        console.log('[meeting] Using proxy token');
        ws = new WebSocket(DEEPGRAM_LISTEN_URL, ['bearer', result.token]);
      } else {
        const config = await window.ipc.invoke('voice:getConfig', null);
        if (!config?.deepgram) {
          console.error('[meeting] No Deepgram config available');
          setState('idle');
          return null;
        }
        console.log('[meeting] Using API key');
        ws = new WebSocket(DEEPGRAM_LISTEN_URL, ['token', config.deepgram.apiKey]);
      }
    } catch (err) {
      console.error('[meeting] Failed to get Deepgram token:', err);
      setState('idle');
      return null;
    }
    wsRef.current = ws;

    // Wait for WS open
    // (5 s timeout; late resolve(false) after success is a no-op)
    const wsOk = await new Promise<boolean>((resolve) => {
      ws.onopen = () => resolve(true);
      ws.onerror = () => resolve(false);
      setTimeout(() => resolve(false), 5000);
    });
    if (!wsOk) {
      console.error('[meeting] WebSocket failed to connect');
      cleanup();
      setState('idle');
      return null;
    }
    console.log('[meeting] WebSocket connected');

    // Set up WS message handler
    transcriptRef.current = [];
    interimRef.current = new Map();
    ws.onmessage = (event) => {
      const data = JSON.parse(event.data);
      if (!data.channel?.alternatives?.[0]) return;
      const transcript = data.channel.alternatives[0].transcript;
      if (!transcript) return;

      // Reset silence auto-stop timer on any transcript
      if (silenceTimerRef.current) clearTimeout(silenceTimerRef.current);
      silenceTimerRef.current = setTimeout(() => {
        console.log('[meeting] 2 minutes of silence — auto-stopping');
        onAutoStopRef.current?.();
      }, SILENCE_AUTO_STOP_MS);

      const channelIndex = data.channel_index?.[0] ?? 0;
      const isMic = channelIndex === 0;

      // Channel 0 = mic = "You", Channel 1 = system audio with diarization
      let speaker: string;
      if (isMic) {
        speaker = 'You';
      } else {
        // Use Deepgram diarization speaker ID for system audio channel
        const words = data.channel.alternatives[0].words;
        const speakerId = words?.[0]?.speaker;
        speaker = speakerId != null ? `Speaker ${speakerId}` : 'System audio';
      }

      if (data.is_final) {
        // Final result replaces this channel's interim; merge consecutive
        // same-speaker text into one entry.
        interimRef.current.delete(channelIndex);
        const entries = transcriptRef.current;
        if (entries.length > 0 && entries[entries.length - 1].speaker === speaker) {
          entries[entries.length - 1].text += ' ' + transcript;
        } else {
          entries.push({ speaker, text: transcript });
        }
      } else {
        interimRef.current.set(channelIndex, { speaker, text: transcript });
      }
      scheduleDebouncedWrite();
    };

    ws.onclose = () => {
      console.log('[meeting] WebSocket closed');
      wsRef.current = null;
    };

    // Get mic stream
    let micStream: MediaStream;
    try {
      micStream = await navigator.mediaDevices.getUserMedia({
        audio: {
          echoCancellation: true,
          noiseSuppression: true,
          autoGainControl: true,
        },
      });
    } catch (err) {
      console.error('[meeting] Microphone access denied:', err);
      cleanup();
      setState('idle');
      return null;
    }
    micStreamRef.current = micStream;

    // Get system audio via getDisplayMedia (loopback)
    // The main process auto-approves this and supplies loopback audio; the
    // mandatory video track is discarded immediately.
    let systemStream: MediaStream;
    try {
      systemStream = await navigator.mediaDevices.getDisplayMedia({ audio: true, video: true });
      systemStream.getVideoTracks().forEach(t => t.stop());
    } catch (err) {
      console.error('[meeting] System audio access denied:', err);
      cleanup();
      setState('idle');
      return null;
    }
    if (systemStream.getAudioTracks().length === 0) {
      console.error('[meeting] No audio track from getDisplayMedia');
      systemStream.getTracks().forEach(t => t.stop());
      cleanup();
      setState('idle');
      return null;
    }
    console.log('[meeting] System audio captured');
    systemStreamRef.current = systemStream;

    // ----- Audio pipeline -----
    const audioCtx = new AudioContext({ sampleRate: 16000 });
    audioCtxRef.current = audioCtx;

    const micSource = audioCtx.createMediaStreamSource(micStream);
    const systemSource = audioCtx.createMediaStreamSource(systemStream);
    const merger = audioCtx.createChannelMerger(2);

    micSource.connect(merger, 0, 0); // mic → channel 0
    systemSource.connect(merger, 0, 1); // system audio → channel 1

    const processor = audioCtx.createScriptProcessor(4096, 2, 2);
    processorRef.current = processor;

    processor.onaudioprocess = (e) => {
      if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) return;

      const micRaw = e.inputBuffer.getChannelData(0);
      const sysRaw = e.inputBuffer.getChannelData(1);

      // Mode 1 (headphones): pass both streams through unmodified
      // Mode 2 (speakers): gate/mute mic when system audio is active
      let micOut: Float32Array;
      if (usingHeadphones) {
        micOut = micRaw;
      } else {
        // Compute system audio RMS to detect activity
        let sysSum = 0;
        for (let i = 0; i < sysRaw.length; i++) sysSum += sysRaw[i] * sysRaw[i];
        const sysRms = Math.sqrt(sysSum / sysRaw.length);

        if (sysRms > SYSTEM_AUDIO_GATE_THRESHOLD) {
          // System audio is playing — mute mic to prevent bleed
          micOut = new Float32Array(micRaw.length); // all zeros
        } else {
          // System audio is silent — pass mic through
          micOut = micRaw;
        }
      }

      // Interleave mic (ch0) + system audio (ch1) into stereo int16 PCM
      const int16 = new Int16Array(micOut.length * 2);
      for (let i = 0; i < micOut.length; i++) {
        const s0 = Math.max(-1, Math.min(1, micOut[i]));
        const s1 = Math.max(-1, Math.min(1, sysRaw[i]));
        int16[i * 2] = s0 < 0 ? s0 * 0x8000 : s0 * 0x7fff;
        int16[i * 2 + 1] = s1 < 0 ? s1 * 0x8000 : s1 * 0x7fff;
      }
      wsRef.current.send(int16.buffer);
    };

    merger.connect(processor);
    processor.connect(audioCtx.destination);

    // Create the note file, organized by date like voice memos
    const now = new Date();
    const dateStr = now.toISOString();
    dateRef.current = dateStr;
    const dateFolder = dateStr.split('T')[0]; // YYYY-MM-DD
    const timestamp = dateStr.replace(/:/g, '-').replace(/\.\d+Z$/, '');
    const notePath = `knowledge/Meetings/rowboat/${dateFolder}/meeting-${timestamp}.md`;
    notePathRef.current = notePath;

    const initialContent = formatTranscript([], dateStr);
    await window.ipc.invoke('workspace:writeFile', {
      path: notePath,
      data: initialContent,
      opts: { encoding: 'utf8', mkdirp: true },
    });

    setState('recording');
    return notePath;
  }, [state, cleanup, scheduleDebouncedWrite]);

  // Stop the session: tear down resources, drop stale interims, and flush
  // the finalized transcript to disk one last time.
  const stop = useCallback(async () => {
    if (state !== 'recording') return;
    setState('stopping');

    cleanup();
    interimRef.current = new Map();
    await writeTranscriptToFile();

    setState('idle');
  }, [state, cleanup, writeTranscriptToFile]);

  return { state, start, stop };
}
|
||||
103
apps/x/packages/core/src/knowledge/summarize_meeting.ts
Normal file
103
apps/x/packages/core/src/knowledge/summarize_meeting.ts
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { generateText } from 'ai';
|
||||
import container from '../di/container.js';
|
||||
import type { IModelConfigRepo } from '../models/repo.js';
|
||||
import { createProvider } from '../models/models.js';
|
||||
import { WorkDir } from '../config/config.js';
|
||||
|
||||
const CALENDAR_SYNC_DIR = path.join(WorkDir, 'calendar_sync');
|
||||
|
||||
const SYSTEM_PROMPT = `You are a meeting notes assistant. Given a raw meeting transcript and a list of calendar events from around the same time, create concise, well-organized meeting notes.
|
||||
|
||||
## Calendar matching
|
||||
You will be given the transcript (with a timestamp of when recording started) and recent calendar events with their titles, times, and attendees. If a calendar event clearly matches this meeting (overlapping time + content aligns), then:
|
||||
- Use the calendar event title as the meeting title (output it as the first line: "## <event title>")
|
||||
- Replace generic speaker labels ("Speaker 0", "Speaker 1", "System audio") with actual attendee names, but ONLY if you have HIGH CONFIDENCE about which speaker is which based on the discussion content. If unsure, use "They" instead of "Speaker 0" etc.
|
||||
- "You" in the transcript is the local user — if the calendar event has an organizer or you can identify who "You" is from context, use their name.
|
||||
|
||||
If no calendar event matches with high confidence, or if no calendar events are provided, skip the title line and use "They" for all non-"You" speakers.
|
||||
|
||||
## Format rules
|
||||
- Use ### for section headers that group related discussion topics
|
||||
- Section headers should be in sentence case (e.g. "### Onboarding flow status"), NOT Title Case
|
||||
- Use bullet points with sub-bullets for details
|
||||
- Include a "### Action items" section at the end if any were discussed
|
||||
- Focus on decisions, key discussions, and takeaways — not verbatim quotes
|
||||
- Attribute statements to speakers when relevant
|
||||
- Keep it concise — the notes should be much shorter than the transcript
|
||||
- Output markdown only, no preamble or explanation`;
|
||||
|
||||
/**
|
||||
* Load recent calendar events from the calendar_sync directory.
|
||||
* Returns a formatted string of events for the LLM prompt.
|
||||
*/
|
||||
function loadRecentCalendarEvents(meetingTime: string): string {
|
||||
try {
|
||||
if (!fs.existsSync(CALENDAR_SYNC_DIR)) return '';
|
||||
|
||||
const files = fs.readdirSync(CALENDAR_SYNC_DIR).filter(f => f.endsWith('.json') && f !== 'sync_state.json' && f !== 'composio_state.json');
|
||||
if (files.length === 0) return '';
|
||||
|
||||
const meetingDate = new Date(meetingTime);
|
||||
// Only consider events within ±3 hours of the meeting
|
||||
const windowMs = 3 * 60 * 60 * 1000;
|
||||
|
||||
const relevantEvents: string[] = [];
|
||||
|
||||
for (const file of files) {
|
||||
try {
|
||||
const content = fs.readFileSync(path.join(CALENDAR_SYNC_DIR, file), 'utf-8');
|
||||
const event = JSON.parse(content);
|
||||
|
||||
const startTime = event.start?.dateTime || event.start?.date;
|
||||
if (!startTime) continue;
|
||||
|
||||
const eventStart = new Date(startTime);
|
||||
if (Math.abs(eventStart.getTime() - meetingDate.getTime()) > windowMs) continue;
|
||||
|
||||
const attendees = (event.attendees || [])
|
||||
.map((a: { displayName?: string; email?: string }) => a.displayName || a.email)
|
||||
.filter(Boolean)
|
||||
.join(', ');
|
||||
|
||||
const endTime = event.end?.dateTime || event.end?.date || '';
|
||||
const organizer = event.organizer?.displayName || event.organizer?.email || '';
|
||||
|
||||
relevantEvents.push(
|
||||
`- Title: ${event.summary || 'Untitled'}\n` +
|
||||
` Start: ${startTime}\n` +
|
||||
` End: ${endTime}\n` +
|
||||
` Organizer: ${organizer}\n` +
|
||||
` Attendees: ${attendees || 'none listed'}`
|
||||
);
|
||||
} catch {
|
||||
// Skip malformed files
|
||||
}
|
||||
}
|
||||
|
||||
if (relevantEvents.length === 0) return '';
|
||||
return `\n\n## Calendar events around this time\n\n${relevantEvents.join('\n\n')}`;
|
||||
} catch {
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
export async function summarizeMeeting(transcript: string, meetingStartTime?: string): Promise<string> {
|
||||
const repo = container.resolve<IModelConfigRepo>('modelConfigRepo');
|
||||
const config = await repo.getConfig();
|
||||
const provider = createProvider(config.provider);
|
||||
const model = provider.languageModel(config.model);
|
||||
|
||||
const calendarContext = meetingStartTime ? loadRecentCalendarEvents(meetingStartTime) : '';
|
||||
|
||||
const prompt = `Meeting recording started at: ${meetingStartTime || 'unknown'}\n\n${transcript}${calendarContext}`;
|
||||
|
||||
const result = await generateText({
|
||||
model,
|
||||
system: SYSTEM_PROMPT,
|
||||
prompt,
|
||||
});
|
||||
|
||||
return result.text.trim();
|
||||
}
|
||||
|
|
@ -498,6 +498,15 @@ const ipcSchemas = {
|
|||
token: z.string(),
|
||||
}).nullable(),
|
||||
},
|
||||
'meeting:summarize': {
|
||||
req: z.object({
|
||||
transcript: z.string(),
|
||||
meetingStartTime: z.string().optional(),
|
||||
}),
|
||||
res: z.object({
|
||||
notes: z.string(),
|
||||
}),
|
||||
},
|
||||
// Inline task schedule classification
|
||||
'export:note': {
|
||||
req: z.object({
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue