mirror of https://github.com/rowboatlabs/rowboat.git
synced 2026-05-02 20:03:21 +02:00

Merge branch 'dev' of github.com:rowboatlabs/rowboat into dev

commit e408f859d6
11 changed files with 666 additions and 11 deletions

apps/x/apps/main/entitlements.plist (new file, 10 lines)

@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>com.apple.security.device.audio-input</key>
+    <true/>
+    <key>com.apple.security.device.screen-capture</key>
+    <true/>
+</dict>
+</plist>

@@ -13,6 +13,10 @@ module.exports = {
   appCategoryType: 'public.app-category.productivity',
   osxSign: {
     batchCodesignCalls: true,
+    optionsForFile: () => ({
+      entitlements: path.join(__dirname, 'entitlements.plist'),
+      'entitlements-inherit': path.join(__dirname, 'entitlements.plist'),
+    }),
   },
   osxNotarize: {
     appleId: process.env.APPLE_ID,
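The hunk is truncated after `appleId`. For context only: Electron Forge forwards `osxNotarize` to @electron/notarize, which for notarytool also takes an app-specific password and a team ID. A hedged sketch of how such a block typically continues (everything beyond APPLE_ID is an assumption, not shown in this diff):

// Hypothetical continuation of the osxNotarize block above; not part of this commit's visible diff.
osxNotarize: {
  appleId: process.env.APPLE_ID,
  appleIdPassword: process.env.APPLE_ID_PASSWORD, // assumed env var: app-specific password
  teamId: process.env.APPLE_TEAM_ID,              // assumed env var: Apple Developer team ID
},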
@@ -41,6 +41,7 @@ import { search } from '@x/core/dist/search/search.js';
 import { versionHistory, voice } from '@x/core';
 import { classifySchedule, processRowboatInstruction } from '@x/core/dist/knowledge/inline_tasks.js';
 import { getBillingInfo } from '@x/core/dist/billing/billing.js';
+import { summarizeMeeting } from '@x/core/dist/knowledge/summarize_meeting.js';

 /**
  * Convert markdown to a styled HTML document for PDF/DOCX export.

@@ -701,6 +702,10 @@ export function setupIpcHandlers() {

       return { success: false, error: 'Unknown format' };
     },
+    'meeting:summarize': async (_event, args) => {
+      const notes = await summarizeMeeting(args.transcript, args.meetingStartTime);
+      return { notes };
+    },
     'inline-task:classifySchedule': async (_event, args) => {
       const schedule = await classifySchedule(args.instruction);
       return { schedule };
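These handlers answer `window.ipc.invoke(channel, args)` calls from the renderer, used throughout App.tsx and the transcription hook below. The preload wiring is not part of this diff; a minimal sketch of such a bridge, assuming the standard contextBridge/ipcRenderer pattern:

// preload sketch (hypothetical; the actual preload file is not in this diff)
import { contextBridge, ipcRenderer } from 'electron';

contextBridge.exposeInMainWorld('ipc', {
  // Forward invocations to the main-process handlers registered in setupIpcHandlers()
  invoke: (channel: string, args: unknown) => ipcRenderer.invoke(channel, args),
});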
@@ -1,4 +1,4 @@
-import { app, BrowserWindow, protocol, net, shell, session } from "electron";
+import { app, BrowserWindow, desktopCapturer, protocol, net, shell, session } from "electron";
 import path from "node:path";
 import {
   setupIpcHandlers,

@@ -115,15 +115,27 @@ function createWindow() {
     },
   });

-  // Grant microphone permission for voice mode
+  // Grant microphone and display-capture permissions
   session.defaultSession.setPermissionRequestHandler((_webContents, permission, callback) => {
-    if (permission === 'media') {
+    if (permission === 'media' || permission === 'display-capture') {
      callback(true);
    } else {
      callback(false);
    }
  });

+  // Auto-approve display media requests and route system audio as loopback.
+  // Electron requires a video source in the callback even if we only want audio.
+  // We pass the first available screen source; the renderer discards the video track.
+  session.defaultSession.setDisplayMediaRequestHandler(async (_request, callback) => {
+    const sources = await desktopCapturer.getSources({ types: ['screen'] });
+    if (sources.length === 0) {
+      callback({});
+      return;
+    }
+    callback({ video: sources[0], audio: 'loopback' });
+  });

  // Show window when content is ready to prevent blank screen
  win.once("ready-to-show", () => {
    win.maximize();
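With the display-media handler above in place, a plain `getDisplayMedia` call in the renderer resolves without a picker, carrying a loopback track of system output plus the mandatory screen-video track. A minimal sketch of the consuming side (the hook added later in this commit does exactly this):

// Renderer-side sketch: request system audio, then drop the unused video track.
const stream = await navigator.mediaDevices.getDisplayMedia({ audio: true, video: true });
stream.getVideoTracks().forEach(t => t.stop()); // video is required by the handler but unused
const [systemAudio] = stream.getAudioTracks();  // loopback of what the speakers are playing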
@@ -5,7 +5,7 @@ import { RunEvent, ListRunsResponse } from '@x/shared/src/runs.js';
 import type { LanguageModelUsage, ToolUIPart } from 'ai';
 import './App.css'
 import z from 'zod';
-import { CheckIcon, LoaderIcon, PanelLeftIcon, Maximize2, Minimize2, ChevronLeftIcon, ChevronRightIcon, SquarePen, SearchIcon, HistoryIcon } from 'lucide-react';
+import { CheckIcon, LoaderIcon, PanelLeftIcon, Maximize2, Minimize2, ChevronLeftIcon, ChevronRightIcon, SquarePen, SearchIcon, HistoryIcon, RadioIcon, SquareIcon } from 'lucide-react';
 import { cn } from '@/lib/utils';
 import { MarkdownEditor } from './components/markdown-editor';
 import { ChatSidebar } from './components/chat-sidebar';

@@ -46,6 +46,8 @@ import {
   useSidebar,
 } from "@/components/ui/sidebar"
 import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip"
+import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogDescription, DialogFooter } from "@/components/ui/dialog"
 import { Button } from "@/components/ui/button"
 import { Toaster } from "@/components/ui/sonner"
 import { stripKnowledgePrefix, toKnowledgePath, wikiLabel } from '@/lib/wiki-links'
+import { splitFrontmatter, joinFrontmatter } from '@/lib/frontmatter'

@@ -78,6 +80,7 @@ import { AgentScheduleState } from '@x/shared/dist/agent-schedule-state.js'
 import { toast } from "sonner"
 import { useVoiceMode } from '@/hooks/useVoiceMode'
 import { useVoiceTTS } from '@/hooks/useVoiceTTS'
+import { useMeetingTranscription, type MeetingTranscriptionState } from '@/hooks/useMeetingTranscription'

 type DirEntry = z.infer<typeof workspace.DirEntry>
 type RunEventType = z.infer<typeof RunEvent>
@@ -383,6 +386,10 @@ function FixedSidebarToggle({
   canNavigateForward,
   onNewChat,
   onOpenSearch,
+  meetingState,
+  meetingSummarizing,
+  meetingAvailable,
+  onToggleMeeting,
   leftInsetPx,
 }: {
   onNavigateBack: () => void

@@ -391,6 +398,10 @@ function FixedSidebarToggle({
   canNavigateForward: boolean
   onNewChat: () => void
   onOpenSearch: () => void
+  meetingState: MeetingTranscriptionState
+  meetingSummarizing: boolean
+  meetingAvailable: boolean
+  onToggleMeeting: () => void
   leftInsetPx: number
 }) {
   const { toggleSidebar, state } = useSidebar()
@@ -426,6 +437,37 @@ function FixedSidebarToggle({
       >
         <SearchIcon className="size-5" />
       </button>
+      {meetingAvailable && (
+        <Tooltip>
+          <TooltipTrigger asChild>
+            <button
+              type="button"
+              onClick={onToggleMeeting}
+              disabled={meetingState === 'connecting' || meetingState === 'stopping' || meetingSummarizing}
+              className={cn(
+                "flex h-8 w-8 items-center justify-center rounded-md transition-colors disabled:pointer-events-none",
+                meetingSummarizing
+                  ? "text-muted-foreground"
+                  : meetingState === 'recording'
+                    ? "text-red-500 hover:bg-accent"
+                    : "text-muted-foreground hover:bg-accent hover:text-foreground"
+              )}
+              style={{ marginLeft: TITLEBAR_BUTTON_GAP_PX }}
+            >
+              {meetingSummarizing ? (
+                <LoaderIcon className="size-4 animate-spin" />
+              ) : meetingState === 'recording' ? (
+                <SquareIcon className="size-4 animate-pulse" />
+              ) : (
+                <RadioIcon className="size-5" />
+              )}
+            </button>
+          </TooltipTrigger>
+          <TooltipContent side="bottom">
+            {meetingSummarizing ? 'Generating meeting notes...' : meetingState === 'recording' ? 'Stop meeting notes' : 'Take new meeting notes'}
+          </TooltipContent>
+        </Tooltip>
+      )}
       {/* Back / Forward navigation */}
       {isCollapsed && (
         <>
@@ -619,6 +661,11 @@ function App() {
   const voiceRef = useRef(voice)
   voiceRef.current = voice

+  const handleToggleMeetingRef = useRef<(() => void) | undefined>(undefined)
+  const meetingTranscription = useMeetingTranscription(() => {
+    handleToggleMeetingRef.current?.()
+  })
+
   // Check if voice is available on mount and when OAuth state changes
   const refreshVoiceAvailability = useCallback(() => {
     Promise.all([
@@ -3314,6 +3361,73 @@ function App() {
     navigateToFile(notePath)
   }, [loadDirectory, navigateToFile, fileTabs])

+  const meetingNotePathRef = useRef<string | null>(null)
+  const [meetingSummarizing, setMeetingSummarizing] = useState(false)
+  const [showMeetingPermissions, setShowMeetingPermissions] = useState(false)
+
+  const startMeetingAfterPermissions = useCallback(async () => {
+    setShowMeetingPermissions(false)
+    localStorage.setItem('meeting-permissions-acknowledged', '1')
+    const notePath = await meetingTranscription.start()
+    if (notePath) {
+      meetingNotePathRef.current = notePath
+      await handleVoiceNoteCreated(notePath)
+    }
+  }, [meetingTranscription, handleVoiceNoteCreated])
+
+  const handleToggleMeeting = useCallback(async () => {
+    if (meetingTranscription.state === 'recording') {
+      await meetingTranscription.stop()
+
+      // Read the final transcript and generate meeting notes via LLM
+      const notePath = meetingNotePathRef.current
+      if (notePath) {
+        setMeetingSummarizing(true)
+        try {
+          const result = await window.ipc.invoke('workspace:readFile', { path: notePath, encoding: 'utf8' })
+          const fileContent = result.data
+          if (fileContent && fileContent.trim()) {
+            // Extract meeting start time from frontmatter for calendar matching
+            const dateMatch = fileContent.match(/^date:\s*"(.+)"$/m)
+            const meetingStartTime = dateMatch?.[1]
+            const { notes } = await window.ipc.invoke('meeting:summarize', { transcript: fileContent, meetingStartTime })
+            if (notes) {
+              // Prepend meeting notes below the title but above the transcript
+              const { raw: fm, body: transcriptBody } = splitFrontmatter(fileContent)
+              // Strip the "# Meeting note" title from transcript body — we'll put it first
+              const bodyWithoutTitle = transcriptBody.replace(/^#\s+Meeting note\s*\n*/, '')
+              const newBody = '# Meeting note\n\n' + notes + '\n\n---\n\n## Raw transcript\n\n' + bodyWithoutTitle
+              const newContent = fm ? `${fm}\n${newBody}` : newBody
+              await window.ipc.invoke('workspace:writeFile', {
+                path: notePath,
+                data: newContent,
+                opts: { encoding: 'utf8' },
+              })
+              // Refresh the file view
+              await handleVoiceNoteCreated(notePath)
+            }
+          }
+        } catch (err) {
+          console.error('[meeting] Failed to generate meeting notes:', err)
+        }
+        setMeetingSummarizing(false)
+        meetingNotePathRef.current = null
+      }
+    } else if (meetingTranscription.state === 'idle') {
+      // Show permissions modal on first use (macOS only — Windows works out of the box)
+      if (isMac && !localStorage.getItem('meeting-permissions-acknowledged')) {
+        setShowMeetingPermissions(true)
+        return
+      }
+      const notePath = await meetingTranscription.start()
+      if (notePath) {
+        meetingNotePathRef.current = notePath
+        await handleVoiceNoteCreated(notePath)
+      }
+    }
+  }, [meetingTranscription, handleVoiceNoteCreated])
+  handleToggleMeetingRef.current = handleToggleMeeting
+
   const ensureWikiFile = useCallback(async (wikiPath: string) => {
     const resolvedPath = toKnowledgePath(wikiPath)
     if (!resolvedPath) return null
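The `date:` frontmatter line matched here is the one `formatTranscript` (in the new hook below) writes when the recording starts, so the meeting start time survives the round trip through the file. A worked example of the match (the timestamp is illustrative):

// Worked example of the frontmatter date extraction above.
const fileContent = '---\ntype: meeting\nsource: rowboat\ntitle: Meeting note\ndate: "2026-05-02T10:00:00.000Z"\n---\n\n# Meeting note\n';
const dateMatch = fileContent.match(/^date:\s*"(.+)"$/m);
console.log(dateMatch?.[1]); // "2026-05-02T10:00:00.000Z"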
@@ -4176,6 +4290,10 @@ function App() {
           canNavigateForward={canNavigateForward}
           onNewChat={handleNewChatTab}
           onOpenSearch={() => setIsSearchOpen(true)}
+          meetingState={meetingTranscription.state}
+          meetingSummarizing={meetingSummarizing}
+          meetingAvailable={voiceAvailable}
+          onToggleMeeting={() => { void handleToggleMeeting() }}
           leftInsetPx={isMac ? MACOS_TRAFFIC_LIGHTS_RESERVED_PX : 0}
         />
       </SidebarProvider>
@@ -4192,6 +4310,29 @@ function App() {
         open={showOnboarding}
         onComplete={handleOnboardingComplete}
       />
+      <Dialog open={showMeetingPermissions} onOpenChange={setShowMeetingPermissions}>
+        <DialogContent showCloseButton={false}>
+          <DialogHeader>
+            <DialogTitle>Meeting transcription setup</DialogTitle>
+            <DialogDescription>
+              Rowboat needs <strong>Screen Recording</strong> permission to capture meeting audio from other apps (Zoom, Meet, etc.).
+            </DialogDescription>
+          </DialogHeader>
+          <div className="space-y-3 text-sm text-muted-foreground">
+            <p>To enable this:</p>
+            <ol className="list-decimal list-inside space-y-1.5">
+              <li>Open <strong>System Settings</strong> → <strong>Privacy & Security</strong></li>
+              <li>Click <strong>Screen Recording</strong></li>
+              <li>Toggle on <strong>Rowboat</strong></li>
+              <li>You may need to restart the app after granting permission</li>
+            </ol>
+          </div>
+          <DialogFooter>
+            <Button variant="outline" onClick={() => setShowMeetingPermissions(false)}>Cancel</Button>
+            <Button onClick={() => { void startMeetingAfterPermissions() }}>Continue</Button>
+          </DialogFooter>
+        </DialogContent>
+      </Dialog>
     </TooltipProvider>
   )
 }
@@ -952,7 +952,8 @@ export function MarkdownEditor({
     setRowboatAnchorTop(null)

     // Get editor content for the agent
-    const editorContent = editor.storage.markdown?.getMarkdown?.() ?? ''
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const editorContent = (editor.storage as any).markdown?.getMarkdown?.() ?? ''

     // Helper to find the processing block
     const findProcessingBlock = (): number | null => {

apps/x/apps/renderer/src/hooks/useMeetingTranscription.ts (new file, 374 lines)
@@ -0,0 +1,374 @@
+import { useCallback, useRef, useState } from 'react';
+
+export type MeetingTranscriptionState = 'idle' | 'connecting' | 'recording' | 'stopping';
+
+const DEEPGRAM_PARAMS = new URLSearchParams({
+  model: 'nova-3',
+  encoding: 'linear16',
+  sample_rate: '16000',
+  channels: '2',
+  multichannel: 'true',
+  diarize: 'true',
+  interim_results: 'true',
+  smart_format: 'true',
+  punctuate: 'true',
+  language: 'en',
+});
+const DEEPGRAM_LISTEN_URL = `wss://api.deepgram.com/v1/listen?${DEEPGRAM_PARAMS.toString()}`;
+
+// RMS threshold: system audio above this = "active" (speakers playing)
+const SYSTEM_AUDIO_GATE_THRESHOLD = 0.005;
+
+// Auto-stop after 2 minutes of silence (no transcript from Deepgram)
+const SILENCE_AUTO_STOP_MS = 2 * 60 * 1000;
+
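With `channels: '2'` and `multichannel: 'true'`, Deepgram transcribes the mic and system-audio channels independently and tags each result with its channel index. The message handler below reads only a few fields; a trimmed, hedged sketch of a Results payload, reconstructed from those reads rather than from Deepgram's docs (real messages carry more fields):

// Shape assumed by the onmessage handler below (illustrative values).
const exampleResultsMessage = {
  is_final: true,
  channel_index: [1, 2], // [channel, channel count]; channel 1 is system audio here
  channel: {
    alternatives: [{
      transcript: 'sounds good, let us start',
      words: [{ word: 'sounds', speaker: 0 /* diarization speaker id */ }],
    }],
  },
};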
+// ---------------------------------------------------------------------------
+// Headphone detection
+// ---------------------------------------------------------------------------
+async function detectHeadphones(): Promise<boolean> {
+  try {
+    const devices = await navigator.mediaDevices.enumerateDevices();
+    const outputs = devices.filter(d => d.kind === 'audiooutput');
+    const defaultOutput = outputs.find(d => d.deviceId === 'default');
+    const label = (defaultOutput?.label ?? '').toLowerCase();
+    // Heuristic: built-in speakers won't match these patterns
+    const headphonePatterns = ['headphone', 'airpod', 'earpod', 'earphone', 'earbud', 'bluetooth', 'bt_', 'jabra', 'bose', 'sony wh', 'sony wf'];
+    return headphonePatterns.some(p => label.includes(p));
+  } catch {
+    return false;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Transcript formatting
+// ---------------------------------------------------------------------------
+interface TranscriptEntry {
+  speaker: string;
+  text: string;
+}
+
+function formatTranscript(entries: TranscriptEntry[], date: string): string {
+  const lines = [
+    '---',
+    'type: meeting',
+    'source: rowboat',
+    'title: Meeting note',
+    `date: "${date}"`,
+    '---',
+    '',
+    '# Meeting note',
+    '',
+  ];
+  for (let i = 0; i < entries.length; i++) {
+    if (i > 0 && entries[i].speaker !== entries[i - 1].speaker) {
+      lines.push('');
+    }
+    lines.push(`**${entries[i].speaker}:** ${entries[i].text}`);
+    lines.push('');
+  }
+  return lines.join('\n');
+}
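For two turns by different speakers, formatTranscript produces output like the following (date illustrative). The loop emits one blank line after every entry and an extra one on speaker changes:

---
type: meeting
source: rowboat
title: Meeting note
date: "2026-05-02T10:00:00.000Z"
---

# Meeting note

**You:** Good morning.


**Speaker 0:** Morning! Let's get started.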
+
+// ---------------------------------------------------------------------------
+// Hook
+// ---------------------------------------------------------------------------
+export function useMeetingTranscription(onAutoStop?: () => void) {
+  const [state, setState] = useState<MeetingTranscriptionState>('idle');
+  const wsRef = useRef<WebSocket | null>(null);
+  const micStreamRef = useRef<MediaStream | null>(null);
+  const systemStreamRef = useRef<MediaStream | null>(null);
+  const processorRef = useRef<ScriptProcessorNode | null>(null);
+  const audioCtxRef = useRef<AudioContext | null>(null);
+  const transcriptRef = useRef<TranscriptEntry[]>([]);
+  const interimRef = useRef<Map<number, { speaker: string; text: string }>>(new Map());
+  const notePathRef = useRef<string>('');
+  const writeTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+  const silenceTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+  const onAutoStopRef = useRef(onAutoStop);
+  onAutoStopRef.current = onAutoStop;
+  const dateRef = useRef<string>('');
+
+  const writeTranscriptToFile = useCallback(async () => {
+    if (!notePathRef.current) return;
+    const entries = [...transcriptRef.current];
+    for (const interim of interimRef.current.values()) {
+      if (!interim.text) continue;
+      if (entries.length > 0 && entries[entries.length - 1].speaker === interim.speaker) {
+        entries[entries.length - 1] = { speaker: interim.speaker, text: entries[entries.length - 1].text + ' ' + interim.text };
+      } else {
+        entries.push({ speaker: interim.speaker, text: interim.text });
+      }
+    }
+    if (entries.length === 0) return;
+    const content = formatTranscript(entries, dateRef.current);
+    try {
+      await window.ipc.invoke('workspace:writeFile', {
+        path: notePathRef.current,
+        data: content,
+        opts: { encoding: 'utf8' },
+      });
+    } catch (err) {
+      console.error('[meeting] Failed to write transcript:', err);
+    }
+  }, []);
+
+  const scheduleDebouncedWrite = useCallback(() => {
+    if (writeTimerRef.current) clearTimeout(writeTimerRef.current);
+    writeTimerRef.current = setTimeout(() => {
+      void writeTranscriptToFile();
+    }, 1000);
+  }, [writeTranscriptToFile]);
+
+  const cleanup = useCallback(() => {
+    if (writeTimerRef.current) {
+      clearTimeout(writeTimerRef.current);
+      writeTimerRef.current = null;
+    }
+    if (silenceTimerRef.current) {
+      clearTimeout(silenceTimerRef.current);
+      silenceTimerRef.current = null;
+    }
+    if (processorRef.current) {
+      processorRef.current.disconnect();
+      processorRef.current = null;
+    }
+    if (audioCtxRef.current) {
+      audioCtxRef.current.close();
+      audioCtxRef.current = null;
+    }
+    if (micStreamRef.current) {
+      micStreamRef.current.getTracks().forEach(t => t.stop());
+      micStreamRef.current = null;
+    }
+    if (systemStreamRef.current) {
+      systemStreamRef.current.getTracks().forEach(t => t.stop());
+      systemStreamRef.current = null;
+    }
+    if (wsRef.current) {
+      wsRef.current.onclose = null;
+      wsRef.current.close();
+      wsRef.current = null;
+    }
+  }, []);
+
+  const start = useCallback(async (): Promise<string | null> => {
+    if (state !== 'idle') return null;
+    setState('connecting');
+
+    // Detect headphones vs speakers
+    const usingHeadphones = await detectHeadphones();
+    console.log(`[meeting] Audio output mode: ${usingHeadphones ? 'headphones' : 'speakers'}`);
+
+    // Get Deepgram token
+    let ws: WebSocket;
+    try {
+      const result = await window.ipc.invoke('voice:getDeepgramToken', null);
+      if (result) {
+        console.log('[meeting] Using proxy token');
+        ws = new WebSocket(DEEPGRAM_LISTEN_URL, ['bearer', result.token]);
+      } else {
+        const config = await window.ipc.invoke('voice:getConfig', null);
+        if (!config?.deepgram) {
+          console.error('[meeting] No Deepgram config available');
+          setState('idle');
+          return null;
+        }
+        console.log('[meeting] Using API key');
+        ws = new WebSocket(DEEPGRAM_LISTEN_URL, ['token', config.deepgram.apiKey]);
+      }
+    } catch (err) {
+      console.error('[meeting] Failed to get Deepgram token:', err);
+      setState('idle');
+      return null;
+    }
+    wsRef.current = ws;
+
+    // Wait for WS open
+    const wsOk = await new Promise<boolean>((resolve) => {
+      ws.onopen = () => resolve(true);
+      ws.onerror = () => resolve(false);
+      setTimeout(() => resolve(false), 5000);
+    });
+    if (!wsOk) {
+      console.error('[meeting] WebSocket failed to connect');
+      cleanup();
+      setState('idle');
+      return null;
+    }
+    console.log('[meeting] WebSocket connected');
+
+    // Set up WS message handler
+    transcriptRef.current = [];
+    interimRef.current = new Map();
+    ws.onmessage = (event) => {
+      const data = JSON.parse(event.data);
+      if (!data.channel?.alternatives?.[0]) return;
+      const transcript = data.channel.alternatives[0].transcript;
+      if (!transcript) return;
+
+      // Reset silence auto-stop timer on any transcript
+      if (silenceTimerRef.current) clearTimeout(silenceTimerRef.current);
+      silenceTimerRef.current = setTimeout(() => {
+        console.log('[meeting] 2 minutes of silence — auto-stopping');
+        onAutoStopRef.current?.();
+      }, SILENCE_AUTO_STOP_MS);
+
+      const channelIndex = data.channel_index?.[0] ?? 0;
+      const isMic = channelIndex === 0;
+
+      // Channel 0 = mic = "You", Channel 1 = system audio with diarization
+      let speaker: string;
+      if (isMic) {
+        speaker = 'You';
+      } else {
+        // Use Deepgram diarization speaker ID for system audio channel
+        const words = data.channel.alternatives[0].words;
+        const speakerId = words?.[0]?.speaker;
+        speaker = speakerId != null ? `Speaker ${speakerId}` : 'System audio';
+      }
+
+      if (data.is_final) {
+        interimRef.current.delete(channelIndex);
+        const entries = transcriptRef.current;
+        if (entries.length > 0 && entries[entries.length - 1].speaker === speaker) {
+          entries[entries.length - 1].text += ' ' + transcript;
+        } else {
+          entries.push({ speaker, text: transcript });
+        }
+      } else {
+        interimRef.current.set(channelIndex, { speaker, text: transcript });
+      }
+      scheduleDebouncedWrite();
+    };
+
+    ws.onclose = () => {
+      console.log('[meeting] WebSocket closed');
+      wsRef.current = null;
+    };
+
+    // Get mic stream
+    let micStream: MediaStream;
+    try {
+      micStream = await navigator.mediaDevices.getUserMedia({
+        audio: {
+          echoCancellation: true,
+          noiseSuppression: true,
+          autoGainControl: true,
+        },
+      });
+    } catch (err) {
+      console.error('[meeting] Microphone access denied:', err);
+      cleanup();
+      setState('idle');
+      return null;
+    }
+    micStreamRef.current = micStream;
+
+    // Get system audio via getDisplayMedia (loopback)
+    let systemStream: MediaStream;
+    try {
+      systemStream = await navigator.mediaDevices.getDisplayMedia({ audio: true, video: true });
+      systemStream.getVideoTracks().forEach(t => t.stop());
+    } catch (err) {
+      console.error('[meeting] System audio access denied:', err);
+      cleanup();
+      setState('idle');
+      return null;
+    }
+    if (systemStream.getAudioTracks().length === 0) {
+      console.error('[meeting] No audio track from getDisplayMedia');
+      systemStream.getTracks().forEach(t => t.stop());
+      cleanup();
+      setState('idle');
+      return null;
+    }
+    console.log('[meeting] System audio captured');
+    systemStreamRef.current = systemStream;
+
+    // ----- Audio pipeline -----
+    const audioCtx = new AudioContext({ sampleRate: 16000 });
+    audioCtxRef.current = audioCtx;
+
+    const micSource = audioCtx.createMediaStreamSource(micStream);
+    const systemSource = audioCtx.createMediaStreamSource(systemStream);
+    const merger = audioCtx.createChannelMerger(2);
+
+    micSource.connect(merger, 0, 0);    // mic → channel 0
+    systemSource.connect(merger, 0, 1); // system audio → channel 1
+
+    const processor = audioCtx.createScriptProcessor(4096, 2, 2);
+    processorRef.current = processor;
+
+    processor.onaudioprocess = (e) => {
+      if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) return;
+
+      const micRaw = e.inputBuffer.getChannelData(0);
+      const sysRaw = e.inputBuffer.getChannelData(1);
+
+      // Mode 1 (headphones): pass both streams through unmodified
+      // Mode 2 (speakers): gate/mute mic when system audio is active
+      let micOut: Float32Array;
+      if (usingHeadphones) {
+        micOut = micRaw;
+      } else {
+        // Compute system audio RMS to detect activity
+        let sysSum = 0;
+        for (let i = 0; i < sysRaw.length; i++) sysSum += sysRaw[i] * sysRaw[i];
+        const sysRms = Math.sqrt(sysSum / sysRaw.length);
+
+        if (sysRms > SYSTEM_AUDIO_GATE_THRESHOLD) {
+          // System audio is playing — mute mic to prevent bleed
+          micOut = new Float32Array(micRaw.length); // all zeros
+        } else {
+          // System audio is silent — pass mic through
+          micOut = micRaw;
+        }
+      }
+
+      // Interleave mic (ch0) + system audio (ch1) into stereo int16 PCM
+      const int16 = new Int16Array(micOut.length * 2);
+      for (let i = 0; i < micOut.length; i++) {
+        const s0 = Math.max(-1, Math.min(1, micOut[i]));
+        const s1 = Math.max(-1, Math.min(1, sysRaw[i]));
+        int16[i * 2] = s0 < 0 ? s0 * 0x8000 : s0 * 0x7fff;
+        int16[i * 2 + 1] = s1 < 0 ? s1 * 0x8000 : s1 * 0x7fff;
+      }
+      wsRef.current.send(int16.buffer);
+    };
+
+    merger.connect(processor);
+    processor.connect(audioCtx.destination);
+
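The asymmetric scaling in the interleave loop maps the float range [-1, 1] onto the full signed 16-bit range without overflow, since int16 spans -32768..32767. A quick check:

// Worked check of the float → int16 conversion used in onaudioprocess.
const toInt16 = (s: number) => (s < 0 ? s * 0x8000 : s * 0x7fff);
console.log(toInt16(-1)); // -32768 (int16 minimum)
console.log(toInt16(1));  //  32767 (int16 maximum)
console.log(toInt16(0));  //  0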
+    // Create the note file, organized by date like voice memos
+    const now = new Date();
+    const dateStr = now.toISOString();
+    dateRef.current = dateStr;
+    const dateFolder = dateStr.split('T')[0]; // YYYY-MM-DD
+    const timestamp = dateStr.replace(/:/g, '-').replace(/\.\d+Z$/, '');
+    const notePath = `knowledge/Meetings/rowboat/${dateFolder}/meeting-${timestamp}.md`;
+    notePathRef.current = notePath;
+
+    const initialContent = formatTranscript([], dateStr);
+    await window.ipc.invoke('workspace:writeFile', {
+      path: notePath,
+      data: initialContent,
+      opts: { encoding: 'utf8', mkdirp: true },
+    });
+
+    setState('recording');
+    return notePath;
+  }, [state, cleanup, scheduleDebouncedWrite]);
+
+  const stop = useCallback(async () => {
+    if (state !== 'recording') return;
+    setState('stopping');
+
+    cleanup();
+    interimRef.current = new Map();
+    await writeTranscriptToFile();
+
+    setState('idle');
+  }, [state, cleanup, writeTranscriptToFile]);
+
+  return { state, start, stop };
+}
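Condensed from the App.tsx hunks earlier in this commit, the hook is consumed like this:

// Usage, condensed from App.tsx above.
const meetingTranscription = useMeetingTranscription(() => {
  handleToggleMeetingRef.current?.() // silence auto-stop re-enters the toggle handler
})

const notePath = await meetingTranscription.start() // begins capture; returns the live note path
// ... later, on toggle ...
await meetingTranscription.stop() // flushes the final transcript to disk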