diff --git a/apps/x/apps/main/entitlements.plist b/apps/x/apps/main/entitlements.plist
new file mode 100644
index 00000000..db2dbd7e
--- /dev/null
+++ b/apps/x/apps/main/entitlements.plist
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+  <key>com.apple.security.device.audio-input</key>
+  <true/>
+  <key>com.apple.security.device.screen-capture</key>
+  <true/>
+</dict>
+</plist>
diff --git a/apps/x/apps/main/forge.config.cjs b/apps/x/apps/main/forge.config.cjs
index 57f733f2..c79a8c43 100644
--- a/apps/x/apps/main/forge.config.cjs
+++ b/apps/x/apps/main/forge.config.cjs
@@ -13,6 +13,10 @@ module.exports = {
appCategoryType: 'public.app-category.productivity',
osxSign: {
batchCodesignCalls: true,
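+      // Apply the mic + screen-capture entitlements to the app bundle and,
+      // via entitlements-inherit, to its nested helper binaries.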
+ optionsForFile: () => ({
+ entitlements: path.join(__dirname, 'entitlements.plist'),
+ 'entitlements-inherit': path.join(__dirname, 'entitlements.plist'),
+ }),
},
osxNotarize: {
appleId: process.env.APPLE_ID,
diff --git a/apps/x/apps/main/src/main.ts b/apps/x/apps/main/src/main.ts
index 579fdbfa..060f0433 100644
--- a/apps/x/apps/main/src/main.ts
+++ b/apps/x/apps/main/src/main.ts
@@ -1,4 +1,4 @@
-import { app, BrowserWindow, protocol, net, shell, session } from "electron";
+import { app, BrowserWindow, desktopCapturer, protocol, net, shell, session } from "electron";
import path from "node:path";
import {
setupIpcHandlers,
@@ -92,15 +92,27 @@ function createWindow() {
},
});
- // Grant microphone permission for voice mode
+ // Grant microphone and display-capture permissions
session.defaultSession.setPermissionRequestHandler((_webContents, permission, callback) => {
- if (permission === 'media') {
+ if (permission === 'media' || permission === 'display-capture') {
callback(true);
} else {
callback(false);
}
});
+ // Auto-approve display media requests and route system audio as loopback.
+ // Electron requires a video source in the callback even if we only want audio.
+ // We pass the first available screen source; the renderer discards the video track.
+ session.defaultSession.setDisplayMediaRequestHandler(async (_request, callback) => {
+ const sources = await desktopCapturer.getSources({ types: ['screen'] });
+ if (sources.length === 0) {
+ callback({});
+ return;
+ }
+ callback({ video: sources[0], audio: 'loopback' });
+ });
+
// Show window when content is ready to prevent blank screen
win.once("ready-to-show", () => {
win.maximize();
diff --git a/apps/x/apps/renderer/src/App.tsx b/apps/x/apps/renderer/src/App.tsx
index a92f2d28..fdea8bac 100644
--- a/apps/x/apps/renderer/src/App.tsx
+++ b/apps/x/apps/renderer/src/App.tsx
@@ -5,7 +5,7 @@ import { RunEvent, ListRunsResponse } from '@x/shared/src/runs.js';
import type { LanguageModelUsage, ToolUIPart } from 'ai';
import './App.css'
import z from 'zod';
-import { CheckIcon, LoaderIcon, PanelLeftIcon, Maximize2, Minimize2, ChevronLeftIcon, ChevronRightIcon, SquarePen, SearchIcon, HistoryIcon } from 'lucide-react';
+import { CheckIcon, LoaderIcon, PanelLeftIcon, Maximize2, Minimize2, ChevronLeftIcon, ChevronRightIcon, SquarePen, SearchIcon, HistoryIcon, RadioIcon, SquareIcon } from 'lucide-react';
import { cn } from '@/lib/utils';
import { MarkdownEditor } from './components/markdown-editor';
import { ChatSidebar } from './components/chat-sidebar';
@@ -78,6 +78,7 @@ import { AgentScheduleState } from '@x/shared/dist/agent-schedule-state.js'
import { toast } from "sonner"
import { useVoiceMode } from '@/hooks/useVoiceMode'
import { useVoiceTTS } from '@/hooks/useVoiceTTS'
+import { useMeetingTranscription, type MeetingTranscriptionState } from '@/hooks/useMeetingTranscription'
type DirEntry = z.infer
type RunEventType = z.infer<typeof RunEvent>
@@ -383,6 +384,8 @@ function FixedSidebarToggle({
canNavigateForward,
onNewChat,
onOpenSearch,
+ meetingState,
+ onToggleMeeting,
leftInsetPx,
}: {
onNavigateBack: () => void
@@ -391,6 +394,8 @@ function FixedSidebarToggle({
canNavigateForward: boolean
onNewChat: () => void
onOpenSearch: () => void
+ meetingState: MeetingTranscriptionState
+ onToggleMeeting: () => void
leftInsetPx: number
}) {
const { toggleSidebar, state } = useSidebar()
@@ -426,6 +431,25 @@ function FixedSidebarToggle({
>
+      {/* Meeting transcription toggle */}
+      <button
+        type="button"
+        onClick={onToggleMeeting}
+        title={meetingState === 'recording' ? 'Stop meeting transcription' : 'Start meeting transcription'}
+      >
+        {meetingState === 'recording' ? <SquareIcon /> : <RadioIcon />}
+      </button>
+
{/* Back / Forward navigation */}
{isCollapsed && (
<>
@@ -619,6 +643,11 @@ function App() {
const voiceRef = useRef(voice)
voiceRef.current = voice
+ const meetingTranscription = useMeetingTranscription()
+
// Check if voice is available on mount and when OAuth state changes
const refreshVoiceAvailability = useCallback(() => {
Promise.all([
@@ -3314,6 +3343,17 @@ function App() {
navigateToFile(notePath)
}, [loadDirectory, navigateToFile, fileTabs])
+ const handleToggleMeeting = useCallback(async () => {
+ if (meetingTranscription.state === 'recording') {
+ await meetingTranscription.stop()
+ } else if (meetingTranscription.state === 'idle') {
+ const notePath = await meetingTranscription.start()
+ if (notePath) {
+ await handleVoiceNoteCreated(notePath)
+ }
+ }
+ }, [meetingTranscription, handleVoiceNoteCreated])
+
const ensureWikiFile = useCallback(async (wikiPath: string) => {
const resolvedPath = toKnowledgePath(wikiPath)
if (!resolvedPath) return null
@@ -4175,6 +4215,8 @@ function App() {
canNavigateForward={canNavigateForward}
onNewChat={handleNewChatTab}
onOpenSearch={() => setIsSearchOpen(true)}
+ meetingState={meetingTranscription.state}
+ onToggleMeeting={() => { void handleToggleMeeting() }}
leftInsetPx={isMac ? MACOS_TRAFFIC_LIGHTS_RESERVED_PX : 0}
/>
diff --git a/apps/x/apps/renderer/src/hooks/useMeetingTranscription.ts b/apps/x/apps/renderer/src/hooks/useMeetingTranscription.ts
new file mode 100644
index 00000000..103cfe74
--- /dev/null
+++ b/apps/x/apps/renderer/src/hooks/useMeetingTranscription.ts
@@ -0,0 +1,269 @@
+import { useCallback, useRef, useState } from 'react';
+
+export type MeetingTranscriptionState = 'idle' | 'connecting' | 'recording' | 'stopping';
+
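+// Stereo multichannel stream: channel 0 carries the microphone and channel 1 the
+// system (loopback) audio, so Deepgram transcribes each channel separately.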
+const DEEPGRAM_PARAMS = new URLSearchParams({
+ model: 'nova-3',
+ encoding: 'linear16',
+ sample_rate: '16000',
+ channels: '2',
+ multichannel: 'true',
+ interim_results: 'true',
+ smart_format: 'true',
+ punctuate: 'true',
+});
+const DEEPGRAM_LISTEN_URL = `wss://api.deepgram.com/v1/listen?${DEEPGRAM_PARAMS.toString()}`;
+
+interface TranscriptEntry {
+ speaker: string;
+ text: string;
+}
+
+function formatTranscript(entries: TranscriptEntry[], date: string): string {
+ const lines = [
+ '---',
+ 'type: meeting',
+ 'source: rowboat',
+ 'title: Meeting Transcription',
+ `date: "${date}"`,
+ '---',
+ '',
+ '# Meeting Transcription',
+ '',
+ ];
+ for (const entry of entries) {
+ lines.push(`**${entry.speaker}:** ${entry.text}`);
+ lines.push('');
+ }
+ return lines.join('\n');
+}
+
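+/**
+ * Captures microphone and system audio, streams both to Deepgram as 2-channel
+ * PCM, and incrementally writes the speaker-labeled transcript to a markdown note.
+ */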
+export function useMeetingTranscription() {
+ const [state, setState] = useState<MeetingTranscriptionState>('idle');
+ const wsRef = useRef<WebSocket | null>(null);
+ const micStreamRef = useRef<MediaStream | null>(null);
+ const systemStreamRef = useRef<MediaStream | null>(null);
+ const processorRef = useRef<ScriptProcessorNode | null>(null);
+ const audioCtxRef = useRef<AudioContext | null>(null);
+ const transcriptRef = useRef<TranscriptEntry[]>([]);
+ const notePathRef = useRef('');
+ const writeTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+ const dateRef = useRef('');
+
+ const writeTranscriptToFile = useCallback(async () => {
+ if (!notePathRef.current || transcriptRef.current.length === 0) return;
+ const content = formatTranscript(transcriptRef.current, dateRef.current);
+ try {
+ await window.ipc.invoke('workspace:writeFile', {
+ path: notePathRef.current,
+ data: content,
+ opts: { encoding: 'utf8' },
+ });
+ } catch (err) {
+ console.error('[meeting] Failed to write transcript:', err);
+ }
+ }, []);
+
+ const scheduleDebouncedWrite = useCallback(() => {
+ if (writeTimerRef.current) clearTimeout(writeTimerRef.current);
+ writeTimerRef.current = setTimeout(() => {
+ void writeTranscriptToFile();
+ }, 5000);
+ }, [writeTranscriptToFile]);
+
+ const cleanup = useCallback(() => {
+ if (writeTimerRef.current) {
+ clearTimeout(writeTimerRef.current);
+ writeTimerRef.current = null;
+ }
+ if (processorRef.current) {
+ processorRef.current.disconnect();
+ processorRef.current = null;
+ }
+ if (audioCtxRef.current) {
+ audioCtxRef.current.close();
+ audioCtxRef.current = null;
+ }
+ if (micStreamRef.current) {
+ micStreamRef.current.getTracks().forEach(t => t.stop());
+ micStreamRef.current = null;
+ }
+ if (systemStreamRef.current) {
+ systemStreamRef.current.getTracks().forEach(t => t.stop());
+ systemStreamRef.current = null;
+ }
+ if (wsRef.current) {
+ wsRef.current.onclose = null;
+ wsRef.current.close();
+ wsRef.current = null;
+ }
+ }, []);
+
+ const start = useCallback(async (): Promise<string | null> => {
+ if (state !== 'idle') return null;
+ setState('connecting');
+
+ // Get Deepgram token
+ let ws: WebSocket;
+ try {
+ const result = await window.ipc.invoke('voice:getDeepgramToken', null);
+ if (result) {
+ console.log('[meeting] Using proxy token');
+ ws = new WebSocket(DEEPGRAM_LISTEN_URL, ['bearer', result.token]);
+ } else {
+ const config = await window.ipc.invoke('voice:getConfig', null);
+ if (!config?.deepgram) {
+ console.error('[meeting] No Deepgram config available');
+ setState('idle');
+ return null;
+ }
+ console.log('[meeting] Using API key');
+ ws = new WebSocket(DEEPGRAM_LISTEN_URL, ['token', config.deepgram.apiKey]);
+ }
+ } catch (err) {
+ console.error('[meeting] Failed to get Deepgram token:', err);
+ setState('idle');
+ return null;
+ }
+ wsRef.current = ws;
+
+ // Wait for WS open
+ const wsOk = await new Promise<boolean>((resolve) => {
+ ws.onopen = () => resolve(true);
+ ws.onerror = () => resolve(false);
+ setTimeout(() => resolve(false), 5000);
+ });
+ if (!wsOk) {
+ console.error('[meeting] WebSocket failed to connect');
+ cleanup();
+ setState('idle');
+ return null;
+ }
+ console.log('[meeting] WebSocket connected');
+
+ // Set up WS message handler
+ transcriptRef.current = [];
+ ws.onmessage = (event) => {
+ const data = JSON.parse(event.data);
+ if (!data.channel?.alternatives?.[0]) return;
+ const transcript = data.channel.alternatives[0].transcript;
+ if (!transcript || !data.is_final) return;
+
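+ // channel_index[0] identifies the audio channel this result came from:
+ // 0 = mic ("You"), 1 = system audio ("Speaker").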
+ const channelIndex = data.channel_index?.[0] ?? 0;
+ const speaker = channelIndex === 0 ? 'You' : 'Speaker';
+
+ // Merge with last entry if same speaker
+ const entries = transcriptRef.current;
+ if (entries.length > 0 && entries[entries.length - 1].speaker === speaker) {
+ entries[entries.length - 1].text += ' ' + transcript;
+ } else {
+ entries.push({ speaker, text: transcript });
+ }
+ scheduleDebouncedWrite();
+ };
+
+ ws.onclose = () => {
+ console.log('[meeting] WebSocket closed');
+ wsRef.current = null;
+ };
+
+ // Get mic stream
+ let micStream: MediaStream;
+ try {
+ micStream = await navigator.mediaDevices.getUserMedia({ audio: true });
+ } catch (err) {
+ console.error('[meeting] Microphone access denied:', err);
+ cleanup();
+ setState('idle');
+ return null;
+ }
+ micStreamRef.current = micStream;
+
+ // Get system audio via getDisplayMedia
+ // The main process setDisplayMediaRequestHandler auto-approves with loopback audio
+ let systemStream: MediaStream;
+ try {
+ systemStream = await navigator.mediaDevices.getDisplayMedia({ audio: true, video: true });
+ // Stop any video tracks — we only need audio
+ systemStream.getVideoTracks().forEach(t => t.stop());
+ } catch (err) {
+ console.error('[meeting] System audio access denied:', err);
+ cleanup();
+ setState('idle');
+ return null;
+ }
+ if (systemStream.getAudioTracks().length === 0) {
+ console.error('[meeting] No audio track from getDisplayMedia');
+ systemStream.getTracks().forEach(t => t.stop());
+ cleanup();
+ setState('idle');
+ return null;
+ }
+ console.log('[meeting] System audio captured');
+ systemStreamRef.current = systemStream;
+
+ // Set up AudioContext with channel merger
+ const audioCtx = new AudioContext({ sampleRate: 16000 });
+ audioCtxRef.current = audioCtx;
+
+ const micSource = audioCtx.createMediaStreamSource(micStream);
+ const systemSource = audioCtx.createMediaStreamSource(systemStream);
+ const merger = audioCtx.createChannelMerger(2);
+
+ micSource.connect(merger, 0, 0); // mic → channel 0
+ systemSource.connect(merger, 0, 1); // system audio → channel 1
+
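+ // ScriptProcessorNode is deprecated in favor of AudioWorklet, but it keeps this
+ // capture path simple: 4096-frame buffers with 2 input and 2 output channels.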
+ const processor = audioCtx.createScriptProcessor(4096, 2, 2);
+ processorRef.current = processor;
+
+ processor.onaudioprocess = (e) => {
+ if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) return;
+ const ch0 = e.inputBuffer.getChannelData(0);
+ const ch1 = e.inputBuffer.getChannelData(1);
+ // Interleave 2 channels into stereo int16 PCM
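+ // Negative samples scale by 0x8000 and positive by 0x7fff so both extremes fit int16.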
+ const int16 = new Int16Array(ch0.length * 2);
+ for (let i = 0; i < ch0.length; i++) {
+ const s0 = Math.max(-1, Math.min(1, ch0[i]));
+ const s1 = Math.max(-1, Math.min(1, ch1[i]));
+ int16[i * 2] = s0 < 0 ? s0 * 0x8000 : s0 * 0x7fff;
+ int16[i * 2 + 1] = s1 < 0 ? s1 * 0x8000 : s1 * 0x7fff;
+ }
+ wsRef.current.send(int16.buffer);
+ };
+
+ merger.connect(processor);
+ processor.connect(audioCtx.destination);
+
+ // Create the note file
+ const now = new Date();
+ const dateStr = now.toISOString();
+ dateRef.current = dateStr;
+ const timestamp = dateStr.replace(/:/g, '-').replace(/\.\d+Z$/, '');
+ const notePath = `knowledge/Meetings/rowboat/meeting-${timestamp}.md`;
+ notePathRef.current = notePath;
+
+ const initialContent = formatTranscript([], dateStr);
+ await window.ipc.invoke('workspace:writeFile', {
+ path: notePath,
+ data: initialContent,
+ opts: { encoding: 'utf8', mkdirp: true },
+ });
+
+ setState('recording');
+ return notePath;
+ }, [state, cleanup, scheduleDebouncedWrite]);
+
+ const stop = useCallback(async () => {
+ if (state !== 'recording') return;
+ setState('stopping');
+
+ cleanup();
+
+ // Write final transcript
+ await writeTranscriptToFile();
+
+ setState('idle');
+ }, [state, cleanup, writeTranscriptToFile]);
+
+ return { state, start, stop };
+}