From dff3440f72a7cbfe2c788ca96bc3067c99b6c5ef Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 20 Mar 2026 19:44:48 +0200 Subject: [PATCH 001/102] refactor(desktop): extract error handling into modules/errors.ts --- surfsense_desktop/src/main.ts | 33 +++---------------------- surfsense_desktop/src/modules/errors.ts | 33 +++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 30 deletions(-) create mode 100644 surfsense_desktop/src/modules/errors.ts diff --git a/surfsense_desktop/src/main.ts b/surfsense_desktop/src/main.ts index e0a6c3be5..a7a12c485 100644 --- a/surfsense_desktop/src/main.ts +++ b/surfsense_desktop/src/main.ts @@ -1,37 +1,10 @@ -import { app, BrowserWindow, shell, ipcMain, session, dialog, clipboard, Menu } from 'electron'; +import { app, BrowserWindow, shell, ipcMain, session, dialog, Menu } from 'electron'; import path from 'path'; import { getPort } from 'get-port-please'; import { autoUpdater } from 'electron-updater'; +import { registerGlobalErrorHandlers, showErrorDialog } from './modules/errors'; -function showErrorDialog(title: string, error: unknown): void { - const err = error instanceof Error ? error : new Error(String(error)); - console.error(`${title}:`, err); - - if (app.isReady()) { - const detail = err.stack || err.message; - const buttonIndex = dialog.showMessageBoxSync({ - type: 'error', - buttons: ['OK', process.platform === 'darwin' ? 'Copy Error' : 'Copy error'], - defaultId: 0, - noLink: true, - message: title, - detail, - }); - if (buttonIndex === 1) { - clipboard.writeText(`${title}\n${detail}`); - } - } else { - dialog.showErrorBox(title, err.stack || err.message); - } -} - -process.on('uncaughtException', (error) => { - showErrorDialog('Unhandled Error', error); -}); - -process.on('unhandledRejection', (reason) => { - showErrorDialog('Unhandled Promise Rejection', reason); -}); +registerGlobalErrorHandlers(); const isDev = !app.isPackaged; let mainWindow: BrowserWindow | null = null; diff --git a/surfsense_desktop/src/modules/errors.ts b/surfsense_desktop/src/modules/errors.ts new file mode 100644 index 000000000..ab9f7088c --- /dev/null +++ b/surfsense_desktop/src/modules/errors.ts @@ -0,0 +1,33 @@ +import { app, clipboard, dialog } from 'electron'; + +export function showErrorDialog(title: string, error: unknown): void { + const err = error instanceof Error ? error : new Error(String(error)); + console.error(`${title}:`, err); + + if (app.isReady()) { + const detail = err.stack || err.message; + const buttonIndex = dialog.showMessageBoxSync({ + type: 'error', + buttons: ['OK', process.platform === 'darwin' ? 'Copy Error' : 'Copy error'], + defaultId: 0, + noLink: true, + message: title, + detail, + }); + if (buttonIndex === 1) { + clipboard.writeText(`${title}\n${detail}`); + } + } else { + dialog.showErrorBox(title, err.stack || err.message); + } +} + +export function registerGlobalErrorHandlers(): void { + process.on('uncaughtException', (error) => { + showErrorDialog('Unhandled Error', error); + }); + + process.on('unhandledRejection', (reason) => { + showErrorDialog('Unhandled Promise Rejection', reason); + }); +} From f08199ececd8df86c8695bc06dd6c6bed15b8302 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 20 Mar 2026 19:48:35 +0200 Subject: [PATCH 002/102] refactor(desktop): extract server startup into modules/server.ts --- surfsense_desktop/src/main.ts | 57 ++----------------------- surfsense_desktop/src/modules/server.ts | 53 +++++++++++++++++++++++ 2 files changed, 57 insertions(+), 53 deletions(-) create mode 100644 surfsense_desktop/src/modules/server.ts diff --git a/surfsense_desktop/src/main.ts b/surfsense_desktop/src/main.ts index a7a12c485..db0f8f937 100644 --- a/surfsense_desktop/src/main.ts +++ b/surfsense_desktop/src/main.ts @@ -1,67 +1,18 @@ import { app, BrowserWindow, shell, ipcMain, session, dialog, Menu } from 'electron'; import path from 'path'; -import { getPort } from 'get-port-please'; import { autoUpdater } from 'electron-updater'; import { registerGlobalErrorHandlers, showErrorDialog } from './modules/errors'; +import { startNextServer, getServerPort } from './modules/server'; registerGlobalErrorHandlers(); const isDev = !app.isPackaged; let mainWindow: BrowserWindow | null = null; let deepLinkUrl: string | null = null; -let serverPort: number = 3000; // overwritten at startup with a free port const PROTOCOL = 'surfsense'; -// Injected at compile time from .env via esbuild define const HOSTED_FRONTEND_URL = process.env.HOSTED_FRONTEND_URL as string; -function getStandalonePath(): string { - if (isDev) { - return path.join(__dirname, '..', '..', 'surfsense_web', '.next', 'standalone', 'surfsense_web'); - } - return path.join(process.resourcesPath, 'standalone'); -} - -async function waitForServer(url: string, maxRetries = 60): Promise { - for (let i = 0; i < maxRetries; i++) { - try { - const res = await fetch(url); - if (res.ok || res.status === 404 || res.status === 500) return true; - } catch { - // not ready yet - } - await new Promise((r) => setTimeout(r, 500)); - } - return false; -} - -async function startNextServer(): Promise { - if (isDev) return; - - serverPort = await getPort({ port: 3000, portRange: [30_011, 50_000] }); - console.log(`Selected port ${serverPort}`); - - const standalonePath = getStandalonePath(); - const serverScript = path.join(standalonePath, 'server.js'); - - // The standalone server.js reads PORT / HOSTNAME from process.env and - // uses process.chdir(__dirname). Running it via require() in the same - // process is the proven approach (avoids spawning a second Electron - // instance whose ASAR-patched fs breaks Next.js static file serving). - process.env.PORT = String(serverPort); - process.env.HOSTNAME = 'localhost'; - process.env.NODE_ENV = 'production'; - process.chdir(standalonePath); - - require(serverScript); - - const ready = await waitForServer(`http://localhost:${serverPort}`); - if (!ready) { - throw new Error('Next.js server failed to start within 30 s'); - } - console.log(`Next.js server ready on port ${serverPort}`); -} - function createWindow() { mainWindow = new BrowserWindow({ width: 1280, @@ -83,7 +34,7 @@ function createWindow() { mainWindow?.show(); }); - mainWindow.loadURL(`http://localhost:${serverPort}/login`); + mainWindow.loadURL(`http://localhost:${getServerPort()}/login`); // External links open in system browser, not in the Electron window mainWindow.webContents.setWindowOpenHandler(({ url }) => { @@ -98,7 +49,7 @@ function createWindow() { // and rewrite them to localhost so the user stays in the desktop app. const filter = { urls: [`${HOSTED_FRONTEND_URL}/*`] }; session.defaultSession.webRequest.onBeforeRequest(filter, (details, callback) => { - const rewritten = details.url.replace(HOSTED_FRONTEND_URL, `http://localhost:${serverPort}`); + const rewritten = details.url.replace(HOSTED_FRONTEND_URL, `http://localhost:${getServerPort()}`); callback({ redirectURL: rewritten }); }); @@ -145,7 +96,7 @@ function handleDeepLink(url: string) { const parsed = new URL(url); if (parsed.hostname === 'auth' && parsed.pathname === '/callback') { const params = parsed.searchParams.toString(); - mainWindow.loadURL(`http://localhost:${serverPort}/auth/callback?${params}`); + mainWindow.loadURL(`http://localhost:${getServerPort()}/auth/callback?${params}`); } mainWindow.show(); diff --git a/surfsense_desktop/src/modules/server.ts b/surfsense_desktop/src/modules/server.ts new file mode 100644 index 000000000..969478e4a --- /dev/null +++ b/surfsense_desktop/src/modules/server.ts @@ -0,0 +1,53 @@ +import path from 'path'; +import { app } from 'electron'; +import { getPort } from 'get-port-please'; + +const isDev = !app.isPackaged; +let serverPort = 3000; + +export function getServerPort(): number { + return serverPort; +} + +function getStandalonePath(): string { + if (isDev) { + return path.join(__dirname, '..', '..', 'surfsense_web', '.next', 'standalone', 'surfsense_web'); + } + return path.join(process.resourcesPath, 'standalone'); +} + +async function waitForServer(url: string, maxRetries = 60): Promise { + for (let i = 0; i < maxRetries; i++) { + try { + const res = await fetch(url); + if (res.ok || res.status === 404 || res.status === 500) return true; + } catch { + // not ready yet + } + await new Promise((r) => setTimeout(r, 500)); + } + return false; +} + +export async function startNextServer(): Promise { + if (isDev) return; + + serverPort = await getPort({ port: 3000, portRange: [30_011, 50_000] }); + console.log(`Selected port ${serverPort}`); + + const standalonePath = getStandalonePath(); + const serverScript = path.join(standalonePath, 'server.js'); + + process.env.PORT = String(serverPort); + process.env.HOSTNAME = 'localhost'; + process.env.NODE_ENV = 'production'; + process.chdir(standalonePath); + + require(serverScript); + + const ready = await waitForServer(`http://localhost:${serverPort}`); + if (!ready) { + throw new Error('Next.js server failed to start within 30 s'); + } + console.log(`Next.js server ready on port ${serverPort}`); +} From 95c4a674be26a76fce84b95edbef07be9cc61ae5 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 20 Mar 2026 19:50:50 +0200 Subject: [PATCH 003/102] refactor(desktop): extract window creation into modules/window.ts --- surfsense_desktop/src/main.ts | 81 ++++--------------------- surfsense_desktop/src/modules/window.ts | 67 ++++++++++++++++++++ 2 files changed, 80 insertions(+), 68 deletions(-) create mode 100644 surfsense_desktop/src/modules/window.ts diff --git a/surfsense_desktop/src/main.ts b/surfsense_desktop/src/main.ts index db0f8f937..efbee44bb 100644 --- a/surfsense_desktop/src/main.ts +++ b/surfsense_desktop/src/main.ts @@ -1,72 +1,16 @@ -import { app, BrowserWindow, shell, ipcMain, session, dialog, Menu } from 'electron'; +import { app, BrowserWindow, shell, ipcMain, dialog, Menu } from 'electron'; import path from 'path'; import { autoUpdater } from 'electron-updater'; import { registerGlobalErrorHandlers, showErrorDialog } from './modules/errors'; import { startNextServer, getServerPort } from './modules/server'; +import { createMainWindow, getMainWindow } from './modules/window'; registerGlobalErrorHandlers(); const isDev = !app.isPackaged; -let mainWindow: BrowserWindow | null = null; let deepLinkUrl: string | null = null; const PROTOCOL = 'surfsense'; -const HOSTED_FRONTEND_URL = process.env.HOSTED_FRONTEND_URL as string; - -function createWindow() { - mainWindow = new BrowserWindow({ - width: 1280, - height: 800, - minWidth: 800, - minHeight: 600, - webPreferences: { - preload: path.join(__dirname, 'preload.js'), - contextIsolation: true, - nodeIntegration: false, - sandbox: true, - webviewTag: false, - }, - show: false, - titleBarStyle: 'hiddenInset', - }); - - mainWindow.once('ready-to-show', () => { - mainWindow?.show(); - }); - - mainWindow.loadURL(`http://localhost:${getServerPort()}/login`); - - // External links open in system browser, not in the Electron window - mainWindow.webContents.setWindowOpenHandler(({ url }) => { - if (url.startsWith('http://localhost')) { - return { action: 'allow' }; - } - shell.openExternal(url); - return { action: 'deny' }; - }); - - // Intercept backend OAuth redirects targeting the hosted web frontend - // and rewrite them to localhost so the user stays in the desktop app. - const filter = { urls: [`${HOSTED_FRONTEND_URL}/*`] }; - session.defaultSession.webRequest.onBeforeRequest(filter, (details, callback) => { - const rewritten = details.url.replace(HOSTED_FRONTEND_URL, `http://localhost:${getServerPort()}`); - callback({ redirectURL: rewritten }); - }); - - mainWindow.webContents.on('did-fail-load', (_event, errorCode, errorDescription, validatedURL) => { - console.error(`Failed to load ${validatedURL}: ${errorDescription} (${errorCode})`); - if (errorCode === -3) return; // ERR_ABORTED — normal during redirects - showErrorDialog('Page failed to load', new Error(`${errorDescription} (${errorCode})\n${validatedURL}`)); - }); - - if (isDev) { - mainWindow.webContents.openDevTools(); - } - - mainWindow.on('closed', () => { - mainWindow = null; - }); -} // IPC handlers ipcMain.on('open-external', (_event, url: string) => { @@ -90,17 +34,17 @@ function handleDeepLink(url: string) { deepLinkUrl = url; - if (!mainWindow) return; + const win = getMainWindow(); + if (!win) return; - // Rewrite surfsense:// deep link to localhost so TokenHandler.tsx processes it const parsed = new URL(url); if (parsed.hostname === 'auth' && parsed.pathname === '/callback') { const params = parsed.searchParams.toString(); - mainWindow.loadURL(`http://localhost:${getServerPort()}/auth/callback?${params}`); + win.loadURL(`http://localhost:${getServerPort()}/auth/callback?${params}`); } - mainWindow.show(); - mainWindow.focus(); + win.show(); + win.focus(); } // Single instance lock — second instance passes deep link to first @@ -113,9 +57,10 @@ if (!gotTheLock) { const url = argv.find((arg) => arg.startsWith(`${PROTOCOL}://`)); if (url) handleDeepLink(url); - if (mainWindow) { - if (mainWindow.isMinimized()) mainWindow.restore(); - mainWindow.focus(); + const win = getMainWindow(); + if (win) { + if (win.isMinimized()) win.restore(); + win.focus(); } }); } @@ -188,7 +133,7 @@ app.whenReady().then(async () => { setTimeout(() => app.quit(), 0); return; } - createWindow(); + createMainWindow(); setupAutoUpdater(); // If a deep link was received before the window was ready, handle it now @@ -199,7 +144,7 @@ app.whenReady().then(async () => { app.on('activate', () => { if (BrowserWindow.getAllWindows().length === 0) { - createWindow(); + createMainWindow(); } }); }); diff --git a/surfsense_desktop/src/modules/window.ts b/surfsense_desktop/src/modules/window.ts new file mode 100644 index 000000000..1b3f3baed --- /dev/null +++ b/surfsense_desktop/src/modules/window.ts @@ -0,0 +1,67 @@ +import { app, BrowserWindow, shell, session } from 'electron'; +import path from 'path'; +import { showErrorDialog } from './errors'; +import { getServerPort } from './server'; + +const isDev = !app.isPackaged; +const HOSTED_FRONTEND_URL = process.env.HOSTED_FRONTEND_URL as string; + +let mainWindow: BrowserWindow | null = null; + +export function getMainWindow(): BrowserWindow | null { + return mainWindow; +} + +export function createMainWindow(): BrowserWindow { + mainWindow = new BrowserWindow({ + width: 1280, + height: 800, + minWidth: 800, + minHeight: 600, + webPreferences: { + preload: path.join(__dirname, 'preload.js'), + contextIsolation: true, + nodeIntegration: false, + sandbox: true, + webviewTag: false, + }, + show: false, + titleBarStyle: 'hiddenInset', + }); + + mainWindow.once('ready-to-show', () => { + mainWindow?.show(); + }); + + mainWindow.loadURL(`http://localhost:${getServerPort()}/login`); + + mainWindow.webContents.setWindowOpenHandler(({ url }) => { + if (url.startsWith('http://localhost')) { + return { action: 'allow' }; + } + shell.openExternal(url); + return { action: 'deny' }; + }); + + const filter = { urls: [`${HOSTED_FRONTEND_URL}/*`] }; + session.defaultSession.webRequest.onBeforeRequest(filter, (details, callback) => { + const rewritten = details.url.replace(HOSTED_FRONTEND_URL, `http://localhost:${getServerPort()}`); + callback({ redirectURL: rewritten }); + }); + + mainWindow.webContents.on('did-fail-load', (_event, errorCode, errorDescription, validatedURL) => { + console.error(`Failed to load ${validatedURL}: ${errorDescription} (${errorCode})`); + if (errorCode === -3) return; + showErrorDialog('Page failed to load', new Error(`${errorDescription} (${errorCode})\n${validatedURL}`)); + }); + + if (isDev) { + mainWindow.webContents.openDevTools(); + } + + mainWindow.on('closed', () => { + mainWindow = null; + }); + + return mainWindow; +} From 35da1cf1b4e8f5476388fbce110fcc0689299b61 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 20 Mar 2026 19:55:44 +0200 Subject: [PATCH 004/102] refactor(desktop): extract deep link handling into modules/deep-links.ts --- surfsense_desktop/src/main.ts | 71 +++------------------ surfsense_desktop/src/modules/deep-links.ts | 66 +++++++++++++++++++ 2 files changed, 74 insertions(+), 63 deletions(-) create mode 100644 surfsense_desktop/src/modules/deep-links.ts diff --git a/surfsense_desktop/src/main.ts b/surfsense_desktop/src/main.ts index efbee44bb..b61e82008 100644 --- a/surfsense_desktop/src/main.ts +++ b/surfsense_desktop/src/main.ts @@ -1,16 +1,17 @@ import { app, BrowserWindow, shell, ipcMain, dialog, Menu } from 'electron'; -import path from 'path'; import { autoUpdater } from 'electron-updater'; import { registerGlobalErrorHandlers, showErrorDialog } from './modules/errors'; -import { startNextServer, getServerPort } from './modules/server'; -import { createMainWindow, getMainWindow } from './modules/window'; +import { startNextServer } from './modules/server'; +import { createMainWindow } from './modules/window'; +import { setupDeepLinks, handlePendingDeepLink } from './modules/deep-links'; registerGlobalErrorHandlers(); -const isDev = !app.isPackaged; -let deepLinkUrl: string | null = null; +if (!setupDeepLinks()) { + app.quit(); +} -const PROTOCOL = 'surfsense'; +const isDev = !app.isPackaged; // IPC handlers ipcMain.on('open-external', (_event, url: string) => { @@ -28,58 +29,6 @@ ipcMain.handle('get-app-version', () => { return app.getVersion(); }); -// Deep link handling -function handleDeepLink(url: string) { - if (!url.startsWith(`${PROTOCOL}://`)) return; - - deepLinkUrl = url; - - const win = getMainWindow(); - if (!win) return; - - const parsed = new URL(url); - if (parsed.hostname === 'auth' && parsed.pathname === '/callback') { - const params = parsed.searchParams.toString(); - win.loadURL(`http://localhost:${getServerPort()}/auth/callback?${params}`); - } - - win.show(); - win.focus(); -} - -// Single instance lock — second instance passes deep link to first -const gotTheLock = app.requestSingleInstanceLock(); -if (!gotTheLock) { - app.quit(); -} else { - app.on('second-instance', (_event, argv) => { - // Windows/Linux: deep link URL is in argv - const url = argv.find((arg) => arg.startsWith(`${PROTOCOL}://`)); - if (url) handleDeepLink(url); - - const win = getMainWindow(); - if (win) { - if (win.isMinimized()) win.restore(); - win.focus(); - } - }); -} - -// macOS: deep link arrives via open-url event -app.on('open-url', (event, url) => { - event.preventDefault(); - handleDeepLink(url); -}); - -// Register surfsense:// protocol -if (process.defaultApp) { - if (process.argv.length >= 2) { - app.setAsDefaultProtocolClient(PROTOCOL, process.execPath, [path.resolve(process.argv[1])]); - } -} else { - app.setAsDefaultProtocolClient(PROTOCOL); -} - function setupAutoUpdater() { if (isDev) return; @@ -136,11 +85,7 @@ app.whenReady().then(async () => { createMainWindow(); setupAutoUpdater(); - // If a deep link was received before the window was ready, handle it now - if (deepLinkUrl) { - handleDeepLink(deepLinkUrl); - deepLinkUrl = null; - } + handlePendingDeepLink(); app.on('activate', () => { if (BrowserWindow.getAllWindows().length === 0) { diff --git a/surfsense_desktop/src/modules/deep-links.ts b/surfsense_desktop/src/modules/deep-links.ts new file mode 100644 index 000000000..1a2b08395 --- /dev/null +++ b/surfsense_desktop/src/modules/deep-links.ts @@ -0,0 +1,66 @@ +import { app } from 'electron'; +import path from 'path'; +import { getMainWindow } from './window'; +import { getServerPort } from './server'; + +const PROTOCOL = 'surfsense'; + +let deepLinkUrl: string | null = null; + +function handleDeepLink(url: string) { + if (!url.startsWith(`${PROTOCOL}://`)) return; + + deepLinkUrl = url; + + const win = getMainWindow(); + if (!win) return; + + const parsed = new URL(url); + if (parsed.hostname === 'auth' && parsed.pathname === '/callback') { + const params = parsed.searchParams.toString(); + win.loadURL(`http://localhost:${getServerPort()}/auth/callback?${params}`); + } + + win.show(); + win.focus(); +} + +export function setupDeepLinks(): boolean { + const gotTheLock = app.requestSingleInstanceLock(); + if (!gotTheLock) { + return false; + } + + app.on('second-instance', (_event, argv) => { + const url = argv.find((arg) => arg.startsWith(`${PROTOCOL}://`)); + if (url) handleDeepLink(url); + + const win = getMainWindow(); + if (win) { + if (win.isMinimized()) win.restore(); + win.focus(); + } + }); + + app.on('open-url', (event, url) => { + event.preventDefault(); + handleDeepLink(url); + }); + + if (process.defaultApp) { + if (process.argv.length >= 2) { + app.setAsDefaultProtocolClient(PROTOCOL, process.execPath, [path.resolve(process.argv[1])]); + } + } else { + app.setAsDefaultProtocolClient(PROTOCOL); + } + + return true; +} + +export function handlePendingDeepLink(): void { + if (deepLinkUrl) { + handleDeepLink(deepLinkUrl); + deepLinkUrl = null; + } +} From d868464de71ba9ee34e14297b472fd6e2e3c7e67 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 20 Mar 2026 19:59:20 +0200 Subject: [PATCH 005/102] refactor(desktop): extract auto-updater into modules/auto-updater.ts --- surfsense_desktop/src/main.ts | 37 +------------------ surfsense_desktop/src/modules/auto-updater.ts | 33 +++++++++++++++++ 2 files changed, 35 insertions(+), 35 deletions(-) create mode 100644 surfsense_desktop/src/modules/auto-updater.ts diff --git a/surfsense_desktop/src/main.ts b/surfsense_desktop/src/main.ts index b61e82008..11b2f7ab6 100644 --- a/surfsense_desktop/src/main.ts +++ b/surfsense_desktop/src/main.ts @@ -1,9 +1,9 @@ -import { app, BrowserWindow, shell, ipcMain, dialog, Menu } from 'electron'; -import { autoUpdater } from 'electron-updater'; +import { app, BrowserWindow, shell, ipcMain, Menu } from 'electron'; import { registerGlobalErrorHandlers, showErrorDialog } from './modules/errors'; import { startNextServer } from './modules/server'; import { createMainWindow } from './modules/window'; import { setupDeepLinks, handlePendingDeepLink } from './modules/deep-links'; +import { setupAutoUpdater } from './modules/auto-updater'; registerGlobalErrorHandlers(); @@ -11,8 +11,6 @@ if (!setupDeepLinks()) { app.quit(); } -const isDev = !app.isPackaged; - // IPC handlers ipcMain.on('open-external', (_event, url: string) => { try { @@ -29,37 +27,6 @@ ipcMain.handle('get-app-version', () => { return app.getVersion(); }); -function setupAutoUpdater() { - if (isDev) return; - - autoUpdater.autoDownload = true; - - autoUpdater.on('update-available', (info) => { - console.log(`Update available: ${info.version}`); - }); - - autoUpdater.on('update-downloaded', (info) => { - console.log(`Update downloaded: ${info.version}`); - dialog.showMessageBox({ - type: 'info', - buttons: ['Restart', 'Later'], - defaultId: 0, - title: 'Update Ready', - message: `Version ${info.version} has been downloaded. Restart to apply the update.`, - }).then(({ response }) => { - if (response === 0) { - autoUpdater.quitAndInstall(); - } - }); - }); - - autoUpdater.on('error', (err) => { - console.error('Auto-updater error:', err); - }); - - autoUpdater.checkForUpdates(); -} - function setupMenu() { const isMac = process.platform === 'darwin'; const template: Electron.MenuItemConstructorOptions[] = [ diff --git a/surfsense_desktop/src/modules/auto-updater.ts b/surfsense_desktop/src/modules/auto-updater.ts new file mode 100644 index 000000000..f895516c0 --- /dev/null +++ b/surfsense_desktop/src/modules/auto-updater.ts @@ -0,0 +1,33 @@ +import { app, dialog } from 'electron'; +import { autoUpdater } from 'electron-updater'; + +export function setupAutoUpdater(): void { + if (!app.isPackaged) return; + + autoUpdater.autoDownload = true; + + autoUpdater.on('update-available', (info) => { + console.log(`Update available: ${info.version}`); + }); + + autoUpdater.on('update-downloaded', (info) => { + console.log(`Update downloaded: ${info.version}`); + dialog.showMessageBox({ + type: 'info', + buttons: ['Restart', 'Later'], + defaultId: 0, + title: 'Update Ready', + message: `Version ${info.version} has been downloaded. Restart to apply the update.`, + }).then(({ response }) => { + if (response === 0) { + autoUpdater.quitAndInstall(); + } + }); + }); + + autoUpdater.on('error', (err) => { + console.error('Auto-updater error:', err); + }); + + autoUpdater.checkForUpdates(); +} From b6a7f0afa7bced3f83fdd90a4424b4c9c8038a04 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 20 Mar 2026 20:01:13 +0200 Subject: [PATCH 006/102] refactor(desktop): extract menu setup into modules/menu.ts --- surfsense_desktop/src/main.ts | 15 ++------------- surfsense_desktop/src/modules/menu.ts | 13 +++++++++++++ 2 files changed, 15 insertions(+), 13 deletions(-) create mode 100644 surfsense_desktop/src/modules/menu.ts diff --git a/surfsense_desktop/src/main.ts b/surfsense_desktop/src/main.ts index 11b2f7ab6..6d55d478b 100644 --- a/surfsense_desktop/src/main.ts +++ b/surfsense_desktop/src/main.ts @@ -1,9 +1,10 @@ -import { app, BrowserWindow, shell, ipcMain, Menu } from 'electron'; +import { app, BrowserWindow, shell, ipcMain } from 'electron'; import { registerGlobalErrorHandlers, showErrorDialog } from './modules/errors'; import { startNextServer } from './modules/server'; import { createMainWindow } from './modules/window'; import { setupDeepLinks, handlePendingDeepLink } from './modules/deep-links'; import { setupAutoUpdater } from './modules/auto-updater'; +import { setupMenu } from './modules/menu'; registerGlobalErrorHandlers(); @@ -27,18 +28,6 @@ ipcMain.handle('get-app-version', () => { return app.getVersion(); }); -function setupMenu() { - const isMac = process.platform === 'darwin'; - const template: Electron.MenuItemConstructorOptions[] = [ - ...(isMac ? [{ role: 'appMenu' as const }] : []), - { role: 'fileMenu' as const }, - { role: 'editMenu' as const }, - { role: 'viewMenu' as const }, - { role: 'windowMenu' as const }, - ]; - Menu.setApplicationMenu(Menu.buildFromTemplate(template)); -} - // App lifecycle app.whenReady().then(async () => { setupMenu(); diff --git a/surfsense_desktop/src/modules/menu.ts b/surfsense_desktop/src/modules/menu.ts new file mode 100644 index 000000000..128a73a21 --- /dev/null +++ b/surfsense_desktop/src/modules/menu.ts @@ -0,0 +1,13 @@ +import { Menu } from 'electron'; + +export function setupMenu(): void { + const isMac = process.platform === 'darwin'; + const template: Electron.MenuItemConstructorOptions[] = [ + ...(isMac ? [{ role: 'appMenu' as const }] : []), + { role: 'fileMenu' as const }, + { role: 'editMenu' as const }, + { role: 'viewMenu' as const }, + { role: 'windowMenu' as const }, + ]; + Menu.setApplicationMenu(Menu.buildFromTemplate(template)); +} From fb4dbf04ae805e6e8ce8e77d5e02210d2ac881f4 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 20 Mar 2026 20:06:21 +0200 Subject: [PATCH 007/102] refactor(desktop): extract IPC channels and handlers into src/ipc/ --- surfsense_desktop/src/ipc/channels.ts | 6 ++++++ surfsense_desktop/src/ipc/handlers.ts | 19 +++++++++++++++++++ surfsense_desktop/src/main.ts | 19 +++---------------- 3 files changed, 28 insertions(+), 16 deletions(-) create mode 100644 surfsense_desktop/src/ipc/channels.ts create mode 100644 surfsense_desktop/src/ipc/handlers.ts diff --git a/surfsense_desktop/src/ipc/channels.ts b/surfsense_desktop/src/ipc/channels.ts new file mode 100644 index 000000000..4d0f3bf80 --- /dev/null +++ b/surfsense_desktop/src/ipc/channels.ts @@ -0,0 +1,6 @@ +export const IPC_CHANNELS = { + OPEN_EXTERNAL: 'open-external', + GET_APP_VERSION: 'get-app-version', + DEEP_LINK: 'deep-link', + GET_CLIPBOARD_CONTENT: 'get-clipboard-content', +} as const; diff --git a/surfsense_desktop/src/ipc/handlers.ts b/surfsense_desktop/src/ipc/handlers.ts new file mode 100644 index 000000000..18e343719 --- /dev/null +++ b/surfsense_desktop/src/ipc/handlers.ts @@ -0,0 +1,19 @@ +import { app, ipcMain, shell } from 'electron'; +import { IPC_CHANNELS } from './channels'; + +export function registerIpcHandlers(): void { + ipcMain.on(IPC_CHANNELS.OPEN_EXTERNAL, (_event, url: string) => { + try { + const parsed = new URL(url); + if (parsed.protocol === 'http:' || parsed.protocol === 'https:') { + shell.openExternal(url); + } + } catch { + // invalid URL — ignore + } + }); + + ipcMain.handle(IPC_CHANNELS.GET_APP_VERSION, () => { + return app.getVersion(); + }); +} diff --git a/surfsense_desktop/src/main.ts b/surfsense_desktop/src/main.ts index 6d55d478b..aff64db22 100644 --- a/surfsense_desktop/src/main.ts +++ b/surfsense_desktop/src/main.ts @@ -1,10 +1,11 @@ -import { app, BrowserWindow, shell, ipcMain } from 'electron'; +import { app, BrowserWindow } from 'electron'; import { registerGlobalErrorHandlers, showErrorDialog } from './modules/errors'; import { startNextServer } from './modules/server'; import { createMainWindow } from './modules/window'; import { setupDeepLinks, handlePendingDeepLink } from './modules/deep-links'; import { setupAutoUpdater } from './modules/auto-updater'; import { setupMenu } from './modules/menu'; +import { registerIpcHandlers } from './ipc/handlers'; registerGlobalErrorHandlers(); @@ -12,21 +13,7 @@ if (!setupDeepLinks()) { app.quit(); } -// IPC handlers -ipcMain.on('open-external', (_event, url: string) => { - try { - const parsed = new URL(url); - if (parsed.protocol === 'http:' || parsed.protocol === 'https:') { - shell.openExternal(url); - } - } catch { - // invalid URL — ignore - } -}); - -ipcMain.handle('get-app-version', () => { - return app.getVersion(); -}); +registerIpcHandlers(); // App lifecycle app.whenReady().then(async () => { From ecdd7354e930d71364f6c2735ca24201cf55c83b Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 20 Mar 2026 20:13:58 +0200 Subject: [PATCH 008/102] refactor(desktop): use IPC channel constants in preload, add getClipboardContent --- surfsense_desktop/src/preload.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/surfsense_desktop/src/preload.ts b/surfsense_desktop/src/preload.ts index dd4b89cf8..3f0f4be1f 100644 --- a/surfsense_desktop/src/preload.ts +++ b/surfsense_desktop/src/preload.ts @@ -1,4 +1,5 @@ const { contextBridge, ipcRenderer } = require('electron'); +const { IPC_CHANNELS } = require('./ipc/channels'); contextBridge.exposeInMainWorld('electronAPI', { versions: { @@ -7,13 +8,14 @@ contextBridge.exposeInMainWorld('electronAPI', { chrome: process.versions.chrome, platform: process.platform, }, - openExternal: (url: string) => ipcRenderer.send('open-external', url), - getAppVersion: () => ipcRenderer.invoke('get-app-version'), + openExternal: (url: string) => ipcRenderer.send(IPC_CHANNELS.OPEN_EXTERNAL, url), + getAppVersion: () => ipcRenderer.invoke(IPC_CHANNELS.GET_APP_VERSION), + getClipboardContent: () => ipcRenderer.invoke(IPC_CHANNELS.GET_CLIPBOARD_CONTENT), onDeepLink: (callback: (url: string) => void) => { const listener = (_event: unknown, url: string) => callback(url); - ipcRenderer.on('deep-link', listener); + ipcRenderer.on(IPC_CHANNELS.DEEP_LINK, listener); return () => { - ipcRenderer.removeListener('deep-link', listener); + ipcRenderer.removeListener(IPC_CHANNELS.DEEP_LINK, listener); }; }, }); From 275fa86ecddbf5c9b51cb89f74734af49f7549d2 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 20 Mar 2026 20:22:37 +0200 Subject: [PATCH 009/102] feat(desktop): add system tray with clipboard-to-chat support --- surfsense_desktop/src/main.ts | 4 ++ surfsense_desktop/src/modules/clipboard.ts | 14 +++++ surfsense_desktop/src/modules/tray.ts | 73 ++++++++++++++++++++++ 3 files changed, 91 insertions(+) create mode 100644 surfsense_desktop/src/modules/clipboard.ts create mode 100644 surfsense_desktop/src/modules/tray.ts diff --git a/surfsense_desktop/src/main.ts b/surfsense_desktop/src/main.ts index aff64db22..10f442c08 100644 --- a/surfsense_desktop/src/main.ts +++ b/surfsense_desktop/src/main.ts @@ -5,7 +5,9 @@ import { createMainWindow } from './modules/window'; import { setupDeepLinks, handlePendingDeepLink } from './modules/deep-links'; import { setupAutoUpdater } from './modules/auto-updater'; import { setupMenu } from './modules/menu'; +import { setupTray } from './modules/tray'; import { registerIpcHandlers } from './ipc/handlers'; +import { registerClipboardHandlers } from './modules/clipboard'; registerGlobalErrorHandlers(); @@ -14,6 +16,7 @@ if (!setupDeepLinks()) { } registerIpcHandlers(); +registerClipboardHandlers(); // App lifecycle app.whenReady().then(async () => { @@ -26,6 +29,7 @@ app.whenReady().then(async () => { return; } createMainWindow(); + setupTray(); setupAutoUpdater(); handlePendingDeepLink(); diff --git a/surfsense_desktop/src/modules/clipboard.ts b/surfsense_desktop/src/modules/clipboard.ts new file mode 100644 index 000000000..4f9d7b802 --- /dev/null +++ b/surfsense_desktop/src/modules/clipboard.ts @@ -0,0 +1,14 @@ +import { ipcMain } from 'electron'; +import { IPC_CHANNELS } from '../ipc/channels'; + +let lastClipboardContent = ''; + +export function setClipboardContent(text: string): void { + lastClipboardContent = text; +} + +export function registerClipboardHandlers(): void { + ipcMain.handle(IPC_CHANNELS.GET_CLIPBOARD_CONTENT, () => { + return lastClipboardContent; + }); +} diff --git a/surfsense_desktop/src/modules/tray.ts b/surfsense_desktop/src/modules/tray.ts new file mode 100644 index 000000000..3527cf691 --- /dev/null +++ b/surfsense_desktop/src/modules/tray.ts @@ -0,0 +1,73 @@ +import { app, BrowserWindow, clipboard, Menu, Tray } from 'electron'; +import path from 'path'; +import { getServerPort } from './server'; +import { setClipboardContent } from './clipboard'; + +let tray: Tray | null = null; +let clipWindow: BrowserWindow | null = null; + +function getIconPath(): string { + if (app.isPackaged) { + return path.join(process.resourcesPath, 'icon.png'); + } + return path.join(__dirname, '..', 'assets', 'icon.png'); +} + +function createClipWindow(): BrowserWindow { + if (clipWindow && !clipWindow.isDestroyed()) { + clipWindow.focus(); + return clipWindow; + } + + clipWindow = new BrowserWindow({ + width: 420, + height: 620, + resizable: true, + minimizable: false, + maximizable: false, + fullscreenable: false, + webPreferences: { + preload: path.join(__dirname, 'preload.js'), + contextIsolation: true, + nodeIntegration: false, + sandbox: true, + }, + show: false, + titleBarStyle: 'hiddenInset', + }); + + clipWindow.loadURL(`http://localhost:${getServerPort()}/dashboard`); + + clipWindow.once('ready-to-show', () => { + clipWindow?.show(); + }); + + clipWindow.on('closed', () => { + clipWindow = null; + }); + + return clipWindow; +} + +export function setupTray(): void { + tray = new Tray(getIconPath()); + tray.setToolTip('SurfSense'); + + const contextMenu = Menu.buildFromTemplate([ + { + label: 'Ask about clipboard', + click: () => { + const text = clipboard.readText(); + setClipboardContent(text); + createClipWindow(); + }, + }, + { type: 'separator' }, + { + label: 'Quit', + click: () => app.quit(), + }, + ]); + + tray.setContextMenu(contextMenu); +} From 5ab534511c8b6ff99d324d17579276ac4cea0801 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 20 Mar 2026 20:26:33 +0200 Subject: [PATCH 010/102] feat(web): add initialText prop to InlineMentionEditor --- .../assistant-ui/inline-mention-editor.tsx | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx index dacc845ec..656a3ca2d 100644 --- a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx +++ b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx @@ -47,6 +47,7 @@ interface InlineMentionEditorProps { disabled?: boolean; className?: string; initialDocuments?: MentionedDocument[]; + initialText?: string; } // Unique data attribute to identify chip elements @@ -96,6 +97,7 @@ export const InlineMentionEditor = forwardRef { @@ -115,6 +117,16 @@ export const InlineMentionEditor = forwardRef { + if (!initialText || initialTextAppliedRef.current || !editorRef.current) return; + initialTextAppliedRef.current = true; + editorRef.current.textContent = initialText; + setIsEmpty(false); + onChange?.(initialText, Array.from(mentionedDocs.values())); + }, [initialText]); // eslint-disable-line react-hooks/exhaustive-deps + // Focus at the end of the editor const focusAtEnd = useCallback(() => { if (!editorRef.current) return; From c78f0e78aae2e540eaf7f86cf8cb194e53ec6980 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 20 Mar 2026 20:33:43 +0200 Subject: [PATCH 011/102] feat(web): wire Composer to pre-fill clipboard content from Electron tray --- surfsense_web/components/assistant-ui/thread.tsx | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index b7a5bcf0e..b6bbea2f4 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -314,6 +314,16 @@ const Composer: FC = () => { const composerRuntime = useComposerRuntime(); const hasAutoFocusedRef = useRef(false); + // Clipboard content from Electron tray (pre-filled into composer) + const [clipboardText, setClipboardText] = useState(); + useEffect(() => { + const api = (window as { electronAPI?: { getClipboardContent?: () => Promise } }).electronAPI; + if (!api?.getClipboardContent) return; + api.getClipboardContent().then((text) => { + if (text) setClipboardText(text); + }); + }, []); + const isThreadEmpty = useAssistantState(({ thread }) => thread.isEmpty); const isThreadRunning = useAssistantState(({ thread }) => thread.isRunning); @@ -520,6 +530,7 @@ const Composer: FC = () => { onDocumentRemove={handleDocumentRemove} onSubmit={handleSubmit} onKeyDown={handleKeyDown} + initialText={clipboardText} className="min-h-[24px]" /> From 9e058e13290fdbdb258d7984afc611fecdcda190 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 20 Mar 2026 20:35:29 +0200 Subject: [PATCH 012/102] chore: clean up comments in editor and composer --- surfsense_web/components/assistant-ui/inline-mention-editor.tsx | 2 +- surfsense_web/components/assistant-ui/thread.tsx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx index 656a3ca2d..be48b60fa 100644 --- a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx +++ b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx @@ -117,7 +117,7 @@ export const InlineMentionEditor = forwardRef { if (!initialText || initialTextAppliedRef.current || !editorRef.current) return; diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index b6bbea2f4..023b0f7bc 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -314,7 +314,7 @@ const Composer: FC = () => { const composerRuntime = useComposerRuntime(); const hasAutoFocusedRef = useRef(false); - // Clipboard content from Electron tray (pre-filled into composer) + // Clipboard content const [clipboardText, setClipboardText] = useState(); useEffect(() => { const api = (window as { electronAPI?: { getClipboardContent?: () => Promise } }).electronAPI; From d6d4ebc75de6bdbff5ffb4732d8ad8e8e9693b80 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 20 Mar 2026 20:39:18 +0200 Subject: [PATCH 013/102] feat(web): add ElectronAPI type declaration for window.electronAPI --- surfsense_web/components/assistant-ui/thread.tsx | 2 +- surfsense_web/types/window.d.ts | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index 023b0f7bc..389b9f204 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -317,7 +317,7 @@ const Composer: FC = () => { // Clipboard content const [clipboardText, setClipboardText] = useState(); useEffect(() => { - const api = (window as { electronAPI?: { getClipboardContent?: () => Promise } }).electronAPI; + const api = window.electronAPI; if (!api?.getClipboardContent) return; api.getClipboardContent().then((text) => { if (text) setClipboardText(text); diff --git a/surfsense_web/types/window.d.ts b/surfsense_web/types/window.d.ts index 4d4abc9c1..487e2058f 100644 --- a/surfsense_web/types/window.d.ts +++ b/surfsense_web/types/window.d.ts @@ -1,7 +1,21 @@ import type { PostHog } from "posthog-js"; +interface ElectronAPI { + versions: { + electron: string; + node: string; + chrome: string; + platform: string; + }; + openExternal: (url: string) => void; + getAppVersion: () => Promise; + getClipboardContent: () => Promise; + onDeepLink: (callback: (url: string) => void) => () => void; +} + declare global { interface Window { posthog?: PostHog; + electronAPI?: ElectronAPI; } } From dea0651a94921f99f4f5d7017f6579fb91d71c80 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 23 Mar 2026 15:49:50 +0200 Subject: [PATCH 014/102] fix(desktop): include tray icon in packaged app resources --- surfsense_desktop/electron-builder.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/surfsense_desktop/electron-builder.yml b/surfsense_desktop/electron-builder.yml index eaca0f19b..715366e0c 100644 --- a/surfsense_desktop/electron-builder.yml +++ b/surfsense_desktop/electron-builder.yml @@ -13,6 +13,8 @@ files: - "!scripts" - "!release" extraResources: + - from: assets/icon.png + to: icon.png - from: ../surfsense_web/.next/standalone/surfsense_web/ to: standalone/ filter: From f783b00d2e287a488b5129e4fe2cbc652c175ab6 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 24 Mar 2026 18:40:07 +0200 Subject: [PATCH 015/102] fix(desktop): bind to 0.0.0.0 and silence auto-updater 404 --- surfsense_desktop/src/modules/auto-updater.ts | 4 ++-- surfsense_desktop/src/modules/server.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/surfsense_desktop/src/modules/auto-updater.ts b/surfsense_desktop/src/modules/auto-updater.ts index f895516c0..2e7680953 100644 --- a/surfsense_desktop/src/modules/auto-updater.ts +++ b/surfsense_desktop/src/modules/auto-updater.ts @@ -26,8 +26,8 @@ export function setupAutoUpdater(): void { }); autoUpdater.on('error', (err) => { - console.error('Auto-updater error:', err); + console.log('Auto-updater: update check skipped —', err.message?.split('\n')[0]); }); - autoUpdater.checkForUpdates(); + autoUpdater.checkForUpdates().catch(() => {}); } diff --git a/surfsense_desktop/src/modules/server.ts b/surfsense_desktop/src/modules/server.ts index 969478e4a..e2f078a8c 100644 --- a/surfsense_desktop/src/modules/server.ts +++ b/surfsense_desktop/src/modules/server.ts @@ -39,7 +39,7 @@ export async function startNextServer(): Promise { const serverScript = path.join(standalonePath, 'server.js'); process.env.PORT = String(serverPort); - process.env.HOSTNAME = 'localhost'; + process.env.HOSTNAME = '0.0.0.0'; process.env.NODE_ENV = 'production'; process.chdir(standalonePath); From 59e7f8f06880bf7d7cd42f7a9c6cb8e17239a173 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 24 Mar 2026 19:12:04 +0200 Subject: [PATCH 016/102] remove tray and clipboard-to-chat feature --- surfsense_desktop/electron-builder.yml | 2 - surfsense_desktop/src/ipc/channels.ts | 1 - surfsense_desktop/src/main.ts | 4 - surfsense_desktop/src/modules/clipboard.ts | 14 ---- surfsense_desktop/src/modules/tray.ts | 73 ------------------- surfsense_desktop/src/preload.ts | 1 - .../assistant-ui/inline-mention-editor.tsx | 12 --- .../components/assistant-ui/thread.tsx | 11 --- surfsense_web/types/window.d.ts | 1 - 9 files changed, 119 deletions(-) delete mode 100644 surfsense_desktop/src/modules/clipboard.ts delete mode 100644 surfsense_desktop/src/modules/tray.ts diff --git a/surfsense_desktop/electron-builder.yml b/surfsense_desktop/electron-builder.yml index 715366e0c..eaca0f19b 100644 --- a/surfsense_desktop/electron-builder.yml +++ b/surfsense_desktop/electron-builder.yml @@ -13,8 +13,6 @@ files: - "!scripts" - "!release" extraResources: - - from: assets/icon.png - to: icon.png - from: ../surfsense_web/.next/standalone/surfsense_web/ to: standalone/ filter: diff --git a/surfsense_desktop/src/ipc/channels.ts b/surfsense_desktop/src/ipc/channels.ts index 4d0f3bf80..8ae21cfcf 100644 --- a/surfsense_desktop/src/ipc/channels.ts +++ b/surfsense_desktop/src/ipc/channels.ts @@ -2,5 +2,4 @@ export const IPC_CHANNELS = { OPEN_EXTERNAL: 'open-external', GET_APP_VERSION: 'get-app-version', DEEP_LINK: 'deep-link', - GET_CLIPBOARD_CONTENT: 'get-clipboard-content', } as const; diff --git a/surfsense_desktop/src/main.ts b/surfsense_desktop/src/main.ts index 10f442c08..aff64db22 100644 --- a/surfsense_desktop/src/main.ts +++ b/surfsense_desktop/src/main.ts @@ -5,9 +5,7 @@ import { createMainWindow } from './modules/window'; import { setupDeepLinks, handlePendingDeepLink } from './modules/deep-links'; import { setupAutoUpdater } from './modules/auto-updater'; import { setupMenu } from './modules/menu'; -import { setupTray } from './modules/tray'; import { registerIpcHandlers } from './ipc/handlers'; -import { registerClipboardHandlers } from './modules/clipboard'; registerGlobalErrorHandlers(); @@ -16,7 +14,6 @@ if (!setupDeepLinks()) { } registerIpcHandlers(); -registerClipboardHandlers(); // App lifecycle app.whenReady().then(async () => { @@ -29,7 +26,6 @@ app.whenReady().then(async () => { return; } createMainWindow(); - setupTray(); setupAutoUpdater(); handlePendingDeepLink(); diff --git a/surfsense_desktop/src/modules/clipboard.ts b/surfsense_desktop/src/modules/clipboard.ts deleted file mode 100644 index 4f9d7b802..000000000 --- a/surfsense_desktop/src/modules/clipboard.ts +++ /dev/null @@ -1,14 +0,0 @@ -import { ipcMain } from 'electron'; -import { IPC_CHANNELS } from '../ipc/channels'; - -let lastClipboardContent = ''; - -export function setClipboardContent(text: string): void { - lastClipboardContent = text; -} - -export function registerClipboardHandlers(): void { - ipcMain.handle(IPC_CHANNELS.GET_CLIPBOARD_CONTENT, () => { - return lastClipboardContent; - }); -} diff --git a/surfsense_desktop/src/modules/tray.ts b/surfsense_desktop/src/modules/tray.ts deleted file mode 100644 index 3527cf691..000000000 --- a/surfsense_desktop/src/modules/tray.ts +++ /dev/null @@ -1,73 +0,0 @@ -import { app, BrowserWindow, clipboard, Menu, Tray } from 'electron'; -import path from 'path'; -import { getServerPort } from './server'; -import { setClipboardContent } from './clipboard'; - -let tray: Tray | null = null; -let clipWindow: BrowserWindow | null = null; - -function getIconPath(): string { - if (app.isPackaged) { - return path.join(process.resourcesPath, 'icon.png'); - } - return path.join(__dirname, '..', 'assets', 'icon.png'); -} - -function createClipWindow(): BrowserWindow { - if (clipWindow && !clipWindow.isDestroyed()) { - clipWindow.focus(); - return clipWindow; - } - - clipWindow = new BrowserWindow({ - width: 420, - height: 620, - resizable: true, - minimizable: false, - maximizable: false, - fullscreenable: false, - webPreferences: { - preload: path.join(__dirname, 'preload.js'), - contextIsolation: true, - nodeIntegration: false, - sandbox: true, - }, - show: false, - titleBarStyle: 'hiddenInset', - }); - - clipWindow.loadURL(`http://localhost:${getServerPort()}/dashboard`); - - clipWindow.once('ready-to-show', () => { - clipWindow?.show(); - }); - - clipWindow.on('closed', () => { - clipWindow = null; - }); - - return clipWindow; -} - -export function setupTray(): void { - tray = new Tray(getIconPath()); - tray.setToolTip('SurfSense'); - - const contextMenu = Menu.buildFromTemplate([ - { - label: 'Ask about clipboard', - click: () => { - const text = clipboard.readText(); - setClipboardContent(text); - createClipWindow(); - }, - }, - { type: 'separator' }, - { - label: 'Quit', - click: () => app.quit(), - }, - ]); - - tray.setContextMenu(contextMenu); -} diff --git a/surfsense_desktop/src/preload.ts b/surfsense_desktop/src/preload.ts index 3f0f4be1f..d36db8c22 100644 --- a/surfsense_desktop/src/preload.ts +++ b/surfsense_desktop/src/preload.ts @@ -10,7 +10,6 @@ contextBridge.exposeInMainWorld('electronAPI', { }, openExternal: (url: string) => ipcRenderer.send(IPC_CHANNELS.OPEN_EXTERNAL, url), getAppVersion: () => ipcRenderer.invoke(IPC_CHANNELS.GET_APP_VERSION), - getClipboardContent: () => ipcRenderer.invoke(IPC_CHANNELS.GET_CLIPBOARD_CONTENT), onDeepLink: (callback: (url: string) => void) => { const listener = (_event: unknown, url: string) => callback(url); ipcRenderer.on(IPC_CHANNELS.DEEP_LINK, listener); diff --git a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx index be48b60fa..dacc845ec 100644 --- a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx +++ b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx @@ -47,7 +47,6 @@ interface InlineMentionEditorProps { disabled?: boolean; className?: string; initialDocuments?: MentionedDocument[]; - initialText?: string; } // Unique data attribute to identify chip elements @@ -97,7 +96,6 @@ export const InlineMentionEditor = forwardRef { @@ -117,16 +115,6 @@ export const InlineMentionEditor = forwardRef { - if (!initialText || initialTextAppliedRef.current || !editorRef.current) return; - initialTextAppliedRef.current = true; - editorRef.current.textContent = initialText; - setIsEmpty(false); - onChange?.(initialText, Array.from(mentionedDocs.values())); - }, [initialText]); // eslint-disable-line react-hooks/exhaustive-deps - // Focus at the end of the editor const focusAtEnd = useCallback(() => { if (!editorRef.current) return; diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index 0a38e19d5..081e234a8 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -330,16 +330,6 @@ const Composer: FC = () => { const composerRuntime = useComposerRuntime(); const hasAutoFocusedRef = useRef(false); - // Clipboard content - const [clipboardText, setClipboardText] = useState(); - useEffect(() => { - const api = window.electronAPI; - if (!api?.getClipboardContent) return; - api.getClipboardContent().then((text) => { - if (text) setClipboardText(text); - }); - }, []); - const isThreadEmpty = useAssistantState(({ thread }) => thread.isEmpty); const isThreadRunning = useAssistantState(({ thread }) => thread.isRunning); @@ -546,7 +536,6 @@ const Composer: FC = () => { onDocumentRemove={handleDocumentRemove} onSubmit={handleSubmit} onKeyDown={handleKeyDown} - initialText={clipboardText} className="min-h-[24px]" /> diff --git a/surfsense_web/types/window.d.ts b/surfsense_web/types/window.d.ts index 487e2058f..8d2c38c8b 100644 --- a/surfsense_web/types/window.d.ts +++ b/surfsense_web/types/window.d.ts @@ -9,7 +9,6 @@ interface ElectronAPI { }; openExternal: (url: string) => void; getAppVersion: () => Promise; - getClipboardContent: () => Promise; onDeepLink: (callback: (url: string) => void) => () => void; } From 801c07291e4cd09f21bc27ecd657b430ef977711 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 24 Mar 2026 19:22:43 +0200 Subject: [PATCH 017/102] add quick-ask IPC channel and shortcut module --- surfsense_desktop/src/ipc/channels.ts | 1 + surfsense_desktop/src/modules/quick-ask.ts | 29 ++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 surfsense_desktop/src/modules/quick-ask.ts diff --git a/surfsense_desktop/src/ipc/channels.ts b/surfsense_desktop/src/ipc/channels.ts index 8ae21cfcf..18002b520 100644 --- a/surfsense_desktop/src/ipc/channels.ts +++ b/surfsense_desktop/src/ipc/channels.ts @@ -2,4 +2,5 @@ export const IPC_CHANNELS = { OPEN_EXTERNAL: 'open-external', GET_APP_VERSION: 'get-app-version', DEEP_LINK: 'deep-link', + QUICK_ASK_TEXT: 'quick-ask-text', } as const; diff --git a/surfsense_desktop/src/modules/quick-ask.ts b/surfsense_desktop/src/modules/quick-ask.ts new file mode 100644 index 000000000..e38b0d693 --- /dev/null +++ b/surfsense_desktop/src/modules/quick-ask.ts @@ -0,0 +1,29 @@ +import { clipboard, globalShortcut } from 'electron'; +import { IPC_CHANNELS } from '../ipc/channels'; +import { getMainWindow } from './window'; + +const SHORTCUT = 'CommandOrControl+Option+S'; + +export function registerQuickAsk(): void { + const ok = globalShortcut.register(SHORTCUT, () => { + const win = getMainWindow(); + if (!win) return; + + const text = clipboard.readText().trim(); + if (!text) return; + + if (win.isMinimized()) win.restore(); + win.show(); + win.focus(); + + win.webContents.send(IPC_CHANNELS.QUICK_ASK_TEXT, text); + }); + + if (!ok) { + console.log(`Quick-ask: failed to register ${SHORTCUT}`); + } +} + +export function unregisterQuickAsk(): void { + globalShortcut.unregister(SHORTCUT); +} From 45e91135227d5faa2758595eca21f5c4ec199c39 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 24 Mar 2026 19:23:24 +0200 Subject: [PATCH 018/102] expose onQuickAskText in preload --- surfsense_desktop/src/preload.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/surfsense_desktop/src/preload.ts b/surfsense_desktop/src/preload.ts index d36db8c22..ca894d6b3 100644 --- a/surfsense_desktop/src/preload.ts +++ b/surfsense_desktop/src/preload.ts @@ -17,4 +17,11 @@ contextBridge.exposeInMainWorld('electronAPI', { ipcRenderer.removeListener(IPC_CHANNELS.DEEP_LINK, listener); }; }, + onQuickAskText: (callback: (text: string) => void) => { + const listener = (_event: unknown, text: string) => callback(text); + ipcRenderer.on(IPC_CHANNELS.QUICK_ASK_TEXT, listener); + return () => { + ipcRenderer.removeListener(IPC_CHANNELS.QUICK_ASK_TEXT, listener); + }; + }, }); From 032ccd95415b4c060372f2e2d03a9050e21adf30 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 24 Mar 2026 19:24:02 +0200 Subject: [PATCH 019/102] add onQuickAskText type to ElectronAPI --- surfsense_web/types/window.d.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/surfsense_web/types/window.d.ts b/surfsense_web/types/window.d.ts index 8d2c38c8b..6c7e192db 100644 --- a/surfsense_web/types/window.d.ts +++ b/surfsense_web/types/window.d.ts @@ -10,6 +10,7 @@ interface ElectronAPI { openExternal: (url: string) => void; getAppVersion: () => Promise; onDeepLink: (callback: (url: string) => void) => () => void; + onQuickAskText: (callback: (text: string) => void) => () => void; } declare global { From 71a262b2e797f92e9abaab5e2ee6f028f19825ab Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 24 Mar 2026 19:24:41 +0200 Subject: [PATCH 020/102] wire quick-ask in main.ts --- surfsense_desktop/src/main.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/surfsense_desktop/src/main.ts b/surfsense_desktop/src/main.ts index aff64db22..3ab41073b 100644 --- a/surfsense_desktop/src/main.ts +++ b/surfsense_desktop/src/main.ts @@ -5,6 +5,7 @@ import { createMainWindow } from './modules/window'; import { setupDeepLinks, handlePendingDeepLink } from './modules/deep-links'; import { setupAutoUpdater } from './modules/auto-updater'; import { setupMenu } from './modules/menu'; +import { registerQuickAsk, unregisterQuickAsk } from './modules/quick-ask'; import { registerIpcHandlers } from './ipc/handlers'; registerGlobalErrorHandlers(); @@ -26,6 +27,7 @@ app.whenReady().then(async () => { return; } createMainWindow(); + registerQuickAsk(); setupAutoUpdater(); handlePendingDeepLink(); @@ -44,5 +46,5 @@ app.on('window-all-closed', () => { }); app.on('will-quit', () => { - // Server runs in-process — no child process to kill + unregisterQuickAsk(); }); From f9be80ab766af7cfd78d93a044d6724b68326ab6 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 24 Mar 2026 19:26:13 +0200 Subject: [PATCH 021/102] re-add initialText prop to InlineMentionEditor --- .../components/assistant-ui/inline-mention-editor.tsx | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx index dacc845ec..ab1213a49 100644 --- a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx +++ b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx @@ -47,6 +47,7 @@ interface InlineMentionEditorProps { disabled?: boolean; className?: string; initialDocuments?: MentionedDocument[]; + initialText?: string; } // Unique data attribute to identify chip elements @@ -96,6 +97,7 @@ export const InlineMentionEditor = forwardRef { @@ -115,6 +117,13 @@ export const InlineMentionEditor = forwardRef { + if (!initialText || !editorRef.current) return; + editorRef.current.textContent = initialText; + setIsEmpty(false); + onChange?.(initialText, Array.from(mentionedDocs.values())); + }, [initialText]); // eslint-disable-line react-hooks/exhaustive-deps + // Focus at the end of the editor const focusAtEnd = useCallback(() => { if (!editorRef.current) return; From 875046263738ddcbfc6b92ed7d08546eef24b8d1 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 24 Mar 2026 19:28:41 +0200 Subject: [PATCH 022/102] listen for quick-ask text in Composer --- surfsense_web/components/assistant-ui/thread.tsx | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index 081e234a8..eb98fd025 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -330,6 +330,13 @@ const Composer: FC = () => { const composerRuntime = useComposerRuntime(); const hasAutoFocusedRef = useRef(false); + const [quickAskText, setQuickAskText] = useState(); + useEffect(() => { + return window.electronAPI?.onQuickAskText((text) => { + if (text) setQuickAskText(text); + }); + }, []); + const isThreadEmpty = useAssistantState(({ thread }) => thread.isEmpty); const isThreadRunning = useAssistantState(({ thread }) => thread.isRunning); @@ -536,6 +543,7 @@ const Composer: FC = () => { onDocumentRemove={handleDocumentRemove} onSubmit={handleSubmit} onKeyDown={handleKeyDown} + initialText={quickAskText} className="min-h-[24px]" /> From d033e1cb4847e0b08b2f4d7e82e6ac235e169c63 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 24 Mar 2026 20:12:49 +0200 Subject: [PATCH 023/102] open quick-ask as mini window at cursor position --- surfsense_desktop/src/modules/quick-ask.ts | 53 ++++++++++++++++++---- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/surfsense_desktop/src/modules/quick-ask.ts b/surfsense_desktop/src/modules/quick-ask.ts index e38b0d693..6785fb3ce 100644 --- a/surfsense_desktop/src/modules/quick-ask.ts +++ b/surfsense_desktop/src/modules/quick-ask.ts @@ -1,20 +1,57 @@ -import { clipboard, globalShortcut } from 'electron'; +import { BrowserWindow, clipboard, globalShortcut, screen } from 'electron'; +import path from 'path'; import { IPC_CHANNELS } from '../ipc/channels'; -import { getMainWindow } from './window'; +import { getServerPort } from './server'; const SHORTCUT = 'CommandOrControl+Option+S'; +let quickAskWindow: BrowserWindow | null = null; + +function createQuickAskWindow(x: number, y: number): BrowserWindow { + if (quickAskWindow && !quickAskWindow.isDestroyed()) { + quickAskWindow.setPosition(x, y); + quickAskWindow.show(); + quickAskWindow.focus(); + return quickAskWindow; + } + + quickAskWindow = new BrowserWindow({ + width: 450, + height: 550, + x, + y, + alwaysOnTop: true, + resizable: true, + frame: false, + webPreferences: { + preload: path.join(__dirname, 'preload.js'), + contextIsolation: true, + nodeIntegration: false, + sandbox: true, + }, + show: false, + skipTaskbar: true, + }); + + quickAskWindow.loadURL(`http://localhost:${getServerPort()}/dashboard`); + + quickAskWindow.once('ready-to-show', () => { + quickAskWindow?.show(); + }); + + quickAskWindow.on('closed', () => { + quickAskWindow = null; + }); + + return quickAskWindow; +} export function registerQuickAsk(): void { const ok = globalShortcut.register(SHORTCUT, () => { - const win = getMainWindow(); - if (!win) return; - const text = clipboard.readText().trim(); if (!text) return; - if (win.isMinimized()) win.restore(); - win.show(); - win.focus(); + const cursor = screen.getCursorScreenPoint(); + const win = createQuickAskWindow(cursor.x, cursor.y); win.webContents.send(IPC_CHANNELS.QUICK_ASK_TEXT, text); }); From 985299b72da8759f363d187b26f4828929bdabca Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 24 Mar 2026 20:32:27 +0200 Subject: [PATCH 024/102] toggle, blur-dismiss, and titlebar for quick-ask window --- surfsense_desktop/src/modules/quick-ask.ts | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/surfsense_desktop/src/modules/quick-ask.ts b/surfsense_desktop/src/modules/quick-ask.ts index 6785fb3ce..acf41febf 100644 --- a/surfsense_desktop/src/modules/quick-ask.ts +++ b/surfsense_desktop/src/modules/quick-ask.ts @@ -6,6 +6,12 @@ import { getServerPort } from './server'; const SHORTCUT = 'CommandOrControl+Option+S'; let quickAskWindow: BrowserWindow | null = null; +function hideQuickAsk(): void { + if (quickAskWindow && !quickAskWindow.isDestroyed()) { + quickAskWindow.hide(); + } +} + function createQuickAskWindow(x: number, y: number): BrowserWindow { if (quickAskWindow && !quickAskWindow.isDestroyed()) { quickAskWindow.setPosition(x, y); @@ -19,9 +25,8 @@ function createQuickAskWindow(x: number, y: number): BrowserWindow { height: 550, x, y, - alwaysOnTop: true, resizable: true, - frame: false, + titleBarStyle: 'hiddenInset', webPreferences: { preload: path.join(__dirname, 'preload.js'), contextIsolation: true, @@ -38,6 +43,8 @@ function createQuickAskWindow(x: number, y: number): BrowserWindow { quickAskWindow?.show(); }); + quickAskWindow.on('blur', hideQuickAsk); + quickAskWindow.on('closed', () => { quickAskWindow = null; }); @@ -47,6 +54,11 @@ function createQuickAskWindow(x: number, y: number): BrowserWindow { export function registerQuickAsk(): void { const ok = globalShortcut.register(SHORTCUT, () => { + if (quickAskWindow && !quickAskWindow.isDestroyed() && quickAskWindow.isVisible()) { + hideQuickAsk(); + return; + } + const text = clipboard.readText().trim(); if (!text) return; From 296b95ba5b689bdadc0be18fc2d55d3ba36cd119 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 24 Mar 2026 20:45:56 +0200 Subject: [PATCH 025/102] use type panel for floating non-focus-stealing window --- surfsense_desktop/src/modules/quick-ask.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/surfsense_desktop/src/modules/quick-ask.ts b/surfsense_desktop/src/modules/quick-ask.ts index acf41febf..0058a738e 100644 --- a/surfsense_desktop/src/modules/quick-ask.ts +++ b/surfsense_desktop/src/modules/quick-ask.ts @@ -25,8 +25,8 @@ function createQuickAskWindow(x: number, y: number): BrowserWindow { height: 550, x, y, + type: 'panel', resizable: true, - titleBarStyle: 'hiddenInset', webPreferences: { preload: path.join(__dirname, 'preload.js'), contextIsolation: true, From f7b52470eb4d1adcf77d8fd36e7416f0f0b5e850 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 25 Mar 2026 18:33:44 +0530 Subject: [PATCH 026/102] feat: enhance Google connectors indexing with content extraction and document migration - Added `download_and_extract_content` function to extract content from Google Drive files as markdown. - Updated Google Drive indexer to utilize the new content extraction method. - Implemented document migration logic to update legacy Composio document types to their native Google types. - Introduced identifier hashing for stable document identification. - Improved file pre-filtering to handle unchanged and rename-only files efficiently. --- .../app/connectors/google_drive/__init__.py | 3 +- .../google_drive/content_extractor.py | 154 ++ .../app/indexing_pipeline/document_hashing.py | 11 +- .../indexing_pipeline_service.py | 59 +- .../google_calendar_indexer.py | 373 ++-- .../google_drive_indexer.py | 1539 +++++------------ .../google_gmail_indexer.py | 379 ++-- .../test_document_hashing.py | 21 + 8 files changed, 951 insertions(+), 1588 deletions(-) diff --git a/surfsense_backend/app/connectors/google_drive/__init__.py b/surfsense_backend/app/connectors/google_drive/__init__.py index 47cc8598e..a0e9c4484 100644 --- a/surfsense_backend/app/connectors/google_drive/__init__.py +++ b/surfsense_backend/app/connectors/google_drive/__init__.py @@ -2,13 +2,14 @@ from .change_tracker import categorize_change, fetch_all_changes, get_start_page_token from .client import GoogleDriveClient -from .content_extractor import download_and_process_file +from .content_extractor import download_and_extract_content, download_and_process_file from .credentials import get_valid_credentials, validate_credentials from .folder_manager import get_file_by_id, get_files_in_folder, list_folder_contents __all__ = [ "GoogleDriveClient", "categorize_change", + "download_and_extract_content", "download_and_process_file", "fetch_all_changes", "get_file_by_id", diff --git a/surfsense_backend/app/connectors/google_drive/content_extractor.py b/surfsense_backend/app/connectors/google_drive/content_extractor.py index 1d08d38f7..6fa20bf8e 100644 --- a/surfsense_backend/app/connectors/google_drive/content_extractor.py +++ b/surfsense_backend/app/connectors/google_drive/content_extractor.py @@ -17,6 +17,160 @@ from .file_types import get_export_mime_type, is_google_workspace_file, should_s logger = logging.getLogger(__name__) +async def download_and_extract_content( + client: GoogleDriveClient, + file: dict[str, Any], +) -> tuple[str | None, dict[str, Any], str | None]: + """Download a Google Drive file and extract its content as markdown. + + ETL only -- no DB writes, no indexing, no summarization. + + Returns: + (markdown_content, drive_metadata, error_message) + On success error_message is None. + """ + file_id = file.get("id") + file_name = file.get("name", "Unknown") + mime_type = file.get("mimeType", "") + + if should_skip_file(mime_type): + return None, {}, f"Skipping {mime_type}" + + logger.info(f"Downloading file for content extraction: {file_name} ({mime_type})") + + drive_metadata: dict[str, Any] = { + "google_drive_file_id": file_id, + "google_drive_file_name": file_name, + "google_drive_mime_type": mime_type, + "source_connector": "google_drive", + } + if "modifiedTime" in file: + drive_metadata["modified_time"] = file["modifiedTime"] + if "createdTime" in file: + drive_metadata["created_time"] = file["createdTime"] + if "size" in file: + drive_metadata["file_size"] = file["size"] + if "webViewLink" in file: + drive_metadata["web_view_link"] = file["webViewLink"] + if "md5Checksum" in file: + drive_metadata["md5_checksum"] = file["md5Checksum"] + if is_google_workspace_file(mime_type): + drive_metadata["exported_as"] = "pdf" + drive_metadata["original_workspace_type"] = mime_type.split(".")[-1] + + temp_file_path = None + try: + # Download / export + if is_google_workspace_file(mime_type): + export_mime = get_export_mime_type(mime_type) + if not export_mime: + return None, drive_metadata, f"Cannot export Google Workspace type: {mime_type}" + content_bytes, error = await client.export_google_file(file_id, export_mime) + if error: + return None, drive_metadata, error + extension = ".pdf" if export_mime == "application/pdf" else ".txt" + else: + content_bytes, error = await client.download_file(file_id) + if error: + return None, drive_metadata, error + extension = Path(file_name).suffix or ".bin" + + with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as tmp: + tmp.write(content_bytes) + temp_file_path = tmp.name + + # Parse to markdown + markdown = await _parse_file_to_markdown(temp_file_path, file_name) + return markdown, drive_metadata, None + + except Exception as e: + logger.warning(f"Failed to extract content from {file_name}: {e!s}") + return None, drive_metadata, str(e) + finally: + if temp_file_path and os.path.exists(temp_file_path): + try: + os.unlink(temp_file_path) + except Exception: + pass + + +async def _parse_file_to_markdown(file_path: str, filename: str) -> str: + """Parse a local file to markdown using the configured ETL service.""" + lower = filename.lower() + + if lower.endswith((".md", ".markdown", ".txt")): + with open(file_path, encoding="utf-8") as f: + return f.read() + + if lower.endswith((".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm")): + from app.config import config as app_config + from litellm import atranscription + + stt_service_type = ( + "local" + if app_config.STT_SERVICE and app_config.STT_SERVICE.startswith("local/") + else "external" + ) + if stt_service_type == "local": + from app.services.stt_service import stt_service + result = stt_service.transcribe_file(file_path) + text = result.get("text", "") + else: + with open(file_path, "rb") as audio_file: + kwargs: dict[str, Any] = { + "model": app_config.STT_SERVICE, + "file": audio_file, + "api_key": app_config.STT_SERVICE_API_KEY, + } + if app_config.STT_SERVICE_API_BASE: + kwargs["api_base"] = app_config.STT_SERVICE_API_BASE + resp = await atranscription(**kwargs) + text = resp.get("text", "") + + if not text: + raise ValueError("Transcription returned empty text") + return f"# Transcription of {filename}\n\n{text}" + + # Document files -- use configured ETL service + from app.config import config as app_config + + if app_config.ETL_SERVICE == "UNSTRUCTURED": + from langchain_unstructured import UnstructuredLoader + from app.utils.document_converters import convert_document_to_markdown + + loader = UnstructuredLoader( + file_path, + mode="elements", + post_processors=[], + languages=["eng"], + include_orig_elements=False, + include_metadata=False, + strategy="auto", + ) + docs = await loader.aload() + return await convert_document_to_markdown(docs) + + if app_config.ETL_SERVICE == "LLAMACLOUD": + from app.tasks.document_processors.file_processors import ( + parse_with_llamacloud_retry, + ) + + result = await parse_with_llamacloud_retry(file_path=file_path, estimated_pages=50) + markdown_documents = await result.aget_markdown_documents(split_by_page=False) + if not markdown_documents: + raise RuntimeError(f"LlamaCloud returned no documents for {filename}") + return markdown_documents[0].text + + if app_config.ETL_SERVICE == "DOCLING": + from docling.document_converter import DocumentConverter + + converter = DocumentConverter() + result = converter.convert(file_path) + return result.document.export_to_markdown() + + raise RuntimeError(f"Unknown ETL_SERVICE: {app_config.ETL_SERVICE}") + + async def download_and_process_file( client: GoogleDriveClient, file: dict[str, Any], diff --git a/surfsense_backend/app/indexing_pipeline/document_hashing.py b/surfsense_backend/app/indexing_pipeline/document_hashing.py index 5dd7767a4..9edebd140 100644 --- a/surfsense_backend/app/indexing_pipeline/document_hashing.py +++ b/surfsense_backend/app/indexing_pipeline/document_hashing.py @@ -3,10 +3,17 @@ import hashlib from app.indexing_pipeline.connector_document import ConnectorDocument +def compute_identifier_hash( + document_type_value: str, unique_id: str, search_space_id: int +) -> str: + """Return a stable SHA-256 hash from raw identity components.""" + combined = f"{document_type_value}:{unique_id}:{search_space_id}" + return hashlib.sha256(combined.encode("utf-8")).hexdigest() + + def compute_unique_identifier_hash(doc: ConnectorDocument) -> str: """Return a stable SHA-256 hash identifying a document by its source identity.""" - combined = f"{doc.document_type.value}:{doc.unique_id}:{doc.search_space_id}" - return hashlib.sha256(combined.encode("utf-8")).hexdigest() + return compute_identifier_hash(doc.document_type.value, doc.unique_id, doc.search_space_id) def compute_content_hash(doc: ConnectorDocument) -> str: diff --git a/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py b/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py index 490aac782..c6a29f204 100644 --- a/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py +++ b/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py @@ -6,12 +6,13 @@ from sqlalchemy import delete, select from sqlalchemy.exc import IntegrityError from sqlalchemy.ext.asyncio import AsyncSession -from app.db import Chunk, Document, DocumentStatus +from app.db import NATIVE_TO_LEGACY_DOCTYPE, Chunk, Document, DocumentStatus from app.indexing_pipeline.connector_document import ConnectorDocument from app.indexing_pipeline.document_chunker import chunk_text from app.indexing_pipeline.document_embedder import embed_texts from app.indexing_pipeline.document_hashing import ( compute_content_hash, + compute_identifier_hash, compute_unique_identifier_hash, ) from app.indexing_pipeline.document_persistence import ( @@ -54,6 +55,62 @@ class IndexingPipelineService: def __init__(self, session: AsyncSession) -> None: self.session = session + async def migrate_legacy_docs( + self, connector_docs: list[ConnectorDocument] + ) -> None: + """Migrate legacy Composio documents to their native Google type. + + For each ConnectorDocument whose document_type has a Composio equivalent + in NATIVE_TO_LEGACY_DOCTYPE, look up the old document by legacy hash and + update its unique_identifier_hash and document_type so that + prepare_for_indexing() can find it under the native hash. + """ + for doc in connector_docs: + legacy_type = NATIVE_TO_LEGACY_DOCTYPE.get(doc.document_type.value) + if not legacy_type: + continue + + legacy_hash = compute_identifier_hash( + legacy_type, doc.unique_id, doc.search_space_id + ) + result = await self.session.execute( + select(Document).filter( + Document.unique_identifier_hash == legacy_hash + ) + ) + existing = result.scalars().first() + if existing is None: + continue + + native_hash = compute_identifier_hash( + doc.document_type.value, doc.unique_id, doc.search_space_id + ) + existing.unique_identifier_hash = native_hash + existing.document_type = doc.document_type + + await self.session.commit() + + async def index_batch( + self, connector_docs: list[ConnectorDocument], llm + ) -> list[Document]: + """Convenience method: prepare_for_indexing then index each document. + + Indexers that need heartbeat callbacks or custom per-document logic + should call prepare_for_indexing() + index() directly instead. + """ + doc_map = { + compute_unique_identifier_hash(cd): cd for cd in connector_docs + } + documents = await self.prepare_for_indexing(connector_docs) + results: list[Document] = [] + for document in documents: + connector_doc = doc_map.get(document.unique_identifier_hash) + if connector_doc is None: + continue + result = await self.index(document, connector_doc, llm) + results.append(result) + return results + async def prepare_for_indexing( self, connector_docs: list[ConnectorDocument] ) -> list[Document]: diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py index 233bc66e4..a69b33bdc 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py @@ -1,9 +1,8 @@ """ Google Calendar connector indexer. -Implements 2-phase document status updates for real-time UI feedback: -- Phase 1: Create all documents with 'pending' status (visible in UI immediately) -- Phase 2: Process each document: pending → processing → ready/failed +Uses the shared IndexingPipelineService for document deduplication, +summarization, chunking, and embedding. """ import time @@ -15,29 +14,25 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.asyncio import AsyncSession from app.connectors.google_calendar_connector import GoogleCalendarConnector -from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType +from app.db import DocumentType, SearchSourceConnectorType +from app.indexing_pipeline.connector_document import ConnectorDocument +from app.indexing_pipeline.document_hashing import ( + compute_content_hash, + compute_unique_identifier_hash, +) +from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService from app.services.llm_service import get_user_long_context_llm from app.services.task_logging_service import TaskLoggingService -from app.utils.document_converters import ( - create_document_chunks, - embed_text, - generate_content_hash, - generate_document_summary, - generate_unique_identifier_hash, -) from app.utils.google_credentials import ( COMPOSIO_GOOGLE_CONNECTOR_TYPES, build_composio_credentials, ) from .base import ( - check_document_by_unique_identifier, check_duplicate_document_by_hash, get_connector_by_id, - get_current_timestamp, logger, parse_date_flexible, - safe_set_chunks, update_connector_last_indexed, ) @@ -46,13 +41,60 @@ ACCEPTED_CALENDAR_CONNECTOR_TYPES = { SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR, } -# Type hint for heartbeat callback HeartbeatCallbackType = Callable[[int], Awaitable[None]] - -# Heartbeat interval in seconds HEARTBEAT_INTERVAL_SECONDS = 30 +def _build_connector_doc( + event: dict, + event_markdown: str, + *, + connector_id: int, + search_space_id: int, + user_id: str, + enable_summary: bool, +) -> ConnectorDocument: + """Map a raw Google Calendar API event dict to a ConnectorDocument.""" + event_id = event.get("id", "") + event_summary = event.get("summary", "No Title") + calendar_id = event.get("calendarId", "") + + start = event.get("start", {}) + end = event.get("end", {}) + start_time = start.get("dateTime") or start.get("date", "") + end_time = end.get("dateTime") or end.get("date", "") + location = event.get("location", "") + + metadata = { + "event_id": event_id, + "event_summary": event_summary, + "calendar_id": calendar_id, + "start_time": start_time, + "end_time": end_time, + "location": location, + "connector_id": connector_id, + "document_type": "Google Calendar Event", + "connector_type": "Google Calendar", + } + + fallback_summary = ( + f"Google Calendar Event: {event_summary}\n\n{event_markdown}" + ) + + return ConnectorDocument( + title=event_summary, + source_markdown=event_markdown, + unique_id=event_id, + document_type=DocumentType.GOOGLE_CALENDAR_CONNECTOR, + search_space_id=search_space_id, + connector_id=connector_id, + created_by_id=user_id, + should_summarize=enable_summary, + fallback_summary=fallback_summary, + metadata=metadata, + ) + + async def index_google_calendar_events( session: AsyncSession, connector_id: int, @@ -82,7 +124,6 @@ async def index_google_calendar_events( """ task_logger = TaskLoggingService(session, search_space_id) - # Log task start log_entry = await task_logger.log_task_start( task_name="google_calendar_events_indexing", source="connector_indexing_task", @@ -96,7 +137,7 @@ async def index_google_calendar_events( ) try: - # Accept both native and Composio Calendar connectors + # ── Connector lookup ────────────────────────────────────────── connector = None for ct in ACCEPTED_CALENDAR_CONNECTOR_TYPES: connector = await get_connector_by_id(session, connector_id, ct) @@ -112,7 +153,7 @@ async def index_google_calendar_events( ) return 0, 0, f"Connector with ID {connector_id} not found" - # Build credentials based on connector type + # ── Credential building ─────────────────────────────────────── if connector.connector_type in COMPOSIO_GOOGLE_CONNECTOR_TYPES: connected_account_id = connector.config.get("composio_connected_account_id") if not connected_account_id: @@ -184,6 +225,7 @@ async def index_google_calendar_events( ) return 0, 0, "Google Calendar credentials not found in connector config" + # ── Calendar client init ────────────────────────────────────── await task_logger.log_task_progress( log_entry, f"Initializing Google Calendar client for connector {connector_id}", @@ -203,36 +245,26 @@ async def index_google_calendar_events( if end_date == "undefined" or end_date == "": end_date = None - # Calculate date range - # For calendar connectors, allow future dates to index upcoming events + # ── Date range calculation ──────────────────────────────────── if start_date is None or end_date is None: - # Fall back to calculating dates based on last_indexed_at - # Default to today (users can manually select future dates if needed) calculated_end_date = datetime.now() - # Use last_indexed_at as start date if available, otherwise use 30 days ago if connector.last_indexed_at: - # Convert dates to be comparable (both timezone-naive) last_indexed_naive = ( connector.last_indexed_at.replace(tzinfo=None) if connector.last_indexed_at.tzinfo else connector.last_indexed_at ) - - # Allow future dates - use last_indexed_at as start date calculated_start_date = last_indexed_naive logger.info( f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date" ) else: - calculated_start_date = datetime.now() - timedelta( - days=365 - ) # Use 365 days as default for calendar events (matches frontend) + calculated_start_date = datetime.now() - timedelta(days=365) logger.info( f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (365 days ago) as start date" ) - # Use calculated dates if not provided start_date_str = ( start_date if start_date else calculated_start_date.strftime("%Y-%m-%d") ) @@ -240,19 +272,14 @@ async def index_google_calendar_events( end_date if end_date else calculated_end_date.strftime("%Y-%m-%d") ) else: - # Use provided dates (including future dates) start_date_str = start_date end_date_str = end_date - # FIX: Ensure end_date is at least 1 day after start_date to avoid - # "start_date must be strictly before end_date" errors when dates are the same - # (e.g., when last_indexed_at is today) if start_date_str == end_date_str: logger.info( f"Start date ({start_date_str}) equals end date ({end_date_str}), " "adjusting end date to next day to ensure valid date range" ) - # Parse end_date and add 1 day try: end_dt = parse_date_flexible(end_date_str) except ValueError: @@ -264,6 +291,7 @@ async def index_google_calendar_events( end_date_str = end_dt.strftime("%Y-%m-%d") logger.info(f"Adjusted end date to {end_date_str}") + # ── Fetch events ────────────────────────────────────────────── await task_logger.log_task_progress( log_entry, f"Fetching Google Calendar events from {start_date_str} to {end_date_str}", @@ -274,27 +302,19 @@ async def index_google_calendar_events( }, ) - # Get events within date range from primary calendar try: events, error = await calendar_client.get_all_primary_calendar_events( start_date=start_date_str, end_date=end_date_str ) if error: - # Don't treat "No events found" as an error that should stop indexing if "No events found" in error: logger.info(f"No Google Calendar events found: {error}") - logger.info( - "No events found is not a critical error, continuing with update" - ) if update_last_indexed: await update_connector_last_indexed( session, connector, update_last_indexed ) await session.commit() - logger.info( - f"Updated last_indexed_at to {connector.last_indexed_at} despite no events found" - ) await task_logger.log_task_success( log_entry, @@ -304,7 +324,6 @@ async def index_google_calendar_events( return 0, 0, None else: logger.error(f"Failed to get Google Calendar events: {error}") - # Check if this is an authentication error that requires re-authentication error_message = error error_type = "APIError" if ( @@ -329,28 +348,15 @@ async def index_google_calendar_events( logger.error(f"Error fetching Google Calendar events: {e!s}", exc_info=True) return 0, 0, f"Error fetching Google Calendar events: {e!s}" - documents_indexed = 0 + # ── Build ConnectorDocuments ────────────────────────────────── + connector_docs: list[ConnectorDocument] = [] documents_skipped = 0 - documents_failed = 0 # Track events that failed processing - duplicate_content_count = ( - 0 # Track events skipped due to duplicate content_hash - ) - - # Heartbeat tracking - update notification periodically to prevent appearing stuck - last_heartbeat_time = time.time() - - # ======================================================================= - # PHASE 1: Analyze all events, create pending documents - # This makes ALL documents visible in the UI immediately with pending status - # ======================================================================= - events_to_process = [] # List of dicts with document and event data - new_documents_created = False + duplicate_content_count = 0 for event in events: try: event_id = event.get("id") event_summary = event.get("summary", "No Title") - calendar_id = event.get("calendarId", "") if not event_id: logger.warning(f"Skipping event with missing ID: {event_summary}") @@ -363,223 +369,73 @@ async def index_google_calendar_events( documents_skipped += 1 continue - start = event.get("start", {}) - end = event.get("end", {}) - start_time = start.get("dateTime") or start.get("date", "") - end_time = end.get("dateTime") or end.get("date", "") - location = event.get("location", "") - description = event.get("description", "") - - # Generate unique identifier hash for this Google Calendar event - unique_identifier_hash = generate_unique_identifier_hash( - DocumentType.GOOGLE_CALENDAR_CONNECTOR, event_id, search_space_id + doc = _build_connector_doc( + event, + event_markdown, + connector_id=connector_id, + search_space_id=search_space_id, + user_id=user_id, + enable_summary=connector.enable_summary, ) - # Generate content hash - content_hash = generate_content_hash(event_markdown, search_space_id) - - # Check if document with this unique identifier already exists - existing_document = await check_document_by_unique_identifier( - session, unique_identifier_hash - ) - - # Fallback: legacy Composio hash - if not existing_document: - legacy_hash = generate_unique_identifier_hash( - DocumentType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR, - event_id, - search_space_id, - ) - existing_document = await check_document_by_unique_identifier( - session, legacy_hash - ) - if existing_document: - existing_document.unique_identifier_hash = ( - unique_identifier_hash - ) - if ( - existing_document.document_type - == DocumentType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR - ): - existing_document.document_type = ( - DocumentType.GOOGLE_CALENDAR_CONNECTOR - ) - logger.info( - f"Migrated legacy Composio Calendar document: {event_id}" - ) - - if existing_document: - # Document exists - check if content has changed - if existing_document.content_hash == content_hash: - # Ensure status is ready (might have been stuck in processing/pending) - if not DocumentStatus.is_state( - existing_document.status, DocumentStatus.READY - ): - existing_document.status = DocumentStatus.ready() - documents_skipped += 1 - continue - - # Queue existing document for update (will be set to processing in Phase 2) - events_to_process.append( - { - "document": existing_document, - "is_new": False, - "event_markdown": event_markdown, - "content_hash": content_hash, - "event_id": event_id, - "event_summary": event_summary, - "calendar_id": calendar_id, - "start_time": start_time, - "end_time": end_time, - "location": location, - "description": description, - } - ) - continue - - # Document doesn't exist by unique_identifier_hash - # Check if a document with the same content_hash exists (from another connector) with session.no_autoflush: - duplicate_by_content = await check_duplicate_document_by_hash( - session, content_hash + duplicate = await check_duplicate_document_by_hash( + session, compute_content_hash(doc) ) - - if duplicate_by_content: - # A document with the same content already exists (likely from Composio connector) + if duplicate: logger.info( - f"Event {event_summary} already indexed by another connector " - f"(existing document ID: {duplicate_by_content.id}, " - f"type: {duplicate_by_content.document_type}). Skipping to avoid duplicate content." + f"Event {doc.title} already indexed by another connector " + f"(existing document ID: {duplicate.id}, " + f"type: {duplicate.document_type}). Skipping." ) duplicate_content_count += 1 documents_skipped += 1 continue - # Create new document with PENDING status (visible in UI immediately) - document = Document( - search_space_id=search_space_id, - title=event_summary, - document_type=DocumentType.GOOGLE_CALENDAR_CONNECTOR, - document_metadata={ - "event_id": event_id, - "event_summary": event_summary, - "calendar_id": calendar_id, - "start_time": start_time, - "end_time": end_time, - "location": location, - "connector_id": connector_id, - }, - content="Pending...", # Placeholder until processed - content_hash=unique_identifier_hash, # Temporary unique value - updated when ready - unique_identifier_hash=unique_identifier_hash, - embedding=None, - chunks=[], # Empty at creation - safe for async - status=DocumentStatus.pending(), # Pending until processing starts - updated_at=get_current_timestamp(), - created_by_id=user_id, - connector_id=connector_id, - ) - session.add(document) - new_documents_created = True - - events_to_process.append( - { - "document": document, - "is_new": True, - "event_markdown": event_markdown, - "content_hash": content_hash, - "event_id": event_id, - "event_summary": event_summary, - "calendar_id": calendar_id, - "start_time": start_time, - "end_time": end_time, - "location": location, - "description": description, - } - ) + connector_docs.append(doc) except Exception as e: - logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True) - documents_failed += 1 + logger.error(f"Error building ConnectorDocument for event: {e!s}", exc_info=True) + documents_skipped += 1 continue - # Commit all pending documents - they all appear in UI now - if new_documents_created: - logger.info( - f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents" - ) - await session.commit() + # ── Pipeline: migrate legacy docs + prepare + index ─────────── + pipeline = IndexingPipelineService(session) - # ======================================================================= - # PHASE 2: Process each document one by one - # Each document transitions: pending → processing → ready/failed - # ======================================================================= - logger.info(f"Phase 2: Processing {len(events_to_process)} documents") + await pipeline.migrate_legacy_docs(connector_docs) - for item in events_to_process: - # Send heartbeat periodically + documents = await pipeline.prepare_for_indexing(connector_docs) + + doc_map = { + compute_unique_identifier_hash(cd): cd for cd in connector_docs + } + + documents_indexed = 0 + documents_failed = 0 + last_heartbeat_time = time.time() + + for document in documents: if on_heartbeat_callback: current_time = time.time() if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS: await on_heartbeat_callback(documents_indexed) last_heartbeat_time = current_time - document = item["document"] - try: - # Set to PROCESSING and commit - shows "processing" in UI for THIS document only - document.status = DocumentStatus.processing() - await session.commit() + connector_doc = doc_map.get(document.unique_identifier_hash) + if connector_doc is None: + logger.warning( + f"No matching ConnectorDocument for document {document.id}, skipping" + ) + documents_failed += 1 + continue - # Heavy processing (LLM, embeddings, chunks) + try: user_llm = await get_user_long_context_llm( session, user_id, search_space_id ) - - if user_llm and connector.enable_summary: - document_metadata_for_summary = { - "event_id": item["event_id"], - "event_summary": item["event_summary"], - "calendar_id": item["calendar_id"], - "start_time": item["start_time"], - "end_time": item["end_time"], - "location": item["location"] or "No location", - "document_type": "Google Calendar Event", - "connector_type": "Google Calendar", - } - ( - summary_content, - summary_embedding, - ) = await generate_document_summary( - item["event_markdown"], user_llm, document_metadata_for_summary - ) - else: - summary_content = f"Google Calendar Event: {item['event_summary']}\n\n{item['event_markdown']}" - summary_embedding = embed_text(summary_content) - - chunks = await create_document_chunks(item["event_markdown"]) - - # Update document to READY with actual content - document.title = item["event_summary"] - document.content = summary_content - document.content_hash = item["content_hash"] - document.embedding = summary_embedding - document.document_metadata = { - "event_id": item["event_id"], - "event_summary": item["event_summary"], - "calendar_id": item["calendar_id"], - "start_time": item["start_time"], - "end_time": item["end_time"], - "location": item["location"], - "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - "connector_id": connector_id, - } - await safe_set_chunks(session, document, chunks) - document.updated_at = get_current_timestamp() - document.status = DocumentStatus.ready() - + await pipeline.index(document, connector_doc, user_llm) documents_indexed += 1 - # Batch commit every 10 documents (for ready status updates) if documents_indexed % 10 == 0: logger.info( f"Committing batch: {documents_indexed} Google Calendar events processed so far" @@ -588,21 +444,12 @@ async def index_google_calendar_events( except Exception as e: logger.error(f"Error processing Calendar event: {e!s}", exc_info=True) - # Mark document as failed with reason (visible in UI) - try: - document.status = DocumentStatus.failed(str(e)) - document.updated_at = get_current_timestamp() - except Exception as status_error: - logger.error( - f"Failed to update document status to failed: {status_error}" - ) documents_failed += 1 continue - # CRITICAL: Always update timestamp (even if 0 documents indexed) so Zero syncs + # ── Finalize ────────────────────────────────────────────────── await update_connector_last_indexed(session, connector, update_last_indexed) - # Final commit for any remaining documents not yet committed in batches logger.info( f"Final commit: Total {documents_indexed} Google Calendar events processed" ) @@ -612,22 +459,18 @@ async def index_google_calendar_events( "Successfully committed all Google Calendar document changes to database" ) except Exception as e: - # Handle any remaining integrity errors gracefully (race conditions, etc.) if ( "duplicate key value violates unique constraint" in str(e).lower() or "uniqueviolationerror" in str(e).lower() ): logger.warning( f"Duplicate content_hash detected during final commit. " - f"This may occur if the same event was indexed by multiple connectors. " f"Rolling back and continuing. Error: {e!s}" ) await session.rollback() - # Don't fail the entire task - some documents may have been successfully indexed else: raise - # Build warning message if there were issues warning_parts = [] if duplicate_content_count > 0: warning_parts.append(f"{duplicate_content_count} duplicate") diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py index 260db0ce6..92c074812 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py @@ -1,36 +1,41 @@ -"""Google Drive indexer using Surfsense file processors. +"""Google Drive indexer using the shared IndexingPipelineService. -Implements 2-phase document status updates for real-time UI feedback: -- Phase 1: Create all documents with 'pending' status (visible in UI immediately) -- Phase 2: Process each document: pending → processing → ready/failed +File-level pre-filter (_should_skip_file) handles md5/modifiedTime +checks and rename-only detection. download_and_extract_content() +returns markdown which is fed into ConnectorDocument -> pipeline. """ import logging import time from collections.abc import Awaitable, Callable +from sqlalchemy import String, cast, select from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm.attributes import flag_modified from app.config import config from app.connectors.google_drive import ( GoogleDriveClient, categorize_change, - download_and_process_file, + download_and_extract_content, fetch_all_changes, get_file_by_id, get_files_in_folder, get_start_page_token, ) +from app.connectors.google_drive.file_types import should_skip_file as skip_mime from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType +from app.indexing_pipeline.connector_document import ConnectorDocument +from app.indexing_pipeline.document_hashing import compute_identifier_hash +from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService +from app.services.llm_service import get_user_long_context_llm from app.services.task_logging_service import TaskLoggingService from app.tasks.connector_indexers.base import ( check_document_by_unique_identifier, get_connector_by_id, - get_current_timestamp, update_connector_last_indexed, ) -from app.utils.document_converters import generate_unique_identifier_hash from app.utils.google_credentials import ( COMPOSIO_GOOGLE_CONNECTOR_TYPES, build_composio_credentials, @@ -41,15 +46,423 @@ ACCEPTED_DRIVE_CONNECTOR_TYPES = { SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR, } -# Type hint for heartbeat callback HeartbeatCallbackType = Callable[[int], Awaitable[None]] - -# Heartbeat interval in seconds HEARTBEAT_INTERVAL_SECONDS = 30 logger = logging.getLogger(__name__) +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +async def _should_skip_file( + session: AsyncSession, + file: dict, + search_space_id: int, +) -> tuple[bool, str | None]: + """Pre-filter: detect unchanged / rename-only files. + + Returns (should_skip, message). + Side-effects: migrates legacy Composio hashes, updates renames in-place. + """ + file_id = file.get("id") + file_name = file.get("name", "Unknown") + mime_type = file.get("mimeType", "") + + if skip_mime(mime_type): + return True, "folder/shortcut" + if not file_id: + return True, "missing file_id" + + # --- locate existing document --- + primary_hash = compute_identifier_hash( + DocumentType.GOOGLE_DRIVE_FILE.value, file_id, search_space_id + ) + existing = await check_document_by_unique_identifier(session, primary_hash) + + if not existing: + legacy_hash = compute_identifier_hash( + DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR.value, file_id, search_space_id + ) + existing = await check_document_by_unique_identifier(session, legacy_hash) + if existing: + existing.unique_identifier_hash = primary_hash + if existing.document_type == DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR: + existing.document_type = DocumentType.GOOGLE_DRIVE_FILE + logger.info(f"Migrated legacy Composio Drive document: {file_id}") + + if not existing: + result = await session.execute( + select(Document).where( + Document.search_space_id == search_space_id, + Document.document_type.in_([ + DocumentType.GOOGLE_DRIVE_FILE, + DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR, + ]), + cast(Document.document_metadata["google_drive_file_id"], String) == file_id, + ) + ) + existing = result.scalar_one_or_none() + if existing: + existing.unique_identifier_hash = primary_hash + if existing.document_type == DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR: + existing.document_type = DocumentType.GOOGLE_DRIVE_FILE + logger.debug(f"Found legacy doc by metadata for file_id: {file_id}") + + if not existing: + return False, None + + # --- content-change check via md5 / modifiedTime --- + incoming_md5 = file.get("md5Checksum") + incoming_mtime = file.get("modifiedTime") + meta = existing.document_metadata or {} + stored_md5 = meta.get("md5_checksum") + stored_mtime = meta.get("modified_time") + + content_unchanged = False + if incoming_md5 and stored_md5: + content_unchanged = incoming_md5 == stored_md5 + elif incoming_md5 and not stored_md5: + return False, None + elif not incoming_md5 and incoming_mtime and stored_mtime: + content_unchanged = incoming_mtime == stored_mtime + elif not incoming_md5: + return False, None + + if not content_unchanged: + return False, None + + # --- rename-only detection --- + old_name = meta.get("FILE_NAME") or meta.get("google_drive_file_name") + if old_name and old_name != file_name: + existing.title = file_name + if not existing.document_metadata: + existing.document_metadata = {} + existing.document_metadata["FILE_NAME"] = file_name + existing.document_metadata["google_drive_file_name"] = file_name + if incoming_mtime: + existing.document_metadata["modified_time"] = incoming_mtime + flag_modified(existing, "document_metadata") + await session.commit() + logger.info(f"Rename-only update: '{old_name}' → '{file_name}'") + return True, f"File renamed: '{old_name}' → '{file_name}'" + + if not DocumentStatus.is_state(existing.status, DocumentStatus.READY): + existing.status = DocumentStatus.ready() + return True, "unchanged" + + +def _build_connector_doc( + file: dict, + markdown: str, + drive_metadata: dict, + *, + connector_id: int, + search_space_id: int, + user_id: str, + enable_summary: bool, +) -> ConnectorDocument: + """Build a ConnectorDocument from Drive file metadata + extracted markdown.""" + file_id = file.get("id", "") + file_name = file.get("name", "Unknown") + + metadata = { + **drive_metadata, + "connector_id": connector_id, + "document_type": "Google Drive File", + "connector_type": "Google Drive", + } + + fallback_summary = f"File: {file_name}\n\n{markdown[:4000]}" + + return ConnectorDocument( + title=file_name, + source_markdown=markdown, + unique_id=file_id, + document_type=DocumentType.GOOGLE_DRIVE_FILE, + search_space_id=search_space_id, + connector_id=connector_id, + created_by_id=user_id, + should_summarize=enable_summary, + fallback_summary=fallback_summary, + metadata=metadata, + ) + + +async def _process_single_file( + drive_client: GoogleDriveClient, + session: AsyncSession, + file: dict, + connector_id: int, + search_space_id: int, + user_id: str, + enable_summary: bool = True, +) -> tuple[int, int, int]: + """Download, extract, and index a single Drive file via the pipeline. + + Returns (indexed, skipped, failed). + """ + file_name = file.get("name", "Unknown") + + try: + skip, msg = await _should_skip_file(session, file, search_space_id) + if skip: + if msg and "renamed" in msg.lower(): + return 1, 0, 0 + return 0, 1, 0 + + markdown, drive_metadata, error = await download_and_extract_content( + drive_client, file + ) + if error or not markdown: + logger.warning(f"ETL failed for {file_name}: {error}") + return 0, 1, 0 + + doc = _build_connector_doc( + file, + markdown, + drive_metadata, + connector_id=connector_id, + search_space_id=search_space_id, + user_id=user_id, + enable_summary=enable_summary, + ) + + pipeline = IndexingPipelineService(session) + documents = await pipeline.prepare_for_indexing([doc]) + if not documents: + return 0, 1, 0 + + from app.indexing_pipeline.document_hashing import compute_unique_identifier_hash + + doc_map = {compute_unique_identifier_hash(doc): doc} + for document in documents: + connector_doc = doc_map.get(document.unique_identifier_hash) + if not connector_doc: + continue + user_llm = await get_user_long_context_llm(session, user_id, search_space_id) + await pipeline.index(document, connector_doc, user_llm) + + logger.info(f"Successfully indexed Google Drive file: {file_name}") + return 1, 0, 0 + + except Exception as e: + logger.error(f"Error processing file {file_name}: {e!s}", exc_info=True) + return 0, 0, 1 + + +async def _remove_document(session: AsyncSession, file_id: str, search_space_id: int): + """Remove a document that was deleted in Drive.""" + primary_hash = compute_identifier_hash( + DocumentType.GOOGLE_DRIVE_FILE.value, file_id, search_space_id + ) + existing = await check_document_by_unique_identifier(session, primary_hash) + + if not existing: + legacy_hash = compute_identifier_hash( + DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR.value, file_id, search_space_id + ) + existing = await check_document_by_unique_identifier(session, legacy_hash) + + if not existing: + result = await session.execute( + select(Document).where( + Document.search_space_id == search_space_id, + Document.document_type.in_([ + DocumentType.GOOGLE_DRIVE_FILE, + DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR, + ]), + cast(Document.document_metadata["google_drive_file_id"], String) == file_id, + ) + ) + existing = result.scalar_one_or_none() + + if existing: + await session.delete(existing) + logger.info(f"Removed deleted file document: {file_id}") + + +# --------------------------------------------------------------------------- +# Scan strategies +# --------------------------------------------------------------------------- + +async def _index_full_scan( + drive_client: GoogleDriveClient, + session: AsyncSession, + connector: object, + connector_id: int, + search_space_id: int, + user_id: str, + folder_id: str | None, + folder_name: str, + task_logger: TaskLoggingService, + log_entry: object, + max_files: int, + include_subfolders: bool = False, + on_heartbeat_callback: HeartbeatCallbackType | None = None, + enable_summary: bool = True, +) -> tuple[int, int]: + """Full scan indexing of a folder.""" + await task_logger.log_task_progress( + log_entry, + f"Starting full scan of folder: {folder_name} (include_subfolders={include_subfolders})", + {"stage": "full_scan", "folder_id": folder_id, "include_subfolders": include_subfolders}, + ) + + indexed = 0 + skipped = 0 + failed = 0 + files_processed = 0 + last_heartbeat = time.time() + folders_to_process = [(folder_id, folder_name)] + first_error: str | None = None + + while folders_to_process and files_processed < max_files: + cur_id, cur_name = folders_to_process.pop(0) + page_token = None + + while files_processed < max_files: + files, next_token, error = await get_files_in_folder( + drive_client, cur_id, include_subfolders=True, page_token=page_token, + ) + if error: + logger.error(f"Error listing files in {cur_name}: {error}") + if first_error is None: + first_error = error + break + if not files: + break + + for file in files: + if files_processed >= max_files: + break + + mime = file.get("mimeType", "") + if mime == "application/vnd.google-apps.folder": + if include_subfolders: + folders_to_process.append((file["id"], file.get("name", "Unknown"))) + continue + + files_processed += 1 + + if on_heartbeat_callback: + now = time.time() + if now - last_heartbeat >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(indexed) + last_heartbeat = now + + i, s, f = await _process_single_file( + drive_client, session, file, + connector_id, search_space_id, user_id, enable_summary, + ) + indexed += i + skipped += s + failed += f + + if indexed > 0 and indexed % 10 == 0: + await session.commit() + + page_token = next_token + if not page_token: + break + + if not files_processed and first_error: + err_lower = first_error.lower() + if "401" in first_error or "invalid credentials" in err_lower or "authError" in first_error: + raise Exception( + f"Google Drive authentication failed. Please re-authenticate. (Error: {first_error})" + ) + raise Exception(f"Failed to list Google Drive files: {first_error}") + + logger.info(f"Full scan complete: {indexed} indexed, {skipped} skipped, {failed} failed") + return indexed, skipped + + +async def _index_with_delta_sync( + drive_client: GoogleDriveClient, + session: AsyncSession, + connector: object, + connector_id: int, + search_space_id: int, + user_id: str, + folder_id: str | None, + start_page_token: str, + task_logger: TaskLoggingService, + log_entry: object, + max_files: int, + include_subfolders: bool = False, + on_heartbeat_callback: HeartbeatCallbackType | None = None, + enable_summary: bool = True, +) -> tuple[int, int]: + """Delta sync using change tracking.""" + await task_logger.log_task_progress( + log_entry, + f"Starting delta sync from token: {start_page_token[:20]}...", + {"stage": "delta_sync", "start_token": start_page_token}, + ) + + changes, _final_token, error = await fetch_all_changes(drive_client, start_page_token, folder_id) + if error: + err_lower = error.lower() + if "401" in error or "invalid credentials" in err_lower or "authError" in error: + raise Exception( + f"Google Drive authentication failed. Please re-authenticate. (Error: {error})" + ) + raise Exception(f"Failed to fetch Google Drive changes: {error}") + + if not changes: + logger.info("No changes detected since last sync") + return 0, 0 + + logger.info(f"Processing {len(changes)} changes") + indexed = 0 + skipped = 0 + failed = 0 + files_processed = 0 + last_heartbeat = time.time() + + for change in changes: + if files_processed >= max_files: + break + files_processed += 1 + change_type = categorize_change(change) + + if change_type in ["removed", "trashed"]: + fid = change.get("fileId") + if fid: + await _remove_document(session, fid, search_space_id) + continue + + file = change.get("file") + if not file: + continue + + if on_heartbeat_callback: + now = time.time() + if now - last_heartbeat >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(indexed) + last_heartbeat = now + + i, s, f = await _process_single_file( + drive_client, session, file, + connector_id, search_space_id, user_id, enable_summary, + ) + indexed += i + skipped += s + failed += f + + if indexed > 0 and indexed % 10 == 0: + await session.commit() + + logger.info(f"Delta sync complete: {indexed} indexed, {skipped} skipped, {failed} failed") + return indexed, skipped + + +# --------------------------------------------------------------------------- +# Public entry points +# --------------------------------------------------------------------------- + async def index_google_drive_files( session: AsyncSession, connector_id: int, @@ -63,234 +476,125 @@ async def index_google_drive_files( include_subfolders: bool = False, on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, int, str | None]: - """ - Index Google Drive files for a specific connector. - - Args: - session: Database session - connector_id: ID of the Drive connector - search_space_id: ID of the search space - user_id: ID of the user - folder_id: Specific folder to index (from UI/request, takes precedence) - folder_name: Folder name for display (from UI/request) - use_delta_sync: Whether to use change tracking for incremental sync - update_last_indexed: Whether to update last_indexed_at timestamp - max_files: Maximum number of files to index - include_subfolders: Whether to recursively index files in subfolders - on_heartbeat_callback: Optional callback to update notification during long-running indexing. - - Returns: - Tuple of (number_of_indexed_files, number_of_skipped_files, error_message) - """ + """Index Google Drive files for a specific connector.""" task_logger = TaskLoggingService(session, search_space_id) - log_entry = await task_logger.log_task_start( task_name="google_drive_files_indexing", source="connector_indexing_task", message=f"Starting Google Drive indexing for connector {connector_id}", metadata={ - "connector_id": connector_id, - "user_id": str(user_id), - "folder_id": folder_id, - "use_delta_sync": use_delta_sync, - "max_files": max_files, + "connector_id": connector_id, "user_id": str(user_id), + "folder_id": folder_id, "use_delta_sync": use_delta_sync, "max_files": max_files, }, ) try: - # Accept both native and Composio Drive connectors connector = None for ct in ACCEPTED_DRIVE_CONNECTOR_TYPES: connector = await get_connector_by_id(session, connector_id, ct) if connector: break - if not connector: error_msg = f"Google Drive connector with ID {connector_id} not found" - await task_logger.log_task_failure( - log_entry, error_msg, None, {"error_type": "ConnectorNotFound"} - ) + await task_logger.log_task_failure(log_entry, error_msg, None, {"error_type": "ConnectorNotFound"}) return 0, 0, error_msg await task_logger.log_task_progress( - log_entry, - f"Initializing Google Drive client for connector {connector_id}", + log_entry, f"Initializing Google Drive client for connector {connector_id}", {"stage": "client_initialization"}, ) - # Build credentials based on connector type pre_built_credentials = None if connector.connector_type in COMPOSIO_GOOGLE_CONNECTOR_TYPES: connected_account_id = connector.config.get("composio_connected_account_id") if not connected_account_id: error_msg = f"Composio connected_account_id not found for connector {connector_id}" - await task_logger.log_task_failure( - log_entry, - error_msg, - "Missing Composio account", - {"error_type": "MissingComposioAccount"}, - ) + await task_logger.log_task_failure(log_entry, error_msg, "Missing Composio account", {"error_type": "MissingComposioAccount"}) return 0, 0, error_msg pre_built_credentials = build_composio_credentials(connected_account_id) else: token_encrypted = connector.config.get("_token_encrypted", False) - if token_encrypted: - if not config.SECRET_KEY: - await task_logger.log_task_failure( - log_entry, - f"SECRET_KEY not configured but credentials are marked as encrypted for connector {connector_id}", - "Missing SECRET_KEY for token decryption", - {"error_type": "MissingSecretKey"}, - ) - return ( - 0, - 0, - "SECRET_KEY not configured but credentials are marked as encrypted", - ) - logger.info( - f"Google Drive credentials are encrypted for connector {connector_id}, will decrypt during client initialization" + if token_encrypted and not config.SECRET_KEY: + await task_logger.log_task_failure( + log_entry, "SECRET_KEY not configured but credentials are encrypted", + "Missing SECRET_KEY", {"error_type": "MissingSecretKey"}, ) + return 0, 0, "SECRET_KEY not configured but credentials are marked as encrypted" connector_enable_summary = getattr(connector, "enable_summary", True) - - drive_client = GoogleDriveClient( - session, connector_id, credentials=pre_built_credentials - ) + drive_client = GoogleDriveClient(session, connector_id, credentials=pre_built_credentials) if not folder_id: error_msg = "folder_id is required for Google Drive indexing" - await task_logger.log_task_failure( - log_entry, error_msg, {"error_type": "MissingParameter"} - ) + await task_logger.log_task_failure(log_entry, error_msg, {"error_type": "MissingParameter"}) return 0, 0, error_msg target_folder_id = folder_id target_folder_name = folder_name or "Selected Folder" - logger.info( - f"Indexing Google Drive folder: {target_folder_name} ({target_folder_id})" - ) - folder_tokens = connector.config.get("folder_tokens", {}) start_page_token = folder_tokens.get(target_folder_id) - can_use_delta_sync = ( - use_delta_sync and start_page_token and connector.last_indexed_at - ) + can_use_delta = use_delta_sync and start_page_token and connector.last_indexed_at - if can_use_delta_sync: + if can_use_delta: logger.info(f"Using delta sync for connector {connector_id}") - result = await _index_with_delta_sync( - drive_client=drive_client, - session=session, - connector=connector, - connector_id=connector_id, - search_space_id=search_space_id, - user_id=user_id, - folder_id=target_folder_id, - start_page_token=start_page_token, - task_logger=task_logger, - log_entry=log_entry, - max_files=max_files, - include_subfolders=include_subfolders, - on_heartbeat_callback=on_heartbeat_callback, - enable_summary=connector_enable_summary, + documents_indexed, documents_skipped = await _index_with_delta_sync( + drive_client, session, connector, connector_id, search_space_id, user_id, + target_folder_id, start_page_token, task_logger, log_entry, max_files, + include_subfolders, on_heartbeat_callback, connector_enable_summary, ) - documents_indexed, documents_skipped = result - - # Reconciliation: full scan re-indexes documents that were manually - # deleted from SurfSense but still exist in Google Drive. - # Already-indexed files are skipped via md5/modifiedTime checks, - # so the overhead is just one API listing call + fast DB lookups. logger.info("Running reconciliation scan after delta sync") - reconcile_result = await _index_full_scan( - drive_client=drive_client, - session=session, - connector=connector, - connector_id=connector_id, - search_space_id=search_space_id, - user_id=user_id, - folder_id=target_folder_id, - folder_name=target_folder_name, - task_logger=task_logger, - log_entry=log_entry, - max_files=max_files, - include_subfolders=include_subfolders, - on_heartbeat_callback=on_heartbeat_callback, - enable_summary=connector_enable_summary, + ri, rs = await _index_full_scan( + drive_client, session, connector, connector_id, search_space_id, user_id, + target_folder_id, target_folder_name, task_logger, log_entry, max_files, + include_subfolders, on_heartbeat_callback, connector_enable_summary, ) - documents_indexed += reconcile_result[0] - documents_skipped += reconcile_result[1] + documents_indexed += ri + documents_skipped += rs else: logger.info(f"Using full scan for connector {connector_id}") - result = await _index_full_scan( - drive_client=drive_client, - session=session, - connector=connector, - connector_id=connector_id, - search_space_id=search_space_id, - user_id=user_id, - folder_id=target_folder_id, - folder_name=target_folder_name, - task_logger=task_logger, - log_entry=log_entry, - max_files=max_files, - include_subfolders=include_subfolders, - on_heartbeat_callback=on_heartbeat_callback, - enable_summary=connector_enable_summary, + documents_indexed, documents_skipped = await _index_full_scan( + drive_client, session, connector, connector_id, search_space_id, user_id, + target_folder_id, target_folder_name, task_logger, log_entry, max_files, + include_subfolders, on_heartbeat_callback, connector_enable_summary, ) - documents_indexed, documents_skipped = result - if documents_indexed > 0 or can_use_delta_sync: + if documents_indexed > 0 or can_use_delta: new_token, token_error = await get_start_page_token(drive_client) if new_token and not token_error: - from sqlalchemy.orm.attributes import flag_modified - - # Refresh connector to reload attributes that may have been expired by earlier commits await session.refresh(connector) - if "folder_tokens" not in connector.config: connector.config["folder_tokens"] = {} connector.config["folder_tokens"][target_folder_id] = new_token flag_modified(connector, "config") - await update_connector_last_indexed(session, connector, update_last_indexed) await session.commit() - logger.info("Successfully committed Google Drive indexing changes to database") await task_logger.log_task_success( log_entry, f"Successfully completed Google Drive indexing for connector {connector_id}", { - "files_processed": documents_indexed, - "files_skipped": documents_skipped, - "sync_type": "delta" if can_use_delta_sync else "full", - "folder": target_folder_name, + "files_processed": documents_indexed, "files_skipped": documents_skipped, + "sync_type": "delta" if can_use_delta else "full", "folder": target_folder_name, }, ) - - logger.info( - f"Google Drive indexing completed: {documents_indexed} files indexed, {documents_skipped} skipped" - ) + logger.info(f"Google Drive indexing completed: {documents_indexed} indexed, {documents_skipped} skipped") return documents_indexed, documents_skipped, None except SQLAlchemyError as db_error: await session.rollback() await task_logger.log_task_failure( - log_entry, - f"Database error during Google Drive indexing for connector {connector_id}", - str(db_error), - {"error_type": "SQLAlchemyError"}, + log_entry, f"Database error during Google Drive indexing for connector {connector_id}", + str(db_error), {"error_type": "SQLAlchemyError"}, ) logger.error(f"Database error: {db_error!s}", exc_info=True) return 0, 0, f"Database error: {db_error!s}" except Exception as e: await session.rollback() await task_logger.log_task_failure( - log_entry, - f"Failed to index Google Drive files for connector {connector_id}", - str(e), - {"error_type": type(e).__name__}, + log_entry, f"Failed to index Google Drive files for connector {connector_id}", + str(e), {"error_type": type(e).__name__}, ) logger.error(f"Failed to index Google Drive files: {e!s}", exc_info=True) return 0, 0, f"Failed to index Google Drive files: {e!s}" @@ -304,964 +608,81 @@ async def index_google_drive_single_file( file_id: str, file_name: str | None = None, ) -> tuple[int, str | None]: - """ - Index a single Google Drive file by its ID. - - Args: - session: Database session - connector_id: ID of the Drive connector - search_space_id: ID of the search space - user_id: ID of the user - file_id: Specific file ID to index - file_name: File name for display (optional) - - Returns: - Tuple of (number_of_indexed_files, error_message) - """ + """Index a single Google Drive file by its ID.""" task_logger = TaskLoggingService(session, search_space_id) - log_entry = await task_logger.log_task_start( task_name="google_drive_single_file_indexing", source="connector_indexing_task", message=f"Starting Google Drive single file indexing for file {file_id}", - metadata={ - "connector_id": connector_id, - "user_id": str(user_id), - "file_id": file_id, - "file_name": file_name, - }, + metadata={"connector_id": connector_id, "user_id": str(user_id), "file_id": file_id, "file_name": file_name}, ) try: - # Accept both native and Composio Drive connectors connector = None for ct in ACCEPTED_DRIVE_CONNECTOR_TYPES: connector = await get_connector_by_id(session, connector_id, ct) if connector: break - if not connector: error_msg = f"Google Drive connector with ID {connector_id} not found" - await task_logger.log_task_failure( - log_entry, error_msg, None, {"error_type": "ConnectorNotFound"} - ) + await task_logger.log_task_failure(log_entry, error_msg, None, {"error_type": "ConnectorNotFound"}) return 0, error_msg - await task_logger.log_task_progress( - log_entry, - f"Initializing Google Drive client for connector {connector_id}", - {"stage": "client_initialization"}, - ) - pre_built_credentials = None if connector.connector_type in COMPOSIO_GOOGLE_CONNECTOR_TYPES: connected_account_id = connector.config.get("composio_connected_account_id") if not connected_account_id: error_msg = f"Composio connected_account_id not found for connector {connector_id}" - await task_logger.log_task_failure( - log_entry, - error_msg, - "Missing Composio account", - {"error_type": "MissingComposioAccount"}, - ) + await task_logger.log_task_failure(log_entry, error_msg, "Missing Composio account", {"error_type": "MissingComposioAccount"}) return 0, error_msg pre_built_credentials = build_composio_credentials(connected_account_id) else: token_encrypted = connector.config.get("_token_encrypted", False) - if token_encrypted: - if not config.SECRET_KEY: - await task_logger.log_task_failure( - log_entry, - f"SECRET_KEY not configured but credentials are marked as encrypted for connector {connector_id}", - "Missing SECRET_KEY for token decryption", - {"error_type": "MissingSecretKey"}, - ) - return ( - 0, - "SECRET_KEY not configured but credentials are marked as encrypted", - ) - logger.info( - f"Google Drive credentials are encrypted for connector {connector_id}, will decrypt during client initialization" + if token_encrypted and not config.SECRET_KEY: + await task_logger.log_task_failure( + log_entry, "SECRET_KEY not configured but credentials are encrypted", + "Missing SECRET_KEY", {"error_type": "MissingSecretKey"}, ) + return 0, "SECRET_KEY not configured but credentials are marked as encrypted" connector_enable_summary = getattr(connector, "enable_summary", True) + drive_client = GoogleDriveClient(session, connector_id, credentials=pre_built_credentials) - drive_client = GoogleDriveClient( - session, connector_id, credentials=pre_built_credentials - ) - - # Fetch the file metadata file, error = await get_file_by_id(drive_client, file_id) - if error or not file: error_msg = f"Failed to fetch file {file_id}: {error or 'File not found'}" - await task_logger.log_task_failure( - log_entry, error_msg, {"error_type": "FileNotFound"} - ) + await task_logger.log_task_failure(log_entry, error_msg, {"error_type": "FileNotFound"}) return 0, error_msg display_name = file_name or file.get("name", "Unknown") - logger.info(f"Indexing Google Drive file: {display_name} ({file_id})") - # Create pending document for status visibility - pending_doc, should_skip = await _create_pending_document_for_file( - session=session, - file=file, - connector_id=connector_id, - search_space_id=search_space_id, - user_id=user_id, - ) - - if should_skip: - await task_logger.log_task_progress( - log_entry, - f"File {display_name} is unchanged or not indexable", - {"status": "skipped"}, - ) - return 0, None - - # Commit pending document so it appears in UI - if pending_doc and pending_doc.id is None: - await session.commit() - - # Process the file indexed, _skipped, failed = await _process_single_file( - drive_client=drive_client, - session=session, - file=file, - connector_id=connector_id, - search_space_id=search_space_id, - user_id=user_id, - task_logger=task_logger, - log_entry=log_entry, - pending_document=pending_doc, - enable_summary=connector_enable_summary, + drive_client, session, file, + connector_id, search_space_id, user_id, connector_enable_summary, ) - await session.commit() - logger.info( - "Successfully committed Google Drive file indexing changes to database" - ) if failed > 0: error_msg = f"Failed to index file {display_name}" - await task_logger.log_task_failure( - log_entry, - error_msg, - {"file_name": display_name, "file_id": file_id}, - ) + await task_logger.log_task_failure(log_entry, error_msg, {"file_name": display_name, "file_id": file_id}) return 0, error_msg if indexed > 0: await task_logger.log_task_success( - log_entry, - f"Successfully indexed file {display_name}", - { - "file_name": display_name, - "file_id": file_id, - }, + log_entry, f"Successfully indexed file {display_name}", + {"file_name": display_name, "file_id": file_id}, ) - logger.info(f"Google Drive file indexing completed: {display_name}") return 1, None - else: - await task_logger.log_task_progress( - log_entry, - f"File {display_name} was skipped", - {"status": "skipped"}, - ) - return 0, None + + return 0, None except SQLAlchemyError as db_error: await session.rollback() - await task_logger.log_task_failure( - log_entry, - "Database error during file indexing", - str(db_error), - {"error_type": "SQLAlchemyError"}, - ) + await task_logger.log_task_failure(log_entry, "Database error during file indexing", str(db_error), {"error_type": "SQLAlchemyError"}) logger.error(f"Database error: {db_error!s}", exc_info=True) return 0, f"Database error: {db_error!s}" except Exception as e: await session.rollback() - await task_logger.log_task_failure( - log_entry, - "Failed to index Google Drive file", - str(e), - {"error_type": type(e).__name__}, - ) + await task_logger.log_task_failure(log_entry, "Failed to index Google Drive file", str(e), {"error_type": type(e).__name__}) logger.error(f"Failed to index Google Drive file: {e!s}", exc_info=True) return 0, f"Failed to index Google Drive file: {e!s}" - - -async def _index_full_scan( - drive_client: GoogleDriveClient, - session: AsyncSession, - connector: any, - connector_id: int, - search_space_id: int, - user_id: str, - folder_id: str | None, - folder_name: str, - task_logger: TaskLoggingService, - log_entry: any, - max_files: int, - include_subfolders: bool = False, - on_heartbeat_callback: HeartbeatCallbackType | None = None, - enable_summary: bool = True, -) -> tuple[int, int]: - """Perform full scan indexing of a folder. - - Implements 2-phase document status updates for real-time UI feedback: - - Phase 1: Collect all files and create pending documents (visible in UI immediately) - - Phase 2: Process each file: pending → processing → ready/failed - """ - await task_logger.log_task_progress( - log_entry, - f"Starting full scan of folder: {folder_name} (include_subfolders={include_subfolders})", - { - "stage": "full_scan", - "folder_id": folder_id, - "include_subfolders": include_subfolders, - }, - ) - - documents_indexed = 0 - documents_skipped = 0 - documents_failed = 0 - files_processed = 0 - - # Heartbeat tracking - update notification periodically to prevent appearing stuck - last_heartbeat_time = time.time() - - # ======================================================================= - # PHASE 1: Collect all files and create pending documents - # This makes ALL documents visible in the UI immediately with pending status - # ======================================================================= - files_to_process = [] # List of (file, pending_document or None) - new_documents_created = False - - # Queue of folders to process: (folder_id, folder_name) - folders_to_process = [(folder_id, folder_name)] - first_listing_error: str | None = None - - logger.info("Phase 1: Collecting files and creating pending documents") - - while folders_to_process and files_processed < max_files: - current_folder_id, current_folder_name = folders_to_process.pop(0) - logger.info(f"Scanning folder: {current_folder_name} ({current_folder_id})") - page_token = None - - while files_processed < max_files: - # Get files and folders in current folder - files, next_token, error = await get_files_in_folder( - drive_client, - current_folder_id, - include_subfolders=True, - page_token=page_token, - ) - - if error: - logger.error(f"Error listing files in {current_folder_name}: {error}") - if first_listing_error is None: - first_listing_error = error - break - - if not files: - break - - for file in files: - if files_processed >= max_files: - break - - mime_type = file.get("mimeType", "") - - # If this is a folder and include_subfolders is enabled, queue it for processing - if mime_type == "application/vnd.google-apps.folder": - if include_subfolders: - folders_to_process.append( - (file["id"], file.get("name", "Unknown")) - ) - logger.debug(f"Queued subfolder: {file.get('name', 'Unknown')}") - continue - - files_processed += 1 - - # Create pending document for this file - pending_doc, should_skip = await _create_pending_document_for_file( - session=session, - file=file, - connector_id=connector_id, - search_space_id=search_space_id, - user_id=user_id, - ) - - if should_skip: - documents_skipped += 1 - continue - - if pending_doc and pending_doc.id is None: - # New document was created - new_documents_created = True - - files_to_process.append((file, pending_doc)) - - page_token = next_token - if not page_token: - break - - if not files_to_process and first_listing_error: - error_lower = first_listing_error.lower() - if ( - "401" in first_listing_error - or "invalid credentials" in error_lower - or "authError" in first_listing_error - ): - raise Exception( - f"Google Drive authentication failed. Please re-authenticate. " - f"(Error: {first_listing_error})" - ) - raise Exception(f"Failed to list Google Drive files: {first_listing_error}") - - # Commit all pending documents - they all appear in UI now - if new_documents_created: - logger.info( - f"Phase 1: Committing {len([f for f in files_to_process if f[1] and f[1].id is None])} pending documents" - ) - await session.commit() - - # ======================================================================= - # PHASE 2: Process each file one by one - # Each document transitions: pending → processing → ready/failed - # ======================================================================= - logger.info(f"Phase 2: Processing {len(files_to_process)} files") - - for file, pending_doc in files_to_process: - # Check if it's time for a heartbeat update - if on_heartbeat_callback: - current_time = time.time() - if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS: - await on_heartbeat_callback(documents_indexed) - last_heartbeat_time = current_time - - indexed, skipped, failed = await _process_single_file( - drive_client=drive_client, - session=session, - file=file, - connector_id=connector_id, - search_space_id=search_space_id, - user_id=user_id, - task_logger=task_logger, - log_entry=log_entry, - pending_document=pending_doc, - enable_summary=enable_summary, - ) - - documents_indexed += indexed - documents_skipped += skipped - documents_failed += failed - - if documents_indexed % 10 == 0 and documents_indexed > 0: - await session.commit() - logger.info(f"Committed batch: {documents_indexed} files indexed so far") - - logger.info( - f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped, {documents_failed} failed" - ) - return documents_indexed, documents_skipped - - -async def _index_with_delta_sync( - drive_client: GoogleDriveClient, - session: AsyncSession, - connector: any, - connector_id: int, - search_space_id: int, - user_id: str, - folder_id: str | None, - start_page_token: str, - task_logger: TaskLoggingService, - log_entry: any, - max_files: int, - include_subfolders: bool = False, - on_heartbeat_callback: HeartbeatCallbackType | None = None, - enable_summary: bool = True, -) -> tuple[int, int]: - """Perform delta sync indexing using change tracking. - - Note: include_subfolders is accepted for API consistency but delta sync - automatically tracks changes across all folders including subfolders. - - Implements 2-phase document status updates for real-time UI feedback: - - Phase 1: Collect all changes and create pending documents (visible in UI immediately) - - Phase 2: Process each file: pending → processing → ready/failed - """ - await task_logger.log_task_progress( - log_entry, - f"Starting delta sync from token: {start_page_token[:20]}...", - {"stage": "delta_sync", "start_token": start_page_token}, - ) - - changes, _final_token, error = await fetch_all_changes( - drive_client, start_page_token, folder_id - ) - - if error: - logger.error(f"Error fetching changes: {error}") - error_lower = error.lower() - if ( - "401" in error - or "invalid credentials" in error_lower - or "authError" in error - ): - raise Exception( - f"Google Drive authentication failed. Please re-authenticate. " - f"(Error: {error})" - ) - raise Exception(f"Failed to fetch Google Drive changes: {error}") - - if not changes: - logger.info("No changes detected since last sync") - return 0, 0 - - logger.info(f"Processing {len(changes)} changes") - - documents_indexed = 0 - documents_skipped = 0 - documents_failed = 0 - files_processed = 0 - - # Heartbeat tracking - update notification periodically to prevent appearing stuck - last_heartbeat_time = time.time() - - # ======================================================================= - # PHASE 1: Analyze changes and create pending documents for new/modified files - # ======================================================================= - changes_to_process = [] # List of (change, file, pending_document or None) - new_documents_created = False - - logger.info("Phase 1: Analyzing changes and creating pending documents") - - for change in changes: - if files_processed >= max_files: - break - - files_processed += 1 - change_type = categorize_change(change) - - if change_type in ["removed", "trashed"]: - file_id = change.get("fileId") - if file_id: - await _remove_document(session, file_id, search_space_id) - continue - - file = change.get("file") - if not file: - continue - - # Create pending document for this file - pending_doc, should_skip = await _create_pending_document_for_file( - session=session, - file=file, - connector_id=connector_id, - search_space_id=search_space_id, - user_id=user_id, - ) - - if should_skip: - documents_skipped += 1 - continue - - if pending_doc and pending_doc.id is None: - # New document was created - new_documents_created = True - - changes_to_process.append((change, file, pending_doc)) - - # Commit all pending documents - they all appear in UI now - if new_documents_created: - logger.info("Phase 1: Committing pending documents") - await session.commit() - - # ======================================================================= - # PHASE 2: Process each file one by one - # Each document transitions: pending → processing → ready/failed - # ======================================================================= - logger.info(f"Phase 2: Processing {len(changes_to_process)} changes") - - for _, file, pending_doc in changes_to_process: - # Check if it's time for a heartbeat update - if on_heartbeat_callback: - current_time = time.time() - if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS: - await on_heartbeat_callback(documents_indexed) - last_heartbeat_time = current_time - - indexed, skipped, failed = await _process_single_file( - drive_client=drive_client, - session=session, - file=file, - connector_id=connector_id, - search_space_id=search_space_id, - user_id=user_id, - task_logger=task_logger, - log_entry=log_entry, - pending_document=pending_doc, - enable_summary=enable_summary, - ) - - documents_indexed += indexed - documents_skipped += skipped - documents_failed += failed - - if documents_indexed % 10 == 0 and documents_indexed > 0: - await session.commit() - logger.info(f"Committed batch: {documents_indexed} changes processed") - - logger.info( - f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped, {documents_failed} failed" - ) - return documents_indexed, documents_skipped - - -async def _create_pending_document_for_file( - session: AsyncSession, - file: dict, - connector_id: int, - search_space_id: int, - user_id: str, -) -> tuple[Document | None, bool]: - """ - Create a pending document for a Google Drive file if it doesn't exist. - - This is Phase 1 of the 2-phase document status update pattern. - Creates documents with 'pending' status so they appear in UI immediately. - - Args: - session: Database session - file: File metadata from Google Drive API - connector_id: ID of the Drive connector - search_space_id: ID of the search space - user_id: ID of the user - - Returns: - Tuple of (document, should_skip): - - (existing_doc, False): Existing document that needs update - - (new_pending_doc, False): New pending document created - - (None, True): File should be skipped (unchanged, rename-only, or folder) - """ - from app.connectors.google_drive.file_types import should_skip_file - - file_id = file.get("id") - file_name = file.get("name", "Unknown") - mime_type = file.get("mimeType", "") - - # Skip folders and shortcuts - if should_skip_file(mime_type): - return None, True - - if not file_id: - return None, True - - # Generate unique identifier hash for this file - unique_identifier_hash = generate_unique_identifier_hash( - DocumentType.GOOGLE_DRIVE_FILE, file_id, search_space_id - ) - - # Check if document exists (primary hash first, then legacy Composio hash) - existing_document = await check_document_by_unique_identifier( - session, unique_identifier_hash - ) - if not existing_document: - legacy_hash = generate_unique_identifier_hash( - DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR, file_id, search_space_id - ) - existing_document = await check_document_by_unique_identifier( - session, legacy_hash - ) - if existing_document: - existing_document.unique_identifier_hash = unique_identifier_hash - if ( - existing_document.document_type - == DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR - ): - existing_document.document_type = DocumentType.GOOGLE_DRIVE_FILE - logger.info(f"Migrated legacy Composio document to native type: {file_id}") - - if existing_document: - # Check if this is a rename-only update (content unchanged) - incoming_md5 = file.get("md5Checksum") - incoming_modified_time = file.get("modifiedTime") - doc_metadata = existing_document.document_metadata or {} - stored_md5 = doc_metadata.get("md5_checksum") - stored_modified_time = doc_metadata.get("modified_time") - - # Determine if content changed - content_unchanged = False - if incoming_md5 and stored_md5: - content_unchanged = incoming_md5 == stored_md5 - elif not incoming_md5 and incoming_modified_time and stored_modified_time: - # Google Workspace file - use modifiedTime as fallback - content_unchanged = incoming_modified_time == stored_modified_time - - if content_unchanged: - # Ensure status is ready (might have been stuck in processing/pending) - if not DocumentStatus.is_state( - existing_document.status, DocumentStatus.READY - ): - existing_document.status = DocumentStatus.ready() - return None, True - - # Content changed - return existing document for update - return existing_document, False - - # Create new pending document - document = Document( - search_space_id=search_space_id, - title=file_name, - document_type=DocumentType.GOOGLE_DRIVE_FILE, - document_metadata={ - "google_drive_file_id": file_id, - "google_drive_file_name": file_name, - "google_drive_mime_type": mime_type, - "connector_id": connector_id, - }, - content="Pending...", # Placeholder until processed - content_hash=unique_identifier_hash, # Temporary unique value - updated when ready - unique_identifier_hash=unique_identifier_hash, - embedding=None, - chunks=[], # Empty at creation - status=DocumentStatus.pending(), # Pending until processing starts - updated_at=get_current_timestamp(), - created_by_id=user_id, - connector_id=connector_id, - ) - session.add(document) - - return document, False - - -async def _check_rename_only_update( - session: AsyncSession, - file: dict, - search_space_id: int, -) -> tuple[bool, str | None]: - """ - Check if a file only needs a rename update (no content change). - - Uses md5Checksum comparison (preferred) or modifiedTime (fallback for Google Workspace files) - to detect if content has changed. This optimization prevents unnecessary ETL API calls - (Docling/LlamaCloud) for rename-only operations. - - Args: - session: Database session - file: File metadata from Google Drive API - search_space_id: ID of the search space - - Returns: - Tuple of (is_rename_only, message) - - (True, message): Only filename changed, document was updated - - (False, None): Content changed or new file, needs full processing - """ - from sqlalchemy import String, cast, select - from sqlalchemy.orm.attributes import flag_modified - - from app.db import Document - - file_id = file.get("id") - file_name = file.get("name", "Unknown") - incoming_md5 = file.get("md5Checksum") # None for Google Workspace files - incoming_modified_time = file.get("modifiedTime") - - if not file_id: - return False, None - - # Try to find existing document by file_id-based hash (primary method) - primary_hash = generate_unique_identifier_hash( - DocumentType.GOOGLE_DRIVE_FILE, file_id, search_space_id - ) - existing_document = await check_document_by_unique_identifier(session, primary_hash) - - # Fallback: legacy Composio hash - if not existing_document: - legacy_hash = generate_unique_identifier_hash( - DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR, file_id, search_space_id - ) - existing_document = await check_document_by_unique_identifier( - session, legacy_hash - ) - - # Fallback: metadata search (covers old filename-based hashes) - if not existing_document: - result = await session.execute( - select(Document).where( - Document.search_space_id == search_space_id, - Document.document_type.in_( - [ - DocumentType.GOOGLE_DRIVE_FILE, - DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR, - ] - ), - cast(Document.document_metadata["google_drive_file_id"], String) - == file_id, - ) - ) - existing_document = result.scalar_one_or_none() - if existing_document: - logger.debug(f"Found legacy document by metadata for file_id: {file_id}") - - # Migrate legacy Composio document to native type - if existing_document: - if existing_document.unique_identifier_hash != primary_hash: - existing_document.unique_identifier_hash = primary_hash - if ( - existing_document.document_type - == DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR - ): - existing_document.document_type = DocumentType.GOOGLE_DRIVE_FILE - logger.info(f"Migrated legacy Composio Drive document: {file_id}") - - if not existing_document: - # New file, needs full processing - return False, None - - # Get stored checksums/timestamps from document metadata - doc_metadata = existing_document.document_metadata or {} - stored_md5 = doc_metadata.get("md5_checksum") - stored_modified_time = doc_metadata.get("modified_time") - - # Determine if content changed using md5Checksum (preferred) or modifiedTime (fallback) - content_unchanged = False - - if incoming_md5 and stored_md5: - # Best case: Compare md5 checksums (only changes when content changes, not on rename) - content_unchanged = incoming_md5 == stored_md5 - logger.debug(f"MD5 comparison for {file_name}: unchanged={content_unchanged}") - elif incoming_md5 and not stored_md5: - # Have incoming md5 but no stored md5 (legacy doc) - need to reprocess to store it - logger.debug( - f"No stored md5 for {file_name}, will reprocess to store md5_checksum" - ) - return False, None - elif not incoming_md5: - # Google Workspace file (no md5Checksum available) - fall back to modifiedTime - # Note: modifiedTime is less reliable as it changes on rename too, but it's the best we have - if incoming_modified_time and stored_modified_time: - content_unchanged = incoming_modified_time == stored_modified_time - logger.debug( - f"ModifiedTime fallback for Google Workspace file {file_name}: unchanged={content_unchanged}" - ) - else: - # No stored modifiedTime (legacy) - reprocess to store it - return False, None - - if content_unchanged: - # Content hasn't changed - check if filename changed - old_name = doc_metadata.get("FILE_NAME") or doc_metadata.get( - "google_drive_file_name" - ) - - if old_name and old_name != file_name: - # Rename-only update - update the document without re-processing - existing_document.title = file_name - if not existing_document.document_metadata: - existing_document.document_metadata = {} - existing_document.document_metadata["FILE_NAME"] = file_name - existing_document.document_metadata["google_drive_file_name"] = file_name - # Also update modified_time for Google Workspace files (since it changed on rename) - if incoming_modified_time: - existing_document.document_metadata["modified_time"] = ( - incoming_modified_time - ) - flag_modified(existing_document, "document_metadata") - await session.commit() - - logger.info( - f"Rename-only update: '{old_name}' → '{file_name}' (skipped ETL)" - ) - return ( - True, - f"File renamed: '{old_name}' → '{file_name}' (no content change)", - ) - else: - # Neither content nor name changed - logger.debug(f"File unchanged: {file_name}") - return True, "File unchanged (same content and name)" - - # Content changed - needs full processing - return False, None - - -async def _process_single_file( - drive_client: GoogleDriveClient, - session: AsyncSession, - file: dict, - connector_id: int, - search_space_id: int, - user_id: str, - task_logger: TaskLoggingService, - log_entry: any, - pending_document: Document | None = None, - enable_summary: bool = True, -) -> tuple[int, int, int]: - """ - Process a single file by downloading and using Surfsense's file processor. - - Implements Phase 2 of the 2-phase document status update pattern. - Updates document status: pending → processing → ready/failed - - Args: - drive_client: Google Drive client - session: Database session - file: File metadata from Google Drive API - connector_id: ID of the connector - search_space_id: ID of the search space - user_id: ID of the user - task_logger: Task logging service - log_entry: Log entry for tracking - pending_document: Optional pending document created in Phase 1 - - Returns: - Tuple of (indexed_count, skipped_count, failed_count) - """ - file_name = file.get("name", "Unknown") - mime_type = file.get("mimeType", "") - file_id = file.get("id") - - try: - logger.info(f"Processing file: {file_name} ({mime_type})") - - # Early check: Is this a rename-only update? - # This optimization prevents downloading and ETL processing for files - # where only the name changed but content is the same. - is_rename_only, rename_message = await _check_rename_only_update( - session=session, - file=file, - search_space_id=search_space_id, - ) - - if is_rename_only: - await task_logger.log_task_progress( - log_entry, - f"Skipped ETL for {file_name}: {rename_message}", - {"status": "rename_only", "reason": rename_message}, - ) - # Return 1 for renamed files (they are "indexed" in the sense that they're updated) - # Return 0 for unchanged files - if "renamed" in (rename_message or "").lower(): - return 1, 0, 0 - return 0, 1, 0 - - # Set document to PROCESSING status if we have a pending document - if pending_document: - pending_document.status = DocumentStatus.processing() - await session.commit() - - _, error, _metadata = await download_and_process_file( - client=drive_client, - file=file, - search_space_id=search_space_id, - user_id=user_id, - session=session, - task_logger=task_logger, - log_entry=log_entry, - connector_id=connector_id, - enable_summary=enable_summary, - ) - - if error: - await task_logger.log_task_progress( - log_entry, - f"Skipped {file_name}: {error}", - {"status": "skipped", "reason": error}, - ) - # Mark pending document as failed if it exists - if pending_document: - pending_document.status = DocumentStatus.failed(error) - pending_document.updated_at = get_current_timestamp() - await session.commit() - return 0, 1, 0 - - # The document was created/updated by download_and_process_file - # Find the document and ensure it has READY status - if file_id: - unique_identifier_hash = generate_unique_identifier_hash( - DocumentType.GOOGLE_DRIVE_FILE, file_id, search_space_id - ) - processed_doc = await check_document_by_unique_identifier( - session, unique_identifier_hash - ) - # Ensure status is READY - if processed_doc and not DocumentStatus.is_state( - processed_doc.status, DocumentStatus.READY - ): - processed_doc.status = DocumentStatus.ready() - processed_doc.updated_at = get_current_timestamp() - await session.commit() - - logger.info(f"Successfully indexed Google Drive file: {file_name}") - return 1, 0, 0 - - except Exception as e: - logger.error(f"Error processing file {file_name}: {e!s}", exc_info=True) - # Mark pending document as failed if it exists - if pending_document: - try: - pending_document.status = DocumentStatus.failed(str(e)) - pending_document.updated_at = get_current_timestamp() - await session.commit() - except Exception as status_error: - logger.error( - f"Failed to update document status to failed: {status_error}" - ) - return 0, 0, 1 - - -async def _remove_document(session: AsyncSession, file_id: str, search_space_id: int): - """Remove a document that was deleted in Drive. - - Handles both new (file_id-based) and legacy (filename-based) hash schemes. - """ - from sqlalchemy import String, cast, select - - from app.db import Document - - # First try with file_id-based hash (new method) - unique_identifier_hash = generate_unique_identifier_hash( - DocumentType.GOOGLE_DRIVE_FILE, file_id, search_space_id - ) - - existing_document = await check_document_by_unique_identifier( - session, unique_identifier_hash - ) - - # Fallback: legacy Composio hash - if not existing_document: - legacy_hash = generate_unique_identifier_hash( - DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR, file_id, search_space_id - ) - existing_document = await check_document_by_unique_identifier( - session, legacy_hash - ) - - # Fallback: metadata search (covers old filename-based hashes, both native and Composio) - if not existing_document: - result = await session.execute( - select(Document).where( - Document.search_space_id == search_space_id, - Document.document_type.in_( - [ - DocumentType.GOOGLE_DRIVE_FILE, - DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR, - ] - ), - cast(Document.document_metadata["google_drive_file_id"], String) - == file_id, - ) - ) - existing_document = result.scalar_one_or_none() - if existing_document: - logger.info(f"Found legacy document by metadata for file_id: {file_id}") - - if existing_document: - await session.delete(existing_document) - logger.info(f"Removed deleted file document: {file_id}") diff --git a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py index 384ad85e2..96cc1cbb4 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py @@ -1,11 +1,11 @@ """ Google Gmail connector indexer. -Implements 2-phase document status updates for real-time UI feedback: -- Phase 1: Create all documents with 'pending' status (visible in UI immediately) -- Phase 2: Process each document: pending → processing → ready/failed +Uses the shared IndexingPipelineService for document deduplication, +summarization, chunking, and embedding. """ +import logging import time from collections.abc import Awaitable, Callable from datetime import datetime @@ -15,21 +15,15 @@ from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.asyncio import AsyncSession from app.connectors.google_gmail_connector import GoogleGmailConnector -from app.db import ( - Document, - DocumentStatus, - DocumentType, - SearchSourceConnectorType, +from app.db import DocumentType, SearchSourceConnectorType +from app.indexing_pipeline.connector_document import ConnectorDocument +from app.indexing_pipeline.document_hashing import ( + compute_content_hash, + compute_unique_identifier_hash, ) +from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService from app.services.llm_service import get_user_long_context_llm from app.services.task_logging_service import TaskLoggingService -from app.utils.document_converters import ( - create_document_chunks, - embed_text, - generate_content_hash, - generate_document_summary, - generate_unique_identifier_hash, -) from app.utils.google_credentials import ( COMPOSIO_GOOGLE_CONNECTOR_TYPES, build_composio_credentials, @@ -37,12 +31,9 @@ from app.utils.google_credentials import ( from .base import ( calculate_date_range, - check_document_by_unique_identifier, check_duplicate_document_by_hash, get_connector_by_id, - get_current_timestamp, logger, - safe_set_chunks, update_connector_last_indexed, ) @@ -51,13 +42,70 @@ ACCEPTED_GMAIL_CONNECTOR_TYPES = { SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR, } -# Type hint for heartbeat callback HeartbeatCallbackType = Callable[[int], Awaitable[None]] - -# Heartbeat interval in seconds HEARTBEAT_INTERVAL_SECONDS = 30 +def _build_connector_doc( + message: dict, + markdown_content: str, + *, + connector_id: int, + search_space_id: int, + user_id: str, + enable_summary: bool, +) -> ConnectorDocument: + """Map a raw Gmail API message dict to a ConnectorDocument.""" + message_id = message.get("id", "") + thread_id = message.get("threadId", "") + payload = message.get("payload", {}) + headers = payload.get("headers", []) + + subject = "No Subject" + sender = "Unknown Sender" + date_str = "Unknown Date" + + for header in headers: + name = header.get("name", "").lower() + value = header.get("value", "") + if name == "subject": + subject = value + elif name == "from": + sender = value + elif name == "date": + date_str = value + + metadata = { + "message_id": message_id, + "thread_id": thread_id, + "subject": subject, + "sender": sender, + "date": date_str, + "connector_id": connector_id, + "document_type": "Gmail Message", + "connector_type": "Google Gmail", + } + + fallback_summary = ( + f"Google Gmail Message: {subject}\n\n" + f"From: {sender}\nDate: {date_str}\n\n" + f"{markdown_content}" + ) + + return ConnectorDocument( + title=subject, + source_markdown=markdown_content, + unique_id=message_id, + document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR, + search_space_id=search_space_id, + connector_id=connector_id, + created_by_id=user_id, + should_summarize=enable_summary, + fallback_summary=fallback_summary, + metadata=metadata, + ) + + async def index_google_gmail_messages( session: AsyncSession, connector_id: int, @@ -80,7 +128,7 @@ async def index_google_gmail_messages( start_date: Start date for filtering messages (YYYY-MM-DD format) end_date: End date for filtering messages (YYYY-MM-DD format) update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) - max_messages: Maximum number of messages to fetch (default: 100) + max_messages: Maximum number of messages to fetch (default: 1000) on_heartbeat_callback: Optional callback to update notification during long-running indexing. Returns: @@ -88,7 +136,6 @@ async def index_google_gmail_messages( """ task_logger = TaskLoggingService(session, search_space_id) - # Log task start log_entry = await task_logger.log_task_start( task_name="google_gmail_messages_indexing", source="connector_indexing_task", @@ -103,7 +150,7 @@ async def index_google_gmail_messages( ) try: - # Accept both native and Composio Gmail connectors + # ── Connector lookup ────────────────────────────────────────── connector = None for ct in ACCEPTED_GMAIL_CONNECTOR_TYPES: connector = await get_connector_by_id(session, connector_id, ct) @@ -117,7 +164,7 @@ async def index_google_gmail_messages( ) return 0, 0, error_msg - # Build credentials based on connector type + # ── Credential building ─────────────────────────────────────── if connector.connector_type in COMPOSIO_GOOGLE_CONNECTOR_TYPES: connected_account_id = connector.config.get("composio_connected_account_id") if not connected_account_id: @@ -189,6 +236,7 @@ async def index_google_gmail_messages( ) return 0, 0, "Google gmail credentials not found in connector config" + # ── Gmail client init ───────────────────────────────────────── await task_logger.log_task_progress( log_entry, f"Initializing Google gmail client for connector {connector_id}", @@ -199,14 +247,11 @@ async def index_google_gmail_messages( credentials, session, user_id, connector_id ) - # Calculate date range using last_indexed_at if dates not provided - # This ensures Gmail uses the same date logic as other connectors - # (uses last_indexed_at → now, or 365 days back for first-time indexing) calculated_start_date, calculated_end_date = calculate_date_range( connector, start_date, end_date, default_days_back=365 ) - # Fetch recent Google gmail messages + # ── Fetch messages ──────────────────────────────────────────── logger.info( f"Fetching emails for connector {connector_id} " f"from {calculated_start_date} to {calculated_end_date}" @@ -218,7 +263,6 @@ async def index_google_gmail_messages( ) if error: - # Check if this is an authentication error that requires re-authentication error_message = error error_type = "APIError" if ( @@ -243,263 +287,92 @@ async def index_google_gmail_messages( logger.info(f"Found {len(messages)} Google gmail messages to index") - documents_indexed = 0 + # ── Build ConnectorDocuments ────────────────────────────────── + connector_docs: list[ConnectorDocument] = [] documents_skipped = 0 - documents_failed = 0 # Track messages that failed processing - duplicate_content_count = ( - 0 # Track messages skipped due to duplicate content_hash - ) - - # Heartbeat tracking - update notification periodically to prevent appearing stuck - last_heartbeat_time = time.time() - - # ======================================================================= - # PHASE 1: Analyze all messages, create pending documents - # This makes ALL documents visible in the UI immediately with pending status - # ======================================================================= - messages_to_process = [] # List of dicts with document and message data - new_documents_created = False + duplicate_content_count = 0 for message in messages: try: - # Extract message information message_id = message.get("id", "") - thread_id = message.get("threadId", "") - - # Extract headers for subject and sender - payload = message.get("payload", {}) - headers = payload.get("headers", []) - - subject = "No Subject" - sender = "Unknown Sender" - date_str = "Unknown Date" - - for header in headers: - name = header.get("name", "").lower() - value = header.get("value", "") - if name == "subject": - subject = value - elif name == "from": - sender = value - elif name == "date": - date_str = value - if not message_id: - logger.warning(f"Skipping message with missing ID: {subject}") + logger.warning("Skipping message with missing ID") documents_skipped += 1 continue - # Format message to markdown markdown_content = gmail_connector.format_message_to_markdown(message) - if not markdown_content.strip(): - logger.warning(f"Skipping message with no content: {subject}") + logger.warning(f"Skipping message with no content: {message_id}") documents_skipped += 1 continue - # Generate unique identifier hash for this Gmail message - unique_identifier_hash = generate_unique_identifier_hash( - DocumentType.GOOGLE_GMAIL_CONNECTOR, message_id, search_space_id + doc = _build_connector_doc( + message, + markdown_content, + connector_id=connector_id, + search_space_id=search_space_id, + user_id=user_id, + enable_summary=connector.enable_summary, ) - # Generate content hash - content_hash = generate_content_hash(markdown_content, search_space_id) - - # Check if document with this unique identifier already exists - existing_document = await check_document_by_unique_identifier( - session, unique_identifier_hash - ) - - # Fallback: legacy Composio hash - if not existing_document: - legacy_hash = generate_unique_identifier_hash( - DocumentType.COMPOSIO_GMAIL_CONNECTOR, - message_id, - search_space_id, - ) - existing_document = await check_document_by_unique_identifier( - session, legacy_hash - ) - if existing_document: - existing_document.unique_identifier_hash = ( - unique_identifier_hash - ) - if ( - existing_document.document_type - == DocumentType.COMPOSIO_GMAIL_CONNECTOR - ): - existing_document.document_type = ( - DocumentType.GOOGLE_GMAIL_CONNECTOR - ) - logger.info( - f"Migrated legacy Composio Gmail document: {message_id}" - ) - - if existing_document: - # Document exists - check if content has changed - if existing_document.content_hash == content_hash: - # Ensure status is ready (might have been stuck in processing/pending) - if not DocumentStatus.is_state( - existing_document.status, DocumentStatus.READY - ): - existing_document.status = DocumentStatus.ready() - documents_skipped += 1 - continue - - # Queue existing document for update (will be set to processing in Phase 2) - messages_to_process.append( - { - "document": existing_document, - "is_new": False, - "markdown_content": markdown_content, - "content_hash": content_hash, - "message_id": message_id, - "thread_id": thread_id, - "subject": subject, - "sender": sender, - "date_str": date_str, - } - ) - continue - - # Document doesn't exist by unique_identifier_hash - # Check if a document with the same content_hash exists (from another connector) with session.no_autoflush: - duplicate_by_content = await check_duplicate_document_by_hash( - session, content_hash + duplicate = await check_duplicate_document_by_hash( + session, compute_content_hash(doc) ) - - if duplicate_by_content: + if duplicate: logger.info( - f"Gmail message {subject} already indexed by another connector " - f"(existing document ID: {duplicate_by_content.id}, " - f"type: {duplicate_by_content.document_type}). Skipping." + f"Gmail message {doc.title} already indexed by another connector " + f"(existing document ID: {duplicate.id}, " + f"type: {duplicate.document_type}). Skipping." ) duplicate_content_count += 1 documents_skipped += 1 continue - # Create new document with PENDING status (visible in UI immediately) - document = Document( - search_space_id=search_space_id, - title=subject, - document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR, - document_metadata={ - "message_id": message_id, - "thread_id": thread_id, - "subject": subject, - "sender": sender, - "date": date_str, - "connector_id": connector_id, - }, - content="Pending...", # Placeholder until processed - content_hash=unique_identifier_hash, # Temporary unique value - updated when ready - unique_identifier_hash=unique_identifier_hash, - embedding=None, - chunks=[], # Empty at creation - safe for async - status=DocumentStatus.pending(), # Pending until processing starts - updated_at=get_current_timestamp(), - created_by_id=user_id, - connector_id=connector_id, - ) - session.add(document) - new_documents_created = True - - messages_to_process.append( - { - "document": document, - "is_new": True, - "markdown_content": markdown_content, - "content_hash": content_hash, - "message_id": message_id, - "thread_id": thread_id, - "subject": subject, - "sender": sender, - "date_str": date_str, - } - ) + connector_docs.append(doc) except Exception as e: - logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True) - documents_failed += 1 + logger.error(f"Error building ConnectorDocument for message: {e!s}", exc_info=True) + documents_skipped += 1 continue - # Commit all pending documents - they all appear in UI now - if new_documents_created: - logger.info( - f"Phase 1: Committing {len([m for m in messages_to_process if m['is_new']])} pending documents" - ) - await session.commit() + # ── Pipeline: migrate legacy docs + prepare + index ─────────── + pipeline = IndexingPipelineService(session) - # ======================================================================= - # PHASE 2: Process each document one by one - # Each document transitions: pending → processing → ready/failed - # ======================================================================= - logger.info(f"Phase 2: Processing {len(messages_to_process)} documents") + await pipeline.migrate_legacy_docs(connector_docs) - for item in messages_to_process: - # Send heartbeat periodically + documents = await pipeline.prepare_for_indexing(connector_docs) + + doc_map = { + compute_unique_identifier_hash(cd): cd for cd in connector_docs + } + + documents_indexed = 0 + documents_failed = 0 + last_heartbeat_time = time.time() + + for document in documents: if on_heartbeat_callback: current_time = time.time() if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS: await on_heartbeat_callback(documents_indexed) last_heartbeat_time = current_time - document = item["document"] - try: - # Set to PROCESSING and commit - shows "processing" in UI for THIS document only - document.status = DocumentStatus.processing() - await session.commit() + connector_doc = doc_map.get(document.unique_identifier_hash) + if connector_doc is None: + logger.warning( + f"No matching ConnectorDocument for document {document.id}, skipping" + ) + documents_failed += 1 + continue - # Heavy processing (LLM, embeddings, chunks) + try: user_llm = await get_user_long_context_llm( session, user_id, search_space_id ) - - if user_llm and connector.enable_summary: - document_metadata_for_summary = { - "message_id": item["message_id"], - "thread_id": item["thread_id"], - "subject": item["subject"], - "sender": item["sender"], - "date": item["date_str"], - "document_type": "Gmail Message", - "connector_type": "Google Gmail", - } - ( - summary_content, - summary_embedding, - ) = await generate_document_summary( - item["markdown_content"], - user_llm, - document_metadata_for_summary, - ) - else: - summary_content = f"Google Gmail Message: {item['subject']}\n\nFrom: {item['sender']}\nDate: {item['date_str']}\n\n{item['markdown_content']}" - summary_embedding = embed_text(summary_content) - - chunks = await create_document_chunks(item["markdown_content"]) - - # Update document to READY with actual content - document.title = item["subject"] - document.content = summary_content - document.content_hash = item["content_hash"] - document.embedding = summary_embedding - document.document_metadata = { - "message_id": item["message_id"], - "thread_id": item["thread_id"], - "subject": item["subject"], - "sender": item["sender"], - "date": item["date_str"], - "connector_id": connector_id, - } - await safe_set_chunks(session, document, chunks) - document.updated_at = get_current_timestamp() - document.status = DocumentStatus.ready() - + await pipeline.index(document, connector_doc, user_llm) documents_indexed += 1 - # Batch commit every 10 documents (for ready status updates) if documents_indexed % 10 == 0: logger.info( f"Committing batch: {documents_indexed} Gmail messages processed so far" @@ -508,21 +381,12 @@ async def index_google_gmail_messages( except Exception as e: logger.error(f"Error processing Gmail message: {e!s}", exc_info=True) - # Mark document as failed with reason (visible in UI) - try: - document.status = DocumentStatus.failed(str(e)) - document.updated_at = get_current_timestamp() - except Exception as status_error: - logger.error( - f"Failed to update document status to failed: {status_error}" - ) documents_failed += 1 continue - # CRITICAL: Always update timestamp (even if 0 documents indexed) so Zero syncs + # ── Finalize ────────────────────────────────────────────────── await update_connector_last_indexed(session, connector, update_last_indexed) - # Final commit for any remaining documents not yet committed in batches logger.info(f"Final commit: Total {documents_indexed} Gmail messages processed") try: await session.commit() @@ -530,22 +394,18 @@ async def index_google_gmail_messages( "Successfully committed all Google Gmail document changes to database" ) except Exception as e: - # Handle any remaining integrity errors gracefully (race conditions, etc.) if ( "duplicate key value violates unique constraint" in str(e).lower() or "uniqueviolationerror" in str(e).lower() ): logger.warning( f"Duplicate content_hash detected during final commit. " - f"This may occur if the same message was indexed by multiple connectors. " f"Rolling back and continuing. Error: {e!s}" ) await session.rollback() - # Don't fail the entire task - some documents may have been successfully indexed else: raise - # Build warning message if there were issues warning_parts = [] if duplicate_content_count > 0: warning_parts.append(f"{duplicate_content_count} duplicate") @@ -555,7 +415,6 @@ async def index_google_gmail_messages( total_processed = documents_indexed - # Log success await task_logger.log_task_success( log_entry, f"Successfully completed Google Gmail indexing for connector {connector_id}", diff --git a/surfsense_backend/tests/unit/indexing_pipeline/test_document_hashing.py b/surfsense_backend/tests/unit/indexing_pipeline/test_document_hashing.py index fe536b066..d04d8b048 100644 --- a/surfsense_backend/tests/unit/indexing_pipeline/test_document_hashing.py +++ b/surfsense_backend/tests/unit/indexing_pipeline/test_document_hashing.py @@ -3,6 +3,7 @@ import pytest from app.db import DocumentType from app.indexing_pipeline.document_hashing import ( compute_content_hash, + compute_identifier_hash, compute_unique_identifier_hash, ) @@ -61,3 +62,23 @@ def test_different_content_produces_different_content_hash(make_connector_docume doc_a = make_connector_document(source_markdown="Original content") doc_b = make_connector_document(source_markdown="Updated content") assert compute_content_hash(doc_a) != compute_content_hash(doc_b) + + +def test_compute_identifier_hash_matches_connector_doc_hash(make_connector_document): + """Raw-args hash equals ConnectorDocument hash for equivalent inputs.""" + doc = make_connector_document( + document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR, + unique_id="msg-123", + search_space_id=5, + ) + raw_hash = compute_identifier_hash("GOOGLE_GMAIL_CONNECTOR", "msg-123", 5) + assert raw_hash == compute_unique_identifier_hash(doc) + + +def test_compute_identifier_hash_differs_for_different_inputs(): + """Different arguments produce different hashes.""" + h1 = compute_identifier_hash("GOOGLE_DRIVE_FILE", "file-1", 1) + h2 = compute_identifier_hash("GOOGLE_DRIVE_FILE", "file-2", 1) + h3 = compute_identifier_hash("GOOGLE_DRIVE_FILE", "file-1", 2) + h4 = compute_identifier_hash("COMPOSIO_GOOGLE_DRIVE_CONNECTOR", "file-1", 1) + assert len({h1, h2, h3, h4}) == 4 From 8c41fd91bafc01347e4ab35da3cd79c9c4e7b104 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 25 Mar 2026 18:34:02 +0530 Subject: [PATCH 027/102] feat: add integration tests for indexing pipeline components - Introduced integration tests for Calendar, Drive, and Gmail indexers to ensure proper document creation and migration. - Added tests for batch indexing functionality to validate the processing of multiple documents. - Implemented tests for legacy document migration to verify updates to document types and hashes. - Enhanced test coverage for the IndexingPipelineService to ensure robust functionality across various document types. --- .../test_calendar_pipeline.py | 111 +++++++++++++++ .../indexing_pipeline/test_drive_pipeline.py | 110 +++++++++++++++ .../indexing_pipeline/test_gmail_pipeline.py | 116 ++++++++++++++++ .../indexing_pipeline/test_index_batch.py | 55 ++++++++ .../test_migrate_legacy_docs.py | 92 +++++++++++++ .../indexing_pipeline/test_index_batch.py | 82 +++++++++++ .../test_migrate_legacy_docs.py | 127 ++++++++++++++++++ 7 files changed, 693 insertions(+) create mode 100644 surfsense_backend/tests/integration/indexing_pipeline/test_calendar_pipeline.py create mode 100644 surfsense_backend/tests/integration/indexing_pipeline/test_drive_pipeline.py create mode 100644 surfsense_backend/tests/integration/indexing_pipeline/test_gmail_pipeline.py create mode 100644 surfsense_backend/tests/integration/indexing_pipeline/test_index_batch.py create mode 100644 surfsense_backend/tests/integration/indexing_pipeline/test_migrate_legacy_docs.py create mode 100644 surfsense_backend/tests/unit/indexing_pipeline/test_index_batch.py create mode 100644 surfsense_backend/tests/unit/indexing_pipeline/test_migrate_legacy_docs.py diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_calendar_pipeline.py b/surfsense_backend/tests/integration/indexing_pipeline/test_calendar_pipeline.py new file mode 100644 index 000000000..6a60c5cc1 --- /dev/null +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_calendar_pipeline.py @@ -0,0 +1,111 @@ +"""Integration tests: Calendar indexer builds ConnectorDocuments that flow through the pipeline.""" + +import pytest +from sqlalchemy import select + +from app.config import config as app_config +from app.db import Document, DocumentStatus, DocumentType +from app.indexing_pipeline.connector_document import ConnectorDocument +from app.indexing_pipeline.document_hashing import compute_identifier_hash +from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService + +_EMBEDDING_DIM = app_config.embedding_model_instance.dimension + +pytestmark = pytest.mark.integration + + +def _cal_doc(*, unique_id: str, search_space_id: int, connector_id: int, user_id: str) -> ConnectorDocument: + return ConnectorDocument( + title=f"Event {unique_id}", + source_markdown=f"## Calendar Event\n\nDetails for {unique_id}", + unique_id=unique_id, + document_type=DocumentType.GOOGLE_CALENDAR_CONNECTOR, + search_space_id=search_space_id, + connector_id=connector_id, + created_by_id=user_id, + should_summarize=True, + fallback_summary=f"Calendar: Event {unique_id}", + metadata={ + "event_id": unique_id, + "start_time": "2025-01-15T10:00:00", + "end_time": "2025-01-15T11:00:00", + "document_type": "Google Calendar Event", + }, + ) + + +@pytest.mark.usefixtures("patched_summarize", "patched_embed_texts", "patched_chunk_text") +async def test_calendar_pipeline_creates_ready_document( + db_session, db_search_space, db_connector, db_user, mocker +): + """A Calendar ConnectorDocument flows through prepare + index to a READY document.""" + space_id = db_search_space.id + doc = _cal_doc( + unique_id="evt-1", + search_space_id=space_id, + connector_id=db_connector.id, + user_id=str(db_user.id), + ) + + service = IndexingPipelineService(session=db_session) + prepared = await service.prepare_for_indexing([doc]) + assert len(prepared) == 1 + + await service.index(prepared[0], doc, llm=mocker.Mock()) + + result = await db_session.execute( + select(Document).filter(Document.search_space_id == space_id) + ) + row = result.scalars().first() + + assert row is not None + assert row.document_type == DocumentType.GOOGLE_CALENDAR_CONNECTOR + assert DocumentStatus.is_state(row.status, DocumentStatus.READY) + + +@pytest.mark.usefixtures("patched_summarize", "patched_embed_texts", "patched_chunk_text") +async def test_calendar_legacy_doc_migrated( + db_session, db_search_space, db_connector, db_user, mocker +): + """A legacy Composio Calendar doc is migrated and reused.""" + space_id = db_search_space.id + user_id = str(db_user.id) + evt_id = "evt-legacy-cal" + + legacy_hash = compute_identifier_hash( + DocumentType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR.value, evt_id, space_id + ) + legacy_doc = Document( + title="Old Calendar Event", + document_type=DocumentType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR, + content="old summary", + content_hash=f"ch-{legacy_hash[:12]}", + unique_identifier_hash=legacy_hash, + source_markdown="## Old event", + search_space_id=space_id, + created_by_id=user_id, + embedding=[0.1] * _EMBEDDING_DIM, + status={"state": "ready"}, + ) + db_session.add(legacy_doc) + await db_session.flush() + original_id = legacy_doc.id + + connector_doc = _cal_doc( + unique_id=evt_id, + search_space_id=space_id, + connector_id=db_connector.id, + user_id=user_id, + ) + + service = IndexingPipelineService(session=db_session) + await service.migrate_legacy_docs([connector_doc]) + + result = await db_session.execute(select(Document).filter(Document.id == original_id)) + row = result.scalars().first() + + assert row.document_type == DocumentType.GOOGLE_CALENDAR_CONNECTOR + native_hash = compute_identifier_hash( + DocumentType.GOOGLE_CALENDAR_CONNECTOR.value, evt_id, space_id + ) + assert row.unique_identifier_hash == native_hash diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_drive_pipeline.py b/surfsense_backend/tests/integration/indexing_pipeline/test_drive_pipeline.py new file mode 100644 index 000000000..32af0b8c1 --- /dev/null +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_drive_pipeline.py @@ -0,0 +1,110 @@ +"""Integration tests: Drive indexer builds ConnectorDocuments that flow through the pipeline.""" + +import pytest +from sqlalchemy import select + +from app.config import config as app_config +from app.db import Document, DocumentStatus, DocumentType +from app.indexing_pipeline.connector_document import ConnectorDocument +from app.indexing_pipeline.document_hashing import compute_identifier_hash +from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService + +_EMBEDDING_DIM = app_config.embedding_model_instance.dimension + +pytestmark = pytest.mark.integration + + +def _drive_doc(*, unique_id: str, search_space_id: int, connector_id: int, user_id: str) -> ConnectorDocument: + return ConnectorDocument( + title=f"File {unique_id}.pdf", + source_markdown=f"## Document Content\n\nText from file {unique_id}", + unique_id=unique_id, + document_type=DocumentType.GOOGLE_DRIVE_FILE, + search_space_id=search_space_id, + connector_id=connector_id, + created_by_id=user_id, + should_summarize=True, + fallback_summary=f"File: {unique_id}.pdf", + metadata={ + "google_drive_file_id": unique_id, + "google_drive_file_name": f"{unique_id}.pdf", + "document_type": "Google Drive File", + }, + ) + + +@pytest.mark.usefixtures("patched_summarize", "patched_embed_texts", "patched_chunk_text") +async def test_drive_pipeline_creates_ready_document( + db_session, db_search_space, db_connector, db_user, mocker +): + """A Drive ConnectorDocument flows through prepare + index to a READY document.""" + space_id = db_search_space.id + doc = _drive_doc( + unique_id="file-abc", + search_space_id=space_id, + connector_id=db_connector.id, + user_id=str(db_user.id), + ) + + service = IndexingPipelineService(session=db_session) + prepared = await service.prepare_for_indexing([doc]) + assert len(prepared) == 1 + + await service.index(prepared[0], doc, llm=mocker.Mock()) + + result = await db_session.execute( + select(Document).filter(Document.search_space_id == space_id) + ) + row = result.scalars().first() + + assert row is not None + assert row.document_type == DocumentType.GOOGLE_DRIVE_FILE + assert DocumentStatus.is_state(row.status, DocumentStatus.READY) + + +@pytest.mark.usefixtures("patched_summarize", "patched_embed_texts", "patched_chunk_text") +async def test_drive_legacy_doc_migrated( + db_session, db_search_space, db_connector, db_user, mocker +): + """A legacy Composio Drive doc is migrated and reused.""" + space_id = db_search_space.id + user_id = str(db_user.id) + file_id = "file-legacy-drive" + + legacy_hash = compute_identifier_hash( + DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR.value, file_id, space_id + ) + legacy_doc = Document( + title="Old Drive File", + document_type=DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR, + content="old file summary", + content_hash=f"ch-{legacy_hash[:12]}", + unique_identifier_hash=legacy_hash, + source_markdown="## Old file content", + search_space_id=space_id, + created_by_id=user_id, + embedding=[0.1] * _EMBEDDING_DIM, + status={"state": "ready"}, + ) + db_session.add(legacy_doc) + await db_session.flush() + original_id = legacy_doc.id + + connector_doc = _drive_doc( + unique_id=file_id, + search_space_id=space_id, + connector_id=db_connector.id, + user_id=user_id, + ) + + service = IndexingPipelineService(session=db_session) + await service.migrate_legacy_docs([connector_doc]) + + result = await db_session.execute(select(Document).filter(Document.id == original_id)) + row = result.scalars().first() + + assert row.document_type == DocumentType.GOOGLE_DRIVE_FILE + native_hash = compute_identifier_hash( + DocumentType.GOOGLE_DRIVE_FILE.value, file_id, space_id + ) + assert row.unique_identifier_hash == native_hash diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_gmail_pipeline.py b/surfsense_backend/tests/integration/indexing_pipeline/test_gmail_pipeline.py new file mode 100644 index 000000000..d67420cb7 --- /dev/null +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_gmail_pipeline.py @@ -0,0 +1,116 @@ +"""Integration tests: Gmail indexer builds ConnectorDocuments that flow through the pipeline.""" + +import pytest +from sqlalchemy import select + +from app.config import config as app_config +from app.db import Document, DocumentStatus, DocumentType +from app.indexing_pipeline.connector_document import ConnectorDocument +from app.indexing_pipeline.document_hashing import ( + compute_identifier_hash, + compute_unique_identifier_hash, +) +from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService + +_EMBEDDING_DIM = app_config.embedding_model_instance.dimension + +pytestmark = pytest.mark.integration + + +def _gmail_doc(*, unique_id: str, search_space_id: int, connector_id: int, user_id: str) -> ConnectorDocument: + """Build a Gmail-style ConnectorDocument like the real indexer does.""" + return ConnectorDocument( + title=f"Subject for {unique_id}", + source_markdown=f"## Email\n\nBody of {unique_id}", + unique_id=unique_id, + document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR, + search_space_id=search_space_id, + connector_id=connector_id, + created_by_id=user_id, + should_summarize=True, + fallback_summary=f"Gmail: Subject for {unique_id}", + metadata={ + "message_id": unique_id, + "from": "sender@example.com", + "document_type": "Gmail Message", + }, + ) + + +@pytest.mark.usefixtures("patched_summarize", "patched_embed_texts", "patched_chunk_text") +async def test_gmail_pipeline_creates_ready_document( + db_session, db_search_space, db_connector, db_user, mocker +): + """A Gmail ConnectorDocument flows through prepare + index to a READY document.""" + space_id = db_search_space.id + doc = _gmail_doc( + unique_id="msg-pipeline-1", + search_space_id=space_id, + connector_id=db_connector.id, + user_id=str(db_user.id), + ) + + service = IndexingPipelineService(session=db_session) + prepared = await service.prepare_for_indexing([doc]) + assert len(prepared) == 1 + + await service.index(prepared[0], doc, llm=mocker.Mock()) + + result = await db_session.execute( + select(Document).filter(Document.search_space_id == space_id) + ) + row = result.scalars().first() + + assert row is not None + assert row.document_type == DocumentType.GOOGLE_GMAIL_CONNECTOR + assert DocumentStatus.is_state(row.status, DocumentStatus.READY) + assert row.source_markdown == doc.source_markdown + + +@pytest.mark.usefixtures("patched_summarize", "patched_embed_texts", "patched_chunk_text") +async def test_gmail_legacy_doc_migrated_then_reused( + db_session, db_search_space, db_connector, db_user, mocker +): + """A legacy Composio Gmail doc is migrated then reused by the pipeline.""" + space_id = db_search_space.id + user_id = str(db_user.id) + msg_id = "msg-legacy-gmail" + + legacy_hash = compute_identifier_hash( + DocumentType.COMPOSIO_GMAIL_CONNECTOR.value, msg_id, space_id + ) + legacy_doc = Document( + title="Old Gmail", + document_type=DocumentType.COMPOSIO_GMAIL_CONNECTOR, + content="old summary", + content_hash=f"ch-{legacy_hash[:12]}", + unique_identifier_hash=legacy_hash, + source_markdown="## Old content", + search_space_id=space_id, + created_by_id=user_id, + embedding=[0.1] * _EMBEDDING_DIM, + status={"state": "ready"}, + ) + db_session.add(legacy_doc) + await db_session.flush() + original_id = legacy_doc.id + + connector_doc = _gmail_doc( + unique_id=msg_id, + search_space_id=space_id, + connector_id=db_connector.id, + user_id=user_id, + ) + + service = IndexingPipelineService(session=db_session) + await service.migrate_legacy_docs([connector_doc]) + + prepared = await service.prepare_for_indexing([connector_doc]) + assert len(prepared) == 1 + assert prepared[0].id == original_id + assert prepared[0].document_type == DocumentType.GOOGLE_GMAIL_CONNECTOR + + native_hash = compute_identifier_hash( + DocumentType.GOOGLE_GMAIL_CONNECTOR.value, msg_id, space_id + ) + assert prepared[0].unique_identifier_hash == native_hash diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_index_batch.py b/surfsense_backend/tests/integration/indexing_pipeline/test_index_batch.py new file mode 100644 index 000000000..a40498769 --- /dev/null +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_index_batch.py @@ -0,0 +1,55 @@ +"""Integration tests for IndexingPipelineService.index_batch().""" + +import pytest +from sqlalchemy import select + +from app.db import Document, DocumentStatus, DocumentType +from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService + +pytestmark = pytest.mark.integration + + +@pytest.mark.usefixtures("patched_summarize", "patched_embed_texts", "patched_chunk_text") +async def test_index_batch_creates_ready_documents( + db_session, db_search_space, make_connector_document, mocker +): + """index_batch prepares and indexes a batch, resulting in READY documents.""" + space_id = db_search_space.id + docs = [ + make_connector_document( + document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR, + unique_id="msg-batch-1", + search_space_id=space_id, + source_markdown="## Email 1\n\nBody", + ), + make_connector_document( + document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR, + unique_id="msg-batch-2", + search_space_id=space_id, + source_markdown="## Email 2\n\nDifferent body", + ), + ] + + service = IndexingPipelineService(session=db_session) + results = await service.index_batch(docs, llm=mocker.Mock()) + + assert len(results) == 2 + + result = await db_session.execute( + select(Document).filter(Document.search_space_id == space_id) + ) + rows = result.scalars().all() + assert len(rows) == 2 + + for row in rows: + assert DocumentStatus.is_state(row.status, DocumentStatus.READY) + assert row.content is not None + assert row.embedding is not None + + +@pytest.mark.usefixtures("patched_summarize", "patched_embed_texts", "patched_chunk_text") +async def test_index_batch_empty_returns_empty(db_session, mocker): + """index_batch with empty input returns an empty list.""" + service = IndexingPipelineService(session=db_session) + results = await service.index_batch([], llm=mocker.Mock()) + assert results == [] diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_migrate_legacy_docs.py b/surfsense_backend/tests/integration/indexing_pipeline/test_migrate_legacy_docs.py new file mode 100644 index 000000000..8fc0e7586 --- /dev/null +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_migrate_legacy_docs.py @@ -0,0 +1,92 @@ +"""Integration tests for IndexingPipelineService.migrate_legacy_docs().""" + +import pytest +from sqlalchemy import select + +from app.config import config as app_config +from app.db import Document, DocumentType +from app.indexing_pipeline.document_hashing import compute_identifier_hash +from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService + +_EMBEDDING_DIM = app_config.embedding_model_instance.dimension + +pytestmark = pytest.mark.integration + + +async def test_legacy_composio_gmail_doc_migrated_in_db( + db_session, db_search_space, db_user, make_connector_document +): + """A Composio Gmail doc in the DB gets its hash and type updated to native.""" + space_id = db_search_space.id + user_id = str(db_user.id) + unique_id = "msg-legacy-123" + + legacy_hash = compute_identifier_hash( + DocumentType.COMPOSIO_GMAIL_CONNECTOR.value, unique_id, space_id + ) + native_hash = compute_identifier_hash( + DocumentType.GOOGLE_GMAIL_CONNECTOR.value, unique_id, space_id + ) + + legacy_doc = Document( + title="Old Gmail", + document_type=DocumentType.COMPOSIO_GMAIL_CONNECTOR, + content="legacy content", + content_hash=f"ch-{legacy_hash[:12]}", + unique_identifier_hash=legacy_hash, + search_space_id=space_id, + created_by_id=user_id, + embedding=[0.1] * _EMBEDDING_DIM, + status={"state": "ready"}, + ) + db_session.add(legacy_doc) + await db_session.flush() + doc_id = legacy_doc.id + + connector_doc = make_connector_document( + document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR, + unique_id=unique_id, + search_space_id=space_id, + ) + + service = IndexingPipelineService(session=db_session) + await service.migrate_legacy_docs([connector_doc]) + + result = await db_session.execute(select(Document).filter(Document.id == doc_id)) + reloaded = result.scalars().first() + + assert reloaded.unique_identifier_hash == native_hash + assert reloaded.document_type == DocumentType.GOOGLE_GMAIL_CONNECTOR + + +async def test_no_legacy_doc_is_noop( + db_session, db_search_space, make_connector_document +): + """When no legacy document exists, migrate_legacy_docs does nothing.""" + connector_doc = make_connector_document( + document_type=DocumentType.GOOGLE_CALENDAR_CONNECTOR, + unique_id="evt-no-legacy", + search_space_id=db_search_space.id, + ) + + service = IndexingPipelineService(session=db_session) + await service.migrate_legacy_docs([connector_doc]) + + result = await db_session.execute( + select(Document).filter(Document.search_space_id == db_search_space.id) + ) + assert result.scalars().all() == [] + + +async def test_non_google_type_is_skipped( + db_session, db_search_space, make_connector_document +): + """migrate_legacy_docs skips ConnectorDocuments that are not Google types.""" + connector_doc = make_connector_document( + document_type=DocumentType.CLICKUP_CONNECTOR, + unique_id="task-1", + search_space_id=db_search_space.id, + ) + + service = IndexingPipelineService(session=db_session) + await service.migrate_legacy_docs([connector_doc]) diff --git a/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch.py b/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch.py new file mode 100644 index 000000000..dcd097d20 --- /dev/null +++ b/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch.py @@ -0,0 +1,82 @@ +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from app.db import Document, DocumentType +from app.indexing_pipeline.document_hashing import compute_unique_identifier_hash +from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService + +pytestmark = pytest.mark.unit + + +@pytest.fixture +def mock_session(): + return AsyncMock() + + +@pytest.fixture +def pipeline(mock_session): + return IndexingPipelineService(mock_session) + + +async def test_calls_prepare_then_index_per_document( + pipeline, make_connector_document +): + """index_batch calls prepare_for_indexing, then index() for each returned doc.""" + doc1 = make_connector_document( + document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR, + unique_id="msg-1", + search_space_id=1, + ) + doc2 = make_connector_document( + document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR, + unique_id="msg-2", + search_space_id=1, + ) + + orm1 = MagicMock(spec=Document) + orm1.unique_identifier_hash = compute_unique_identifier_hash(doc1) + orm2 = MagicMock(spec=Document) + orm2.unique_identifier_hash = compute_unique_identifier_hash(doc2) + + mock_llm = MagicMock() + + pipeline.prepare_for_indexing = AsyncMock(return_value=[orm1, orm2]) + pipeline.index = AsyncMock(side_effect=lambda doc, cdoc, llm: doc) + + results = await pipeline.index_batch([doc1, doc2], mock_llm) + + pipeline.prepare_for_indexing.assert_awaited_once_with([doc1, doc2]) + assert pipeline.index.await_count == 2 + assert results == [orm1, orm2] + + +async def test_empty_input_returns_empty(pipeline): + """Empty connector_docs list returns empty result.""" + pipeline.prepare_for_indexing = AsyncMock(return_value=[]) + + results = await pipeline.index_batch([], MagicMock()) + + assert results == [] + + +async def test_skips_document_without_matching_connector_doc( + pipeline, make_connector_document +): + """If prepare returns a doc whose hash has no matching ConnectorDocument, it's skipped.""" + doc1 = make_connector_document( + document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR, + unique_id="msg-1", + search_space_id=1, + ) + + orphan_orm = MagicMock(spec=Document) + orphan_orm.unique_identifier_hash = "nonexistent-hash" + + pipeline.prepare_for_indexing = AsyncMock(return_value=[orphan_orm]) + pipeline.index = AsyncMock() + + results = await pipeline.index_batch([doc1], MagicMock()) + + pipeline.index.assert_not_awaited() + assert results == [] diff --git a/surfsense_backend/tests/unit/indexing_pipeline/test_migrate_legacy_docs.py b/surfsense_backend/tests/unit/indexing_pipeline/test_migrate_legacy_docs.py new file mode 100644 index 000000000..9334fe678 --- /dev/null +++ b/surfsense_backend/tests/unit/indexing_pipeline/test_migrate_legacy_docs.py @@ -0,0 +1,127 @@ +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from app.db import Document, DocumentType +from app.indexing_pipeline.document_hashing import compute_identifier_hash +from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService + +pytestmark = pytest.mark.unit + + +@pytest.fixture +def mock_session(): + session = AsyncMock() + return session + + +@pytest.fixture +def pipeline(mock_session): + return IndexingPipelineService(mock_session) + + +def _make_execute_side_effect(doc_by_hash: dict): + """Return a side_effect for session.execute that resolves documents by hash.""" + + async def _side_effect(stmt): + result = MagicMock() + for h, doc in doc_by_hash.items(): + if h in str(stmt.compile(compile_kwargs={"literal_binds": True})): + result.scalars.return_value.first.return_value = doc + return result + result.scalars.return_value.first.return_value = None + return result + + return _side_effect + + +async def test_updates_hash_and_type_for_legacy_document( + pipeline, mock_session, make_connector_document +): + """Legacy Composio document gets unique_identifier_hash and document_type updated.""" + doc = make_connector_document( + document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR, + unique_id="msg-abc", + search_space_id=1, + ) + + legacy_hash = compute_identifier_hash("COMPOSIO_GMAIL_CONNECTOR", "msg-abc", 1) + native_hash = compute_identifier_hash("GOOGLE_GMAIL_CONNECTOR", "msg-abc", 1) + + existing = MagicMock(spec=Document) + existing.unique_identifier_hash = legacy_hash + existing.document_type = DocumentType.COMPOSIO_GMAIL_CONNECTOR + + result_mock = MagicMock() + result_mock.scalars.return_value.first.return_value = existing + mock_session.execute = AsyncMock(return_value=result_mock) + + await pipeline.migrate_legacy_docs([doc]) + + assert existing.unique_identifier_hash == native_hash + assert existing.document_type == DocumentType.GOOGLE_GMAIL_CONNECTOR + mock_session.commit.assert_awaited_once() + + +async def test_noop_when_no_legacy_document_exists( + pipeline, mock_session, make_connector_document +): + """No updates when no legacy Composio document is found in DB.""" + doc = make_connector_document( + document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR, + unique_id="msg-xyz", + search_space_id=1, + ) + + result_mock = MagicMock() + result_mock.scalars.return_value.first.return_value = None + mock_session.execute = AsyncMock(return_value=result_mock) + + await pipeline.migrate_legacy_docs([doc]) + + mock_session.commit.assert_awaited_once() + + +async def test_skips_non_google_doc_types( + pipeline, mock_session, make_connector_document +): + """Non-Google doc types have no legacy mapping and trigger no DB query.""" + doc = make_connector_document( + document_type=DocumentType.SLACK_CONNECTOR, + unique_id="slack-123", + search_space_id=1, + ) + + await pipeline.migrate_legacy_docs([doc]) + + mock_session.execute.assert_not_awaited() + mock_session.commit.assert_awaited_once() + + +async def test_handles_all_three_google_types( + pipeline, mock_session, make_connector_document +): + """Each native Google type correctly maps to its Composio legacy type.""" + mappings = [ + (DocumentType.GOOGLE_GMAIL_CONNECTOR, "COMPOSIO_GMAIL_CONNECTOR"), + (DocumentType.GOOGLE_CALENDAR_CONNECTOR, "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR"), + (DocumentType.GOOGLE_DRIVE_FILE, "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"), + ] + for native_type, expected_legacy in mappings: + doc = make_connector_document( + document_type=native_type, + unique_id="id-1", + search_space_id=1, + ) + + existing = MagicMock(spec=Document) + existing.document_type = DocumentType(expected_legacy) + + result_mock = MagicMock() + result_mock.scalars.return_value.first.return_value = existing + mock_session.execute = AsyncMock(return_value=result_mock) + mock_session.commit = AsyncMock() + + await pipeline.migrate_legacy_docs([doc]) + + assert existing.document_type == native_type From c3d5c865fdb18b6ea5c039a2e160a5e4cbdbd64f Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 25 Mar 2026 18:51:40 +0530 Subject: [PATCH 028/102] fix: update file skipping logic in Google Drive indexer - Modified the `_should_skip_file` function to prevent skipping of documents with a FAILED status, ensuring they are reprocessed even if their content remains unchanged. - Added a new integration test to verify that FAILED documents are not skipped during the indexing process. --- .../google_drive_indexer.py | 2 +- .../indexing_pipeline/test_drive_pipeline.py | 59 +++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py index 92c074812..af9528bb7 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py @@ -149,7 +149,7 @@ async def _should_skip_file( return True, f"File renamed: '{old_name}' → '{file_name}'" if not DocumentStatus.is_state(existing.status, DocumentStatus.READY): - existing.status = DocumentStatus.ready() + return False, None return True, "unchanged" diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_drive_pipeline.py b/surfsense_backend/tests/integration/indexing_pipeline/test_drive_pipeline.py index 32af0b8c1..77128ebd9 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/test_drive_pipeline.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_drive_pipeline.py @@ -108,3 +108,62 @@ async def test_drive_legacy_doc_migrated( DocumentType.GOOGLE_DRIVE_FILE.value, file_id, space_id ) assert row.unique_identifier_hash == native_hash + + +async def test_should_skip_file_does_not_skip_failed_document( + db_session, db_search_space, db_user, +): + """A FAILED document with unchanged md5 must NOT be skipped — it needs reprocessing.""" + import importlib + import sys + import types + + pkg = "app.tasks.connector_indexers" + stub = pkg not in sys.modules + if stub: + mod = types.ModuleType(pkg) + mod.__path__ = ["app/tasks/connector_indexers"] + mod.__package__ = pkg + sys.modules[pkg] = mod + + try: + gdm = importlib.import_module( + "app.tasks.connector_indexers.google_drive_indexer" + ) + _should_skip_file = gdm._should_skip_file + finally: + if stub: + sys.modules.pop(pkg, None) + + space_id = db_search_space.id + file_id = "file-failed-drive" + md5 = "abc123deadbeef" + + doc_hash = compute_identifier_hash( + DocumentType.GOOGLE_DRIVE_FILE.value, file_id, space_id + ) + failed_doc = Document( + title="Failed File.pdf", + document_type=DocumentType.GOOGLE_DRIVE_FILE, + content="LLM rate limit exceeded", + content_hash=f"ch-{doc_hash[:12]}", + unique_identifier_hash=doc_hash, + source_markdown="## Real content", + search_space_id=space_id, + created_by_id=str(db_user.id), + embedding=[0.1] * _EMBEDDING_DIM, + status=DocumentStatus.failed("LLM rate limit exceeded"), + document_metadata={ + "google_drive_file_id": file_id, + "google_drive_file_name": "Failed File.pdf", + "md5_checksum": md5, + }, + ) + db_session.add(failed_doc) + await db_session.flush() + + incoming_file = {"id": file_id, "name": "Failed File.pdf", "mimeType": "application/pdf", "md5Checksum": md5} + + should_skip, _msg = await _should_skip_file(db_session, incoming_file, space_id) + + assert not should_skip, "FAILED documents must not be skipped even when content is unchanged" From 91ad36027dbe32f00dbf1a81a8908564ce3dfab4 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 25 Mar 2026 15:57:20 +0200 Subject: [PATCH 029/102] fix: send clipboard text after page load on first open --- surfsense_desktop/src/modules/quick-ask.ts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/surfsense_desktop/src/modules/quick-ask.ts b/surfsense_desktop/src/modules/quick-ask.ts index 0058a738e..8eb094812 100644 --- a/surfsense_desktop/src/modules/quick-ask.ts +++ b/surfsense_desktop/src/modules/quick-ask.ts @@ -62,10 +62,17 @@ export function registerQuickAsk(): void { const text = clipboard.readText().trim(); if (!text) return; + const isExisting = quickAskWindow && !quickAskWindow.isDestroyed(); const cursor = screen.getCursorScreenPoint(); const win = createQuickAskWindow(cursor.x, cursor.y); - win.webContents.send(IPC_CHANNELS.QUICK_ASK_TEXT, text); + if (isExisting) { + win.webContents.send(IPC_CHANNELS.QUICK_ASK_TEXT, text); + } else { + win.webContents.once('did-finish-load', () => { + win.webContents.send(IPC_CHANNELS.QUICK_ASK_TEXT, text); + }); + } }); if (!ok) { From 4a5a28805d6ce670d01056c67a47b47c46b3a882 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 25 Mar 2026 16:08:39 +0200 Subject: [PATCH 030/102] start at /dashboard, focus cursor after clipboard text --- surfsense_desktop/src/modules/window.ts | 2 +- .../components/assistant-ui/inline-mention-editor.tsx | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/surfsense_desktop/src/modules/window.ts b/surfsense_desktop/src/modules/window.ts index 1b3f3baed..245814cad 100644 --- a/surfsense_desktop/src/modules/window.ts +++ b/surfsense_desktop/src/modules/window.ts @@ -33,7 +33,7 @@ export function createMainWindow(): BrowserWindow { mainWindow?.show(); }); - mainWindow.loadURL(`http://localhost:${getServerPort()}/login`); + mainWindow.loadURL(`http://localhost:${getServerPort()}/dashboard`); mainWindow.webContents.setWindowOpenHandler(({ url }) => { if (url.startsWith('http://localhost')) { diff --git a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx index ab1213a49..ae490cdd0 100644 --- a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx +++ b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx @@ -119,9 +119,16 @@ export const InlineMentionEditor = forwardRef { if (!initialText || !editorRef.current) return; - editorRef.current.textContent = initialText; + editorRef.current.textContent = initialText + "\n"; setIsEmpty(false); onChange?.(initialText, Array.from(mentionedDocs.values())); + editorRef.current.focus(); + const sel = window.getSelection(); + const range = document.createRange(); + range.selectNodeContents(editorRef.current); + range.collapse(false); + sel?.removeAllRanges(); + sel?.addRange(range); }, [initialText]); // eslint-disable-line react-hooks/exhaustive-deps // Focus at the end of the editor From 227fb014d4695908768687e8cec573dea6b05c89 Mon Sep 17 00:00:00 2001 From: likiosliu Date: Wed, 25 Mar 2026 12:32:24 +0800 Subject: [PATCH 031/102] fix: add noopener to window.open call in AnnouncementToastProvider Closes #939 --- .../components/announcements/AnnouncementToastProvider.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/surfsense_web/components/announcements/AnnouncementToastProvider.tsx b/surfsense_web/components/announcements/AnnouncementToastProvider.tsx index 3ae6bf233..6cb1b17e5 100644 --- a/surfsense_web/components/announcements/AnnouncementToastProvider.tsx +++ b/surfsense_web/components/announcements/AnnouncementToastProvider.tsx @@ -34,7 +34,7 @@ function showAnnouncementToast(announcement: Announcement) { label: announcement.link.label, onClick: () => { if (announcement.link?.url.startsWith("http")) { - window.open(announcement.link.url, "_blank"); + window.open(announcement.link.url, "_blank", "noopener,noreferrer"); } else if (announcement.link?.url) { window.location.href = announcement.link.url; } From 2a7b50408f5219003c4e6469c9e38bc174f369d9 Mon Sep 17 00:00:00 2001 From: likiosliu Date: Wed, 25 Mar 2026 12:32:56 +0800 Subject: [PATCH 032/102] fix: add missing type dependency in DocumentTypeChip truncation check Closes #946 --- .../documents/(manage)/components/DocumentTypeIcon.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx index c07f34935..25eeb4cab 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx @@ -63,7 +63,7 @@ export function DocumentTypeChip({ type, className }: { type: string; className? checkTruncation(); window.addEventListener("resize", checkTruncation); return () => window.removeEventListener("resize", checkTruncation); - }, []); + }, [type]); const chip = ( Date: Wed, 25 Mar 2026 16:58:46 +0800 Subject: [PATCH 033/102] fix: avoid stale event reference in register page retry action Extract submission logic into submitForm() so the retry toast action does not capture the original SyntheticEvent, which may be recycled by React by the time the user clicks retry. Closes #945 --- surfsense_web/app/(home)/register/page.tsx | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/surfsense_web/app/(home)/register/page.tsx b/surfsense_web/app/(home)/register/page.tsx index 35fa2b668..96fab2c6a 100644 --- a/surfsense_web/app/(home)/register/page.tsx +++ b/surfsense_web/app/(home)/register/page.tsx @@ -43,9 +43,12 @@ export default function RegisterPage() { } }, [router]); - const handleSubmit = async (e: React.FormEvent) => { + const handleSubmit = (e: React.FormEvent) => { e.preventDefault(); + submitForm(); + }; + const submitForm = async () => { // Form validation if (password !== confirmPassword) { setError({ title: t("password_mismatch"), message: t("passwords_no_match_desc") }); @@ -140,7 +143,7 @@ export default function RegisterPage() { if (shouldRetry(errorCode)) { toastOptions.action = { label: tCommon("retry"), - onClick: () => handleSubmit(e), + onClick: () => submitForm(), }; } From 97e7e73baf76340c79a47522c2b11f3983aae78a Mon Sep 17 00:00:00 2001 From: likiosliu Date: Wed, 25 Mar 2026 16:55:26 +0800 Subject: [PATCH 034/102] fix: remove unnecessary useEffect + useState for AUTH_TYPE constant AUTH_TYPE is a static module-level import that never changes. No need for useState + useEffect; use the constant directly. Closes #941 --- surfsense_web/app/(home)/login/LocalLoginForm.tsx | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/surfsense_web/app/(home)/login/LocalLoginForm.tsx b/surfsense_web/app/(home)/login/LocalLoginForm.tsx index 9481976a9..7c85eedbd 100644 --- a/surfsense_web/app/(home)/login/LocalLoginForm.tsx +++ b/surfsense_web/app/(home)/login/LocalLoginForm.tsx @@ -5,7 +5,7 @@ import { AnimatePresence, motion } from "motion/react"; import Link from "next/link"; import { useRouter } from "next/navigation"; import { useTranslations } from "next-intl"; -import { useEffect, useState } from "react"; +import { useState } from "react"; import { loginMutationAtom } from "@/atoms/auth/auth-mutation.atoms"; import { Spinner } from "@/components/ui/spinner"; import { getAuthErrorDetails, isNetworkError } from "@/lib/auth-errors"; @@ -25,15 +25,10 @@ export function LocalLoginForm() { title: null, message: null, }); - const [authType, setAuthType] = useState(null); + const authType = AUTH_TYPE; const router = useRouter(); const [{ mutateAsync: login, isPending: isLoggingIn }] = useAtom(loginMutationAtom); - useEffect(() => { - // Get the auth type from centralized config - setAuthType(AUTH_TYPE); - }, []); - const handleSubmit = async (e: React.FormEvent) => { e.preventDefault(); setError({ title: null, message: null }); // Clear any previous errors From e5cabf95e46f75854d56a5ca6eb2315ccce9752b Mon Sep 17 00:00:00 2001 From: likiosliu Date: Wed, 25 Mar 2026 12:34:30 +0800 Subject: [PATCH 035/102] fix: clean up recursive setTimeout calls in onboarding tour - Add cancelled flag to prevent state updates after unmount in checkAndStartTour retry loop - Store retry timer ID in a ref and clear it on cleanup in updateTarget effect Closes #950 --- surfsense_web/components/onboarding-tour.tsx | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/surfsense_web/components/onboarding-tour.tsx b/surfsense_web/components/onboarding-tour.tsx index 03fad87b6..114a46141 100644 --- a/surfsense_web/components/onboarding-tour.tsx +++ b/surfsense_web/components/onboarding-tour.tsx @@ -436,6 +436,7 @@ export function OnboardingTour() { const { resolvedTheme } = useTheme(); const pathname = usePathname(); const retryCountRef = useRef(0); + const retryTimerRef = useRef | null>(null); const maxRetries = 10; // Track previous user ID to detect user changes const previousUserIdRef = useRef(null); @@ -477,7 +478,7 @@ export function OnboardingTour() { retryCountRef.current = 0; } else if (retryCountRef.current < maxRetries) { retryCountRef.current++; - setTimeout(() => { + retryTimerRef.current = setTimeout(() => { const retryEl = document.querySelector(currentStep.target); if (retryEl) { setTargetEl(retryEl); @@ -487,6 +488,10 @@ export function OnboardingTour() { } }, 200); } + + return () => { + if (retryTimerRef.current) clearTimeout(retryTimerRef.current); + }; }, [currentStep]); // Check if tour should run: localStorage + data validation with user ID tracking @@ -556,7 +561,11 @@ export function OnboardingTour() { } // User is new and hasn't seen tour - wait for DOM elements and start tour + let cancelled = false; + const checkAndStartTour = () => { + if (cancelled) return; + // Check if all required elements exist const connectorEl = document.querySelector(TOUR_STEPS[0].target); const documentsEl = document.querySelector(TOUR_STEPS[1].target); @@ -578,7 +587,10 @@ export function OnboardingTour() { // Start checking after initial delay const timer = setTimeout(checkAndStartTour, 500); - return () => clearTimeout(timer); + return () => { + cancelled = true; + clearTimeout(timer); + }; }, [mounted, user?.id, searchSpaceId, pathname, threadsData, documentTypeCounts, connectors]); // Update position on resize/scroll From f3d6ae95e1dc1c731e9f36de450a4a64053bcf37 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 25 Mar 2026 16:22:32 +0200 Subject: [PATCH 036/102] fix: pull-based clipboard text and cursor at end with br --- surfsense_desktop/src/modules/quick-ask.ts | 21 +++++++++---------- surfsense_desktop/src/preload.ts | 8 +------ .../assistant-ui/inline-mention-editor.tsx | 4 +++- .../components/assistant-ui/thread.tsx | 2 +- surfsense_web/types/window.d.ts | 2 +- 5 files changed, 16 insertions(+), 21 deletions(-) diff --git a/surfsense_desktop/src/modules/quick-ask.ts b/surfsense_desktop/src/modules/quick-ask.ts index 8eb094812..45bfe7c04 100644 --- a/surfsense_desktop/src/modules/quick-ask.ts +++ b/surfsense_desktop/src/modules/quick-ask.ts @@ -1,10 +1,11 @@ -import { BrowserWindow, clipboard, globalShortcut, screen } from 'electron'; +import { BrowserWindow, clipboard, globalShortcut, ipcMain, screen } from 'electron'; import path from 'path'; import { IPC_CHANNELS } from '../ipc/channels'; import { getServerPort } from './server'; const SHORTCUT = 'CommandOrControl+Option+S'; let quickAskWindow: BrowserWindow | null = null; +let pendingText = ''; function hideQuickAsk(): void { if (quickAskWindow && !quickAskWindow.isDestroyed()) { @@ -62,22 +63,20 @@ export function registerQuickAsk(): void { const text = clipboard.readText().trim(); if (!text) return; - const isExisting = quickAskWindow && !quickAskWindow.isDestroyed(); + pendingText = text; const cursor = screen.getCursorScreenPoint(); - const win = createQuickAskWindow(cursor.x, cursor.y); - - if (isExisting) { - win.webContents.send(IPC_CHANNELS.QUICK_ASK_TEXT, text); - } else { - win.webContents.once('did-finish-load', () => { - win.webContents.send(IPC_CHANNELS.QUICK_ASK_TEXT, text); - }); - } + createQuickAskWindow(cursor.x, cursor.y); }); if (!ok) { console.log(`Quick-ask: failed to register ${SHORTCUT}`); } + + ipcMain.handle(IPC_CHANNELS.QUICK_ASK_TEXT, () => { + const text = pendingText; + pendingText = ''; + return text; + }); } export function unregisterQuickAsk(): void { diff --git a/surfsense_desktop/src/preload.ts b/surfsense_desktop/src/preload.ts index ca894d6b3..9c857de1b 100644 --- a/surfsense_desktop/src/preload.ts +++ b/surfsense_desktop/src/preload.ts @@ -17,11 +17,5 @@ contextBridge.exposeInMainWorld('electronAPI', { ipcRenderer.removeListener(IPC_CHANNELS.DEEP_LINK, listener); }; }, - onQuickAskText: (callback: (text: string) => void) => { - const listener = (_event: unknown, text: string) => callback(text); - ipcRenderer.on(IPC_CHANNELS.QUICK_ASK_TEXT, listener); - return () => { - ipcRenderer.removeListener(IPC_CHANNELS.QUICK_ASK_TEXT, listener); - }; - }, + getQuickAskText: () => ipcRenderer.invoke(IPC_CHANNELS.QUICK_ASK_TEXT), }); diff --git a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx index ae490cdd0..40bd16f8d 100644 --- a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx +++ b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx @@ -119,7 +119,9 @@ export const InlineMentionEditor = forwardRef { if (!initialText || !editorRef.current) return; - editorRef.current.textContent = initialText + "\n"; + editorRef.current.innerText = initialText; + editorRef.current.appendChild(document.createElement("br")); + editorRef.current.appendChild(document.createElement("br")); setIsEmpty(false); onChange?.(initialText, Array.from(mentionedDocs.values())); editorRef.current.focus(); diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index eb98fd025..64ec79ef2 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -332,7 +332,7 @@ const Composer: FC = () => { const [quickAskText, setQuickAskText] = useState(); useEffect(() => { - return window.electronAPI?.onQuickAskText((text) => { + window.electronAPI?.getQuickAskText().then((text) => { if (text) setQuickAskText(text); }); }, []); diff --git a/surfsense_web/types/window.d.ts b/surfsense_web/types/window.d.ts index 6c7e192db..c8b4c004a 100644 --- a/surfsense_web/types/window.d.ts +++ b/surfsense_web/types/window.d.ts @@ -10,7 +10,7 @@ interface ElectronAPI { openExternal: (url: string) => void; getAppVersion: () => Promise; onDeepLink: (callback: (url: string) => void) => () => void; - onQuickAskText: (callback: (text: string) => void) => () => void; + getQuickAskText: () => Promise; } declare global { From 7cbb67f0dd88f557670b6767c966e50e42dd911c Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 25 Mar 2026 16:35:23 +0200 Subject: [PATCH 037/102] scroll to cursor after inserting clipboard text --- .../components/assistant-ui/inline-mention-editor.tsx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx index 40bd16f8d..66389cade 100644 --- a/surfsense_web/components/assistant-ui/inline-mention-editor.tsx +++ b/surfsense_web/components/assistant-ui/inline-mention-editor.tsx @@ -119,11 +119,13 @@ export const InlineMentionEditor = forwardRef { if (!initialText || !editorRef.current) return; + // Insert the text and add trailing line breaks for typing space editorRef.current.innerText = initialText; editorRef.current.appendChild(document.createElement("br")); editorRef.current.appendChild(document.createElement("br")); setIsEmpty(false); onChange?.(initialText, Array.from(mentionedDocs.values())); + // Place cursor at the end of the content editorRef.current.focus(); const sel = window.getSelection(); const range = document.createRange(); @@ -131,6 +133,11 @@ export const InlineMentionEditor = forwardRef Date: Wed, 25 Mar 2026 17:05:03 +0200 Subject: [PATCH 038/102] keep panel floating, handle window opens, disable fullscreen --- surfsense_desktop/src/modules/quick-ask.ts | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/surfsense_desktop/src/modules/quick-ask.ts b/surfsense_desktop/src/modules/quick-ask.ts index 45bfe7c04..81b74d986 100644 --- a/surfsense_desktop/src/modules/quick-ask.ts +++ b/surfsense_desktop/src/modules/quick-ask.ts @@ -1,4 +1,4 @@ -import { BrowserWindow, clipboard, globalShortcut, ipcMain, screen } from 'electron'; +import { BrowserWindow, clipboard, globalShortcut, ipcMain, screen, shell } from 'electron'; import path from 'path'; import { IPC_CHANNELS } from '../ipc/channels'; import { getServerPort } from './server'; @@ -28,6 +28,7 @@ function createQuickAskWindow(x: number, y: number): BrowserWindow { y, type: 'panel', resizable: true, + fullscreenable: false, webPreferences: { preload: path.join(__dirname, 'preload.js'), contextIsolation: true, @@ -44,7 +45,13 @@ function createQuickAskWindow(x: number, y: number): BrowserWindow { quickAskWindow?.show(); }); - quickAskWindow.on('blur', hideQuickAsk); + quickAskWindow.webContents.setWindowOpenHandler(({ url }) => { + if (url.startsWith('http://localhost')) { + return { action: 'allow' }; + } + shell.openExternal(url); + return { action: 'deny' }; + }); quickAskWindow.on('closed', () => { quickAskWindow = null; From bbd5ee8a1979c67a4ab43b1cadca904445a4008f Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 25 Mar 2026 20:35:23 +0530 Subject: [PATCH 039/102] feat: enhance Google Calendar event update functionality - Introduced helper functions `_is_date_only` and `_build_time_body` to streamline the construction of event start and end times for all-day and timed events. - Refactored the `create_update_calendar_event_tool` to utilize the new helper functions, improving code readability and maintainability. - Updated the Google Calendar sync service to ensure proper handling of calendar IDs with a default fallback to "primary". - Modified the ApprovalCard component to simplify the construction of event update arguments, enhancing clarity and reducing redundancy. --- .../tools/google_calendar/update_event.py | 34 +++++++------- .../google_calendar/kb_sync_service.py | 4 +- .../hitl-edit-panel/hitl-edit-panel.tsx | 2 +- .../tool-ui/google-calendar/update-event.tsx | 46 ++++++++++++++----- 4 files changed, 55 insertions(+), 31 deletions(-) diff --git a/surfsense_backend/app/agents/new_chat/tools/google_calendar/update_event.py b/surfsense_backend/app/agents/new_chat/tools/google_calendar/update_event.py index 4b57cf2e3..a114c84f4 100644 --- a/surfsense_backend/app/agents/new_chat/tools/google_calendar/update_event.py +++ b/surfsense_backend/app/agents/new_chat/tools/google_calendar/update_event.py @@ -14,6 +14,20 @@ from app.services.google_calendar import GoogleCalendarToolMetadataService logger = logging.getLogger(__name__) +def _is_date_only(value: str) -> bool: + """Return True when *value* looks like a bare date (YYYY-MM-DD) with no time component.""" + return len(value) <= 10 and "T" not in value + + +def _build_time_body(value: str, context: dict[str, Any] | Any) -> dict[str, str]: + """Build a Google Calendar start/end body using ``date`` for all-day + events and ``dateTime`` for timed events.""" + if _is_date_only(value): + return {"date": value} + tz = context.get("timezone", "UTC") if isinstance(context, dict) else "UTC" + return {"dateTime": value, "timeZone": tz} + + def create_update_calendar_event_tool( db_session: AsyncSession | None = None, search_space_id: int | None = None, @@ -255,25 +269,13 @@ def create_update_calendar_event_tool( if final_new_summary is not None: update_body["summary"] = final_new_summary if final_new_start_datetime is not None: - tz = ( - context.get("timezone", "UTC") - if isinstance(context, dict) - else "UTC" + update_body["start"] = _build_time_body( + final_new_start_datetime, context ) - update_body["start"] = { - "dateTime": final_new_start_datetime, - "timeZone": tz, - } if final_new_end_datetime is not None: - tz = ( - context.get("timezone", "UTC") - if isinstance(context, dict) - else "UTC" + update_body["end"] = _build_time_body( + final_new_end_datetime, context ) - update_body["end"] = { - "dateTime": final_new_end_datetime, - "timeZone": tz, - } if final_new_description is not None: update_body["description"] = final_new_description if final_new_location is not None: diff --git a/surfsense_backend/app/services/google_calendar/kb_sync_service.py b/surfsense_backend/app/services/google_calendar/kb_sync_service.py index 59afa116e..3cda02b9b 100644 --- a/surfsense_backend/app/services/google_calendar/kb_sync_service.py +++ b/surfsense_backend/app/services/google_calendar/kb_sync_service.py @@ -209,8 +209,8 @@ class GoogleCalendarKBSyncService: ) calendar_id = (document.document_metadata or {}).get( - "calendar_id", "primary" - ) + "calendar_id" + ) or "primary" live_event = await loop.run_in_executor( None, lambda: ( diff --git a/surfsense_web/components/hitl-edit-panel/hitl-edit-panel.tsx b/surfsense_web/components/hitl-edit-panel/hitl-edit-panel.tsx index 25e896842..e8bc1a6cd 100644 --- a/surfsense_web/components/hitl-edit-panel/hitl-edit-panel.tsx +++ b/surfsense_web/components/hitl-edit-panel/hitl-edit-panel.tsx @@ -185,7 +185,7 @@ function DateTimePickerField({ type="time" value={time} onChange={handleTimeChange} - className="w-[120px] text-sm shrink-0 pl-1.5 [&::-webkit-calendar-picker-indicator]:order-first [&::-webkit-calendar-picker-indicator]:mr-1" + className="w-[120px] text-sm shrink-0 appearance-none [&::-webkit-calendar-picker-indicator]:hidden [&::-webkit-calendar-picker-indicator]:appearance-none" /> ); diff --git a/surfsense_web/components/tool-ui/google-calendar/update-event.tsx b/surfsense_web/components/tool-ui/google-calendar/update-event.tsx index cc941bab8..661032628 100644 --- a/surfsense_web/components/tool-ui/google-calendar/update-event.tsx +++ b/surfsense_web/components/tool-ui/google-calendar/update-event.tsx @@ -253,6 +253,12 @@ function ApprovalCard({ String(effectiveNewDescription ?? "") !== (event?.description ?? ""); const buildFinalArgs = useCallback(() => { + const base = { + event_id: event?.event_id, + document_id: event?.document_id, + connector_id: account?.id, + }; + if (pendingEdits) { const attendeesArr = pendingEdits.attendees ? pendingEdits.attendees @@ -260,22 +266,38 @@ function ApprovalCard({ .map((e) => e.trim()) .filter(Boolean) : null; + const origAttendees = event?.attendees?.map((a) => a.email) ?? []; + return { - event_id: event?.event_id, - document_id: event?.document_id, - connector_id: account?.id, - new_summary: pendingEdits.summary || null, - new_description: pendingEdits.description || null, - new_start_datetime: pendingEdits.start_datetime || null, - new_end_datetime: pendingEdits.end_datetime || null, - new_location: pendingEdits.location || null, - new_attendees: attendeesArr, + ...base, + new_summary: + pendingEdits.summary && pendingEdits.summary !== (event?.summary ?? "") + ? pendingEdits.summary + : null, + new_description: + pendingEdits.description !== (event?.description ?? "") + ? pendingEdits.description || null + : null, + new_start_datetime: + pendingEdits.start_datetime && pendingEdits.start_datetime !== (event?.start ?? "") + ? pendingEdits.start_datetime + : null, + new_end_datetime: + pendingEdits.end_datetime && pendingEdits.end_datetime !== (event?.end ?? "") + ? pendingEdits.end_datetime + : null, + new_location: + pendingEdits.location !== (event?.location ?? "") + ? pendingEdits.location || null + : null, + new_attendees: + attendeesArr && attendeesArr.join(",") !== origAttendees.join(",") + ? attendeesArr + : null, }; } return { - event_id: event?.event_id, - document_id: event?.document_id, - connector_id: account?.id, + ...base, new_summary: actionArgs.new_summary ?? null, new_description: actionArgs.new_description ?? null, new_start_datetime: actionArgs.new_start_datetime ?? null, From 743172785da56dbd9a750cd380917dfea480ba0b Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 25 Mar 2026 18:00:00 +0200 Subject: [PATCH 040/102] escape to hide, clamp panel to screen bounds, disable maximize --- surfsense_desktop/src/modules/quick-ask.ts | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/surfsense_desktop/src/modules/quick-ask.ts b/surfsense_desktop/src/modules/quick-ask.ts index 81b74d986..f7753d1d6 100644 --- a/surfsense_desktop/src/modules/quick-ask.ts +++ b/surfsense_desktop/src/modules/quick-ask.ts @@ -13,6 +13,15 @@ function hideQuickAsk(): void { } } +function clampToScreen(x: number, y: number, w: number, h: number): { x: number; y: number } { + const display = screen.getDisplayNearestPoint({ x, y }); + const { x: dx, y: dy, width: dw, height: dh } = display.workArea; + return { + x: Math.max(dx, Math.min(x, dx + dw - w)), + y: Math.max(dy, Math.min(y, dy + dh - h)), + }; +} + function createQuickAskWindow(x: number, y: number): BrowserWindow { if (quickAskWindow && !quickAskWindow.isDestroyed()) { quickAskWindow.setPosition(x, y); @@ -29,6 +38,7 @@ function createQuickAskWindow(x: number, y: number): BrowserWindow { type: 'panel', resizable: true, fullscreenable: false, + maximizable: false, webPreferences: { preload: path.join(__dirname, 'preload.js'), contextIsolation: true, @@ -45,6 +55,10 @@ function createQuickAskWindow(x: number, y: number): BrowserWindow { quickAskWindow?.show(); }); + quickAskWindow.webContents.on('before-input-event', (_event, input) => { + if (input.key === 'Escape') hideQuickAsk(); + }); + quickAskWindow.webContents.setWindowOpenHandler(({ url }) => { if (url.startsWith('http://localhost')) { return { action: 'allow' }; @@ -72,7 +86,8 @@ export function registerQuickAsk(): void { pendingText = text; const cursor = screen.getCursorScreenPoint(); - createQuickAskWindow(cursor.x, cursor.y); + const pos = clampToScreen(cursor.x, cursor.y, 450, 550); + createQuickAskWindow(pos.x, pos.y); }); if (!ok) { From 2af4784e63a7193946a991369f16e6c06a446c59 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 25 Mar 2026 18:26:28 +0200 Subject: [PATCH 041/102] cross-platform panel: toolbar fallback for Windows/Linux --- surfsense_desktop/src/modules/quick-ask.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/surfsense_desktop/src/modules/quick-ask.ts b/surfsense_desktop/src/modules/quick-ask.ts index f7753d1d6..9009099a3 100644 --- a/surfsense_desktop/src/modules/quick-ask.ts +++ b/surfsense_desktop/src/modules/quick-ask.ts @@ -35,7 +35,9 @@ function createQuickAskWindow(x: number, y: number): BrowserWindow { height: 550, x, y, - type: 'panel', + ...(process.platform === 'darwin' + ? { type: 'panel' as const } + : { type: 'toolbar' as const, alwaysOnTop: true }), resizable: true, fullscreenable: false, maximizable: false, From f7640671f3dfe96e0432ba8c2df88a38bb9fd6ba Mon Sep 17 00:00:00 2001 From: likiosliu Date: Thu, 26 Mar 2026 11:49:45 +0800 Subject: [PATCH 042/102] fix: replace router.push with Link for static navigation in UserDropdown Enables route prefetching and follows Next.js best practices. Removes unused useRouter import. --- surfsense_web/components/UserDropdown.tsx | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/surfsense_web/components/UserDropdown.tsx b/surfsense_web/components/UserDropdown.tsx index b79ab6e79..197db6287 100644 --- a/surfsense_web/components/UserDropdown.tsx +++ b/surfsense_web/components/UserDropdown.tsx @@ -1,7 +1,7 @@ "use client"; import { BadgeCheck, LogOut } from "lucide-react"; -import { useRouter } from "next/navigation"; +import Link from "next/link"; import { useState } from "react"; import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar"; import { Button } from "@/components/ui/button"; @@ -27,7 +27,6 @@ export function UserDropdown({ avatar: string; }; }) { - const router = useRouter(); const [isLoggingOut, setIsLoggingOut] = useState(false); const handleLogout = async () => { @@ -75,12 +74,11 @@ export function UserDropdown({ - router.push(`/dashboard/api-key`)} - className="text-xs md:text-sm" - > - - API Key + + + + API Key + From 3d762ccf6216bac059079b513c63472cfd19c861 Mon Sep 17 00:00:00 2001 From: likiosliu Date: Thu, 26 Mar 2026 11:50:39 +0800 Subject: [PATCH 043/102] fix: remove unnecessary "use client" from pure presentational components These components only render JSX with props and don't use hooks, event handlers, or browser APIs. --- surfsense_web/app/docs/sidebar-separator.tsx | 2 -- surfsense_web/components/Logo.tsx | 2 -- .../components/announcements/AnnouncementsEmptyState.tsx | 2 -- .../public-chat-snapshots/public-chat-snapshots-empty-state.tsx | 2 -- 4 files changed, 8 deletions(-) diff --git a/surfsense_web/app/docs/sidebar-separator.tsx b/surfsense_web/app/docs/sidebar-separator.tsx index 36fff09a4..ceb56b160 100644 --- a/surfsense_web/app/docs/sidebar-separator.tsx +++ b/surfsense_web/app/docs/sidebar-separator.tsx @@ -1,5 +1,3 @@ -"use client"; - import type { Separator } from "fumadocs-core/page-tree"; export function SidebarSeparator({ item }: { item: Separator }) { diff --git a/surfsense_web/components/Logo.tsx b/surfsense_web/components/Logo.tsx index 76446ca59..121185757 100644 --- a/surfsense_web/components/Logo.tsx +++ b/surfsense_web/components/Logo.tsx @@ -1,5 +1,3 @@ -"use client"; - import Image from "next/image"; import Link from "next/link"; import { cn } from "@/lib/utils"; diff --git a/surfsense_web/components/announcements/AnnouncementsEmptyState.tsx b/surfsense_web/components/announcements/AnnouncementsEmptyState.tsx index b4551f56a..9ed1ea45d 100644 --- a/surfsense_web/components/announcements/AnnouncementsEmptyState.tsx +++ b/surfsense_web/components/announcements/AnnouncementsEmptyState.tsx @@ -1,5 +1,3 @@ -"use client"; - import { BellOff } from "lucide-react"; export function AnnouncementsEmptyState() { diff --git a/surfsense_web/components/public-chat-snapshots/public-chat-snapshots-empty-state.tsx b/surfsense_web/components/public-chat-snapshots/public-chat-snapshots-empty-state.tsx index 4bb295217..4a4a57770 100644 --- a/surfsense_web/components/public-chat-snapshots/public-chat-snapshots-empty-state.tsx +++ b/surfsense_web/components/public-chat-snapshots/public-chat-snapshots-empty-state.tsx @@ -1,5 +1,3 @@ -"use client"; - import { Link2Off } from "lucide-react"; interface PublicChatSnapshotsEmptyStateProps { From 2cf6866c10e7e7219ffcf205b33744972dbed866 Mon Sep 17 00:00:00 2001 From: JoeMakuta Date: Thu, 26 Mar 2026 11:59:04 +0200 Subject: [PATCH 044/102] Add loader on new chat route --- .../new-chat/[[...chat_id]]/page.tsx | 38 ++-------------- .../[search_space_id]/new-chat/loading.tsx | 45 +++++++++++++++++++ 2 files changed, 48 insertions(+), 35 deletions(-) create mode 100644 surfsense_web/app/dashboard/[search_space_id]/new-chat/loading.tsx diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 8578d2dcb..1cbfca2df 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -74,6 +74,7 @@ import { trackChatMessageSent, trackChatResponseReceived, } from "@/lib/posthog/events"; +import Loading from "../loading"; /** * After a tool produces output, mark any previously-decided interrupt tool @@ -1527,40 +1528,7 @@ export default function NewChatPage() { // Show loading state only when loading an existing thread if (isInitializing) { return ( -
-
- {/* User message */} -
- -
- - {/* Assistant message */} -
- - - -
- - {/* User message */} -
- -
- - {/* Assistant message */} -
- - - -
-
- - {/* Input bar */} -
-
- -
-
-
+ ); } @@ -1597,4 +1565,4 @@ export default function NewChatPage() { ); -} +} \ No newline at end of file diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/loading.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/loading.tsx new file mode 100644 index 000000000..1f47fb95a --- /dev/null +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/loading.tsx @@ -0,0 +1,45 @@ +import { Skeleton } from "@/components/ui/skeleton"; + +export default function Loading() { + return ( +
+
+ {/* User message */} +
+ +
+ + {/* Assistant message */} +
+ + + +
+ + {/* User message */} +
+ +
+ + {/* Assistant message */} +
+ + + +
+ + {/* User message */} +
+ +
+
+ + {/* Input bar */} +
+
+ +
+
+
+ ); +} From 80ede9849ab5feed4c0cb3be0935422315811d1f Mon Sep 17 00:00:00 2001 From: JoeMakuta Date: Thu, 26 Mar 2026 12:19:18 +0200 Subject: [PATCH 045/102] Add loading od logs route --- .../[search_space_id]/logs/loading.tsx | 136 ++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 surfsense_web/app/dashboard/[search_space_id]/logs/loading.tsx diff --git a/surfsense_web/app/dashboard/[search_space_id]/logs/loading.tsx b/surfsense_web/app/dashboard/[search_space_id]/logs/loading.tsx new file mode 100644 index 000000000..318c2836b --- /dev/null +++ b/surfsense_web/app/dashboard/[search_space_id]/logs/loading.tsx @@ -0,0 +1,136 @@ +"use client"; + +import { motion } from "motion/react"; +import { Skeleton } from "@/components/ui/skeleton"; + +export default function Loading() { + return ( + + {/* Summary Dashboard Skeleton */} + + {[...Array(4)].map((_, i) => ( +
+
+ + +
+
+ + +
+
+ ))} +
+ + {/* Header Section Skeleton */} + +
+ + +
+ +
+ + {/* Filters Skeleton */} + +
+ + + + +
+
+ + {/* Table Skeleton */} + + {/* Table Header */} +
+ + + + + + + +
+ + {/* Table Rows */} + {[...Array(6)].map((_, i) => ( +
+ + + +
+ + +
+
+ + +
+
+ + +
+ +
+ ))} +
+ + {/* Pagination Skeleton */} +
+ + + + + + + + + +
+ + + + +
+
+
+ ); +} From d535851ad51ad574fd99664ec553c13786b0e5b5 Mon Sep 17 00:00:00 2001 From: JoeMakuta Date: Thu, 26 Mar 2026 12:44:46 +0200 Subject: [PATCH 046/102] Add loader to more-pages route --- .../dashboard/[search_space_id]/more-pages/loading.tsx | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 surfsense_web/app/dashboard/[search_space_id]/more-pages/loading.tsx diff --git a/surfsense_web/app/dashboard/[search_space_id]/more-pages/loading.tsx b/surfsense_web/app/dashboard/[search_space_id]/more-pages/loading.tsx new file mode 100644 index 000000000..9a0c45f3f --- /dev/null +++ b/surfsense_web/app/dashboard/[search_space_id]/more-pages/loading.tsx @@ -0,0 +1,10 @@ +import { Skeleton } from "@/components/ui/skeleton"; + +export default function Loading() { + return ( +
+ + +
+ ); +} From e4d5c119ef6879aa9a58ef59140b36e00695b8f1 Mon Sep 17 00:00:00 2001 From: JoeMakuta Date: Thu, 26 Mar 2026 13:33:29 +0200 Subject: [PATCH 047/102] fix: convert public chat page to server component --- surfsense_web/app/public/[token]/page.tsx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/surfsense_web/app/public/[token]/page.tsx b/surfsense_web/app/public/[token]/page.tsx index 530664ac6..10cd19732 100644 --- a/surfsense_web/app/public/[token]/page.tsx +++ b/surfsense_web/app/public/[token]/page.tsx @@ -1,11 +1,11 @@ -"use client"; - -import { useParams } from "next/navigation"; import { PublicChatView } from "@/components/public-chat/public-chat-view"; -export default function PublicChatPage() { - const params = useParams(); - const token = params.token as string; +export default async function PublicChatPage({ + params, +}: { + params: Promise<{ token: string }>; +}) { + const { token } = await params; - return ; + return ; } From f00f7826ed09c94d32ee85fc75cd101946dec133 Mon Sep 17 00:00:00 2001 From: JoeMakuta Date: Thu, 26 Mar 2026 15:11:39 +0200 Subject: [PATCH 048/102] fix: improve semantics and structure of settings forms in GeneralSettingsManager and PromptConfigManager --- .../settings/general-settings-manager.tsx | 300 ++++++++------- .../settings/prompt-config-manager.tsx | 344 ++++++++++-------- 2 files changed, 350 insertions(+), 294 deletions(-) diff --git a/surfsense_web/components/settings/general-settings-manager.tsx b/surfsense_web/components/settings/general-settings-manager.tsx index a9482001d..8a847b629 100644 --- a/surfsense_web/components/settings/general-settings-manager.tsx +++ b/surfsense_web/components/settings/general-settings-manager.tsx @@ -9,160 +9,190 @@ import { toast } from "sonner"; import { updateSearchSpaceMutationAtom } from "@/atoms/search-spaces/search-space-mutation.atoms"; import { Alert, AlertDescription } from "@/components/ui/alert"; import { Button } from "@/components/ui/button"; -import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; +import { + Card, + CardContent, + CardDescription, + CardHeader, + CardTitle, +} from "@/components/ui/card"; import { Input } from "@/components/ui/input"; import { Label } from "@/components/ui/label"; import { Skeleton } from "@/components/ui/skeleton"; import { searchSpacesApiService } from "@/lib/apis/search-spaces-api.service"; import { cacheKeys } from "@/lib/query-client/cache-keys"; +import { Spinner } from "../ui/spinner"; interface GeneralSettingsManagerProps { - searchSpaceId: number; + searchSpaceId: number; } -export function GeneralSettingsManager({ searchSpaceId }: GeneralSettingsManagerProps) { - const t = useTranslations("searchSpaceSettings"); - const tCommon = useTranslations("common"); - const { - data: searchSpace, - isLoading: loading, - refetch: fetchSearchSpace, - } = useQuery({ - queryKey: cacheKeys.searchSpaces.detail(searchSpaceId.toString()), - queryFn: () => searchSpacesApiService.getSearchSpace({ id: searchSpaceId }), - enabled: !!searchSpaceId, - }); +export function GeneralSettingsManager({ + searchSpaceId, +}: GeneralSettingsManagerProps) { + const t = useTranslations("searchSpaceSettings"); + const tCommon = useTranslations("common"); + const { + data: searchSpace, + isLoading: loading, + refetch: fetchSearchSpace, + } = useQuery({ + queryKey: cacheKeys.searchSpaces.detail(searchSpaceId.toString()), + queryFn: () => searchSpacesApiService.getSearchSpace({ id: searchSpaceId }), + enabled: !!searchSpaceId, + }); - const { mutateAsync: updateSearchSpace } = useAtomValue(updateSearchSpaceMutationAtom); + const { mutateAsync: updateSearchSpace } = useAtomValue( + updateSearchSpaceMutationAtom, + ); - const [name, setName] = useState(""); - const [description, setDescription] = useState(""); - const [saving, setSaving] = useState(false); - const [hasChanges, setHasChanges] = useState(false); + const [name, setName] = useState(""); + const [description, setDescription] = useState(""); + const [saving, setSaving] = useState(false); + const [hasChanges, setHasChanges] = useState(false); - // Initialize state from fetched search space - useEffect(() => { - if (searchSpace) { - setName(searchSpace.name || ""); - setDescription(searchSpace.description || ""); - setHasChanges(false); - } - }, [searchSpace]); + // Initialize state from fetched search space + useEffect(() => { + if (searchSpace) { + setName(searchSpace.name || ""); + setDescription(searchSpace.description || ""); + setHasChanges(false); + } + }, [searchSpace]); - // Track changes - useEffect(() => { - if (searchSpace) { - const currentName = searchSpace.name || ""; - const currentDescription = searchSpace.description || ""; - const changed = currentName !== name || currentDescription !== description; - setHasChanges(changed); - } - }, [searchSpace, name, description]); + // Track changes + useEffect(() => { + if (searchSpace) { + const currentName = searchSpace.name || ""; + const currentDescription = searchSpace.description || ""; + const changed = + currentName !== name || currentDescription !== description; + setHasChanges(changed); + } + }, [searchSpace, name, description]); - const handleSave = async () => { - try { - setSaving(true); + const handleSave = async () => { + try { + setSaving(true); - await updateSearchSpace({ - id: searchSpaceId, - data: { - name: name.trim(), - description: description.trim() || undefined, - }, - }); + await updateSearchSpace({ + id: searchSpaceId, + data: { + name: name.trim(), + description: description.trim() || undefined, + }, + }); - setHasChanges(false); - await fetchSearchSpace(); - } catch (error: any) { - console.error("Error saving search space details:", error); - toast.error(error.message || "Failed to save search space details"); - } finally { - setSaving(false); - } - }; + setHasChanges(false); + await fetchSearchSpace(); + } catch (error: any) { + console.error("Error saving search space details:", error); + toast.error(error.message || "Failed to save search space details"); + } finally { + setSaving(false); + } + }; - if (loading) { - return ( -
- - - - - - - - - - -
- ); - } + const onSubmit = (e: React.FormEvent) => { + e.preventDefault(); + handleSave(); + }; - return ( -
- - - - Update your search space name and description. These details help identify and organize - your workspace. - - + if (loading) { + return ( +
+ + + + + + + + + + +
+ ); + } - {/* Search Space Details Card */} - - - Search Space Details - - Manage the basic information for this search space. - - - -
- - setName(e.target.value)} - className="text-sm md:text-base h-9 md:h-10" - /> -

- {t("general_name_description")} -

-
+ return ( +
+ + + + Update your search space name and description. These details help + identify and organize your workspace. + + -
- - setDescription(e.target.value)} - className="text-sm md:text-base h-9 md:h-10" - /> -

- {t("general_description_description")} -

-
- - + {/* Search Space Details Card */} +
+ + + + Search Space Details + + + Manage the basic information for this search space. + + + +
+ + setName(e.target.value)} + className="text-sm md:text-base h-9 md:h-10" + /> +

+ {t("general_name_description")} +

+
- {/* Action Buttons */} -
- -
-
- ); +
+ + setDescription(e.target.value)} + className="text-sm md:text-base h-9 md:h-10" + /> +

+ {t("general_description_description")} +

+
+
+
+ + {/* Action Buttons */} +
+ +
+ +
+ ); } diff --git a/surfsense_web/components/settings/prompt-config-manager.tsx b/surfsense_web/components/settings/prompt-config-manager.tsx index b9c9c2fc8..dc3a15a7d 100644 --- a/surfsense_web/components/settings/prompt-config-manager.tsx +++ b/surfsense_web/components/settings/prompt-config-manager.tsx @@ -6,187 +6,213 @@ import { useEffect, useState } from "react"; import { toast } from "sonner"; import { Alert, AlertDescription } from "@/components/ui/alert"; import { Button } from "@/components/ui/button"; -import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; +import { + Card, + CardContent, + CardDescription, + CardHeader, + CardTitle, +} from "@/components/ui/card"; import { Label } from "@/components/ui/label"; import { Skeleton } from "@/components/ui/skeleton"; import { Textarea } from "@/components/ui/textarea"; import { searchSpacesApiService } from "@/lib/apis/search-spaces-api.service"; import { authenticatedFetch } from "@/lib/auth-utils"; import { cacheKeys } from "@/lib/query-client/cache-keys"; +import { Spinner } from "../ui/spinner"; interface PromptConfigManagerProps { - searchSpaceId: number; + searchSpaceId: number; } -export function PromptConfigManager({ searchSpaceId }: PromptConfigManagerProps) { - const { - data: searchSpace, - isLoading: loading, - refetch: fetchSearchSpace, - } = useQuery({ - queryKey: cacheKeys.searchSpaces.detail(searchSpaceId.toString()), - queryFn: () => searchSpacesApiService.getSearchSpace({ id: searchSpaceId }), - enabled: !!searchSpaceId, - }); +export function PromptConfigManager({ + searchSpaceId, +}: PromptConfigManagerProps) { + const { + data: searchSpace, + isLoading: loading, + refetch: fetchSearchSpace, + } = useQuery({ + queryKey: cacheKeys.searchSpaces.detail(searchSpaceId.toString()), + queryFn: () => searchSpacesApiService.getSearchSpace({ id: searchSpaceId }), + enabled: !!searchSpaceId, + }); - const [customInstructions, setCustomInstructions] = useState(""); - const [saving, setSaving] = useState(false); - const [hasChanges, setHasChanges] = useState(false); + const [customInstructions, setCustomInstructions] = useState(""); + const [saving, setSaving] = useState(false); + const [hasChanges, setHasChanges] = useState(false); - // Initialize state from fetched search space - useEffect(() => { - if (searchSpace) { - setCustomInstructions(searchSpace.qna_custom_instructions || ""); - setHasChanges(false); - } - }, [searchSpace]); + // Initialize state from fetched search space + useEffect(() => { + if (searchSpace) { + setCustomInstructions(searchSpace.qna_custom_instructions || ""); + setHasChanges(false); + } + }, [searchSpace]); - // Track changes - useEffect(() => { - if (searchSpace) { - const currentCustom = searchSpace.qna_custom_instructions || ""; - const changed = currentCustom !== customInstructions; - setHasChanges(changed); - } - }, [searchSpace, customInstructions]); + // Track changes + useEffect(() => { + if (searchSpace) { + const currentCustom = searchSpace.qna_custom_instructions || ""; + const changed = currentCustom !== customInstructions; + setHasChanges(changed); + } + }, [searchSpace, customInstructions]); - const handleSave = async () => { - try { - setSaving(true); + const handleSave = async () => { + try { + setSaving(true); - const payload = { - qna_custom_instructions: customInstructions.trim() || "", - }; + const payload = { + qna_custom_instructions: customInstructions.trim() || "", + }; - const response = await authenticatedFetch( - `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}`, - { - method: "PUT", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify(payload), - } - ); + const response = await authenticatedFetch( + `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}`, + { + method: "PUT", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(payload), + }, + ); - if (!response.ok) { - const errorData = await response.json().catch(() => ({})); - throw new Error(errorData.detail || "Failed to save system instructions"); - } + if (!response.ok) { + const errorData = await response.json().catch(() => ({})); + throw new Error( + errorData.detail || "Failed to save system instructions", + ); + } - toast.success("System instructions saved successfully"); - setHasChanges(false); - await fetchSearchSpace(); - } catch (error: any) { - console.error("Error saving system instructions:", error); - toast.error(error.message || "Failed to save system instructions"); - } finally { - setSaving(false); - } - }; + toast.success("System instructions saved successfully"); + setHasChanges(false); + await fetchSearchSpace(); + } catch (error: any) { + console.error("Error saving system instructions:", error); + toast.error(error.message || "Failed to save system instructions"); + } finally { + setSaving(false); + } + }; - if (loading) { - return ( -
- - - - - - - - - - -
- ); - } + const onSubmit = (e: React.FormEvent) => { + e.preventDefault(); + handleSave(); + }; - return ( -
- {/* Work in Progress Notice */} - - - - Work in Progress: This functionality is currently - under development and not yet connected to the backend. Your instructions will be saved - but won't affect AI behavior until the feature is fully implemented. - - + if (loading) { + return ( +
+ + + + + + + + + + +
+ ); + } - - - - System instructions apply to all AI interactions in this search space. They guide how the - AI responds, its tone, focus areas, and behavior patterns. - - + return ( +
+ {/* Work in Progress Notice */} + + + + Work in Progress: This + functionality is currently under development and not yet connected to + the backend. Your instructions will be saved but won't affect AI + behavior until the feature is fully implemented. + + - {/* System Instructions Card */} - - - Custom System Instructions - - Provide specific guidelines for how you want the AI to respond. These instructions will - be applied to all answers in this search space. - - - -
- -