diff --git a/apps/x/apps/main/src/browser/view.ts b/apps/x/apps/main/src/browser/view.ts
index 815b8540..05455223 100644
--- a/apps/x/apps/main/src/browser/view.ts
+++ b/apps/x/apps/main/src/browser/view.ts
@@ -23,7 +23,7 @@ export type { BrowserPageSnapshot, BrowserState, BrowserTabState };
  * standard Chrome UA so sites like Google (OAuth) don't reject it.
  */
 
-const PARTITION = 'persist:rowboat-browser';
+export const BROWSER_PARTITION = 'persist:rowboat-browser';
 
 // Claims Chrome 130 on macOS — close enough to recent stable for OAuth servers
 // that sniff the UA looking for "real browser" shapes.
@@ -471,21 +471,15 @@ function buildClickScript(selector: string): string {
 
       if (eventTarget instanceof HTMLElement) {
         eventTarget.focus({ preventScroll: true });
-        eventTarget.click();
-      } else {
-        eventTarget.dispatchEvent(new MouseEvent('click', {
-          bubbles: true,
-          cancelable: true,
-          composed: true,
-          clientX,
-          clientY,
-          view: window,
-        }));
       }
 
       return {
         ok: true,
         description: describeElement(target),
+        clickPoint: {
+          x: Math.round(clientX),
+          y: Math.round(clientY),
+        },
         verification: {
           before,
           targetSelector: buildUniqueSelector(target) || requestedSelector,
@@ -665,7 +659,7 @@ export class BrowserViewManager extends EventEmitter {
 
   private getSession(): Session {
     if (this.browserSession) return this.browserSession;
-    const browserSession = session.fromPartition(PARTITION);
+    const browserSession = session.fromPartition(BROWSER_PARTITION);
     browserSession.setUserAgent(SPOOF_UA);
     this.browserSession = browserSession;
     return browserSession;
@@ -1133,6 +1127,10 @@ export class BrowserViewManager extends EventEmitter {
       ok: boolean;
       error?: string;
       description?: string;
+      clickPoint?: {
+        x: number;
+        y: number;
+      };
       verification?: {
         before: unknown;
         targetSelector: string | null;
@@ -1142,6 +1140,41 @@ export class BrowserViewManager extends EventEmitter {
       signal,
     );
     if (!result.ok) return result;
+    if (!result.clickPoint) {
+      return {
+        ok: false,
+        error: 'Could not determine where to click on the page.',
+      };
+    }
+
+    // Click by sending mouse move/down/up input events to the tab at the
+    // viewport point computed by the in-page script.
+    // NOTE(review): assumes clickPoint is in the coordinate space that
+    // sendInputEvent expects (e.g. no page zoom applied) — confirm.
+    this.window?.focus();
+    activeTab.view.webContents.focus();
+    activeTab.view.webContents.sendInputEvent({
+      type: 'mouseMove',
+      x: result.clickPoint.x,
+      y: result.clickPoint.y,
+      movementX: 0,
+      movementY: 0,
+    });
+    activeTab.view.webContents.sendInputEvent({
+      type: 'mouseDown',
+      x: result.clickPoint.x,
+      y: result.clickPoint.y,
+      button: 'left',
+      clickCount: 1,
+    });
+    activeTab.view.webContents.sendInputEvent({
+      type: 'mouseUp',
+      x: result.clickPoint.x,
+      y: result.clickPoint.y,
+      button: 'left',
+      clickCount: 1,
+    });
+
     this.invalidateSnapshot(activeTab.id);
     await this.waitForWebContentsSettle(activeTab, signal);
 
diff --git a/apps/x/apps/main/src/main.ts b/apps/x/apps/main/src/main.ts
index a690d207..53d56866 100644
--- a/apps/x/apps/main/src/main.ts
+++ b/apps/x/apps/main/src/main.ts
@@ -1,4 +1,4 @@
-import { app, BrowserWindow, desktopCapturer, protocol, net, shell, session } from "electron";
+import { app, BrowserWindow, desktopCapturer, protocol, net, shell, session, type Session } from "electron";
 import path from "node:path";
 import {
   setupIpcHandlers,
@@ -32,7 +32,7 @@ import { execSync, exec, execFileSync } from "node:child_process";
 import { promisify } from "node:util";
 import { init as initChromeSync } from "@x/core/dist/knowledge/chrome-extension/server/server.js";
 import { registerContainerValues } from "@x/core/dist/di/container.js";
-import { browserViewManager } from "./browser/view.js";
+import { browserViewManager, BROWSER_PARTITION } from "./browser/view.js";
 import { setupBrowserEventForwarding } from "./browser/ipc.js";
 import { ElectronBrowserControlService } from "./browser/control-service.js";
 
@@ -116,6 +116,48 @@ protocol.registerSchemesAsPrivileged([
   },
 ]);
 
+/**
+ * Permissions granted without prompting on every session passed to
+ * `configureSessionPermissions`; any other permission is denied.
+ */
+const ALLOWED_SESSION_PERMISSIONS = new Set(["media", "display-capture"]);
+
+/**
+ * Installs the permission check/request handlers and the display-media
+ * handler on `targetSession`.
+ *
+ * NOTE(review): this is also applied to the embedded browser partition, so
+ * pages loaded there get "media" and "display-capture" without a prompt —
+ * confirm that is intended for arbitrary web content.
+ */
+function configureSessionPermissions(targetSession: Session): void {
+  targetSession.setPermissionCheckHandler((_webContents, permission) => {
+    return ALLOWED_SESSION_PERMISSIONS.has(permission);
+  });
+
+  targetSession.setPermissionRequestHandler((_webContents, permission, callback) => {
+    callback(ALLOWED_SESSION_PERMISSIONS.has(permission));
+  });
+
+  // Auto-approve display media requests and route system audio as loopback.
+  // Electron requires a video source in the callback even if we only want audio.
+  // We pass the first available screen source; the renderer discards the video track.
+  targetSession.setDisplayMediaRequestHandler(async (_request, callback) => {
+    let sources: Awaited<ReturnType<typeof desktopCapturer.getSources>> = [];
+    try {
+      sources = await desktopCapturer.getSources({ types: ['screen'] });
+    } catch (error) {
+      // Still answer the request below instead of leaving it pending.
+      console.error('Failed to list screen sources for display media request:', error);
+    }
+    if (sources.length === 0) {
+      callback({});
+      return;
+    }
+    callback({ video: sources[0], audio: 'loopback' });
+  });
+}
+
 function createWindow() {
   const win = new BrowserWindow({
     width: 1280,
@@ -135,26 +177,8 @@ function createWindow() {
     },
   });
 
-  // Grant microphone and display-capture permissions
-  session.defaultSession.setPermissionRequestHandler((_webContents, permission, callback) => {
-    if (permission === 'media' || permission === 'display-capture') {
-      callback(true);
-    } else {
-      callback(false);
-    }
-  });
-
-  // Auto-approve display media requests and route system audio as loopback.
-  // Electron requires a video source in the callback even if we only want audio.
-  // We pass the first available screen source; the renderer discards the video track.
-  session.defaultSession.setDisplayMediaRequestHandler(async (_request, callback) => {
-    const sources = await desktopCapturer.getSources({ types: ['screen'] });
-    if (sources.length === 0) {
-      callback({});
-      return;
-    }
-    callback({ video: sources[0], audio: 'loopback' });
-  });
+  configureSessionPermissions(session.defaultSession);
+  configureSessionPermissions(session.fromPartition(BROWSER_PARTITION));
 
   // Show window when content is ready to prevent blank screen
   win.once("ready-to-show", () => {
diff --git a/apps/x/apps/renderer/src/components/browser-pane/BrowserPane.tsx b/apps/x/apps/renderer/src/components/browser-pane/BrowserPane.tsx
index 20ca7e3a..8777c035 100644
--- a/apps/x/apps/renderer/src/components/browser-pane/BrowserPane.tsx
+++ b/apps/x/apps/renderer/src/components/browser-pane/BrowserPane.tsx
@@ -34,6 +34,20 @@ const EMPTY_STATE: BrowserState = {
 }
 
 const CHROME_HEIGHT = 40
+// `data-slot` values of overlay content elements; hasBlockingOverlay treats an
+// open, visible element with one of these slots as blocking.
+const BLOCKING_OVERLAY_SLOTS = new Set([
+  'alert-dialog-content',
+  'context-menu-content',
+  'context-menu-sub-content',
+  'dialog-content',
+  'dropdown-menu-content',
+  'dropdown-menu-sub-content',
+  'hover-card-content',
+  'popover-content',
+  'select-content',
+  'sheet-content',
+])
 
 interface BrowserPaneProps {
   onClose: () => void
@@ -42,6 +56,31 @@ interface BrowserPaneProps {
 const getActiveTab = (state: BrowserState) =>
   state.tabs.find((tab) => tab.id === state.activeTabId) ?? null
 
+/** Returns true when `el` is rendered: not hidden via CSS and with a non-empty box. */
+const isVisibleOverlayElement = (el: HTMLElement) => {
+  const style = window.getComputedStyle(el)
+  if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') {
+    return false
+  }
+  const rect = el.getBoundingClientRect()
+  return rect.width > 0 && rect.height > 0
+}
+
+/**
+ * Returns true when `doc` contains an open, visible element whose `data-slot`
+ * is one of BLOCKING_OVERLAY_SLOTS.
+ */
+const hasBlockingOverlay = (doc: Document) => {
+  // Typed as HTMLElement so `dataset` and isVisibleOverlayElement type-check;
+  // the default querySelectorAll result type is Element.
+  const openContent = doc.querySelectorAll<HTMLElement>('[data-slot][data-state="open"]')
+  return Array.from(openContent).some((el) => {
+    const slot = el.dataset.slot
+    if (!slot || !BLOCKING_OVERLAY_SLOTS.has(slot)) return false
+    return isVisibleOverlayElement(el)
+  })
+}
+
 const getBrowserTabTitle = (tab: BrowserTabState) => {
   const title = tab.title.trim()
   if (title) return title
@@ -145,6 +184,14 @@ export function BrowserPane({ onClose }: BrowserPaneProps) {
   }, [])
 
   const syncView = useCallback(() => {
+    // While a blocking overlay is open, clear the cached bounds and hide the view.
+    const doc = viewportRef.current?.ownerDocument
+    if (doc && hasBlockingOverlay(doc)) {
+      lastBoundsRef.current = null
+      setViewVisible(false)
+      return null
+    }
+
     const bounds = measureBounds()
     if (!bounds) {
       lastBoundsRef.current = null
@@ -203,6 +250,35 @@ export function BrowserPane({ onClose }: BrowserPaneProps) {
     }
   }, [syncView])
 
+  // Re-run syncView (at most once per animation frame) whenever overlay-related
+  // DOM state under <body> changes.
+  useEffect(() => {
+    const doc = viewportRef.current?.ownerDocument
+    if (!doc?.body) return
+
+    let pendingRaf: number | null = null
+    const schedule = () => {
+      if (pendingRaf !== null) return
+      pendingRaf = requestAnimationFrame(() => {
+        pendingRaf = null
+        syncView()
+      })
+    }
+
+    const observer = new MutationObserver(schedule)
+    observer.observe(doc.body, {
+      subtree: true,
+      childList: true,
+      attributes: true,
+      attributeFilter: ['data-state', 'style', 'hidden', 'aria-hidden', 'open'],
+    })
+
+    return () => {
+      if (pendingRaf !== null) cancelAnimationFrame(pendingRaf)
+      observer.disconnect()
+    }
+  }, [syncView])
+
   const handleNewTab = useCallback(() => {
     void window.ipc.invoke('browser:newTab', {}).then((res) => {
       const result = res as { ok: boolean; error?: string }