fix browser in background

This commit is contained in:
Arjun 2026-04-14 15:03:32 +05:30
parent 200ab632dc
commit 397c499b72
3 changed files with 133 additions and 34 deletions

View file

@ -23,7 +23,7 @@ export type { BrowserPageSnapshot, BrowserState, BrowserTabState };
* standard Chrome UA so sites like Google (OAuth) don't reject it.
*/
const PARTITION = 'persist:rowboat-browser';
export const BROWSER_PARTITION = 'persist:rowboat-browser';
// Claims Chrome 130 on macOS — close enough to recent stable for OAuth servers
// that sniff the UA looking for "real browser" shapes.
@ -471,21 +471,15 @@ function buildClickScript(selector: string): string {
if (eventTarget instanceof HTMLElement) {
eventTarget.focus({ preventScroll: true });
eventTarget.click();
} else {
eventTarget.dispatchEvent(new MouseEvent('click', {
bubbles: true,
cancelable: true,
composed: true,
clientX,
clientY,
view: window,
}));
}
return {
ok: true,
description: describeElement(target),
clickPoint: {
x: Math.round(clientX),
y: Math.round(clientY),
},
verification: {
before,
targetSelector: buildUniqueSelector(target) || requestedSelector,
@ -665,7 +659,7 @@ export class BrowserViewManager extends EventEmitter {
private getSession(): Session {
if (this.browserSession) return this.browserSession;
const browserSession = session.fromPartition(PARTITION);
const browserSession = session.fromPartition(BROWSER_PARTITION);
browserSession.setUserAgent(SPOOF_UA);
this.browserSession = browserSession;
return browserSession;
@ -1133,6 +1127,10 @@ export class BrowserViewManager extends EventEmitter {
ok: boolean;
error?: string;
description?: string;
clickPoint?: {
x: number;
y: number;
};
verification?: {
before: unknown;
targetSelector: string | null;
@ -1142,6 +1140,37 @@ export class BrowserViewManager extends EventEmitter {
signal,
);
if (!result.ok) return result;
if (!result.clickPoint) {
return {
ok: false,
error: 'Could not determine where to click on the page.',
};
}
this.window?.focus();
activeTab.view.webContents.focus();
activeTab.view.webContents.sendInputEvent({
type: 'mouseMove',
x: result.clickPoint.x,
y: result.clickPoint.y,
movementX: 0,
movementY: 0,
});
activeTab.view.webContents.sendInputEvent({
type: 'mouseDown',
x: result.clickPoint.x,
y: result.clickPoint.y,
button: 'left',
clickCount: 1,
});
activeTab.view.webContents.sendInputEvent({
type: 'mouseUp',
x: result.clickPoint.x,
y: result.clickPoint.y,
button: 'left',
clickCount: 1,
});
this.invalidateSnapshot(activeTab.id);
await this.waitForWebContentsSettle(activeTab, signal);

View file

@ -1,4 +1,4 @@
import { app, BrowserWindow, desktopCapturer, protocol, net, shell, session } from "electron";
import { app, BrowserWindow, desktopCapturer, protocol, net, shell, session, type Session } from "electron";
import path from "node:path";
import {
setupIpcHandlers,
@ -32,7 +32,7 @@ import { execSync, exec, execFileSync } from "node:child_process";
import { promisify } from "node:util";
import { init as initChromeSync } from "@x/core/dist/knowledge/chrome-extension/server/server.js";
import { registerContainerValues } from "@x/core/dist/di/container.js";
import { browserViewManager } from "./browser/view.js";
import { browserViewManager, BROWSER_PARTITION } from "./browser/view.js";
import { setupBrowserEventForwarding } from "./browser/ipc.js";
import { ElectronBrowserControlService } from "./browser/control-service.js";
@ -116,6 +116,30 @@ protocol.registerSchemesAsPrivileged([
},
]);
/** Permissions that are granted without prompting; every other permission is denied. */
const ALLOWED_SESSION_PERMISSIONS = new Set(["media", "display-capture"]);

/**
 * Installs the permission and display-media handlers on `targetSession`.
 *
 * - Permission checks and permission requests succeed only for the entries in
 *   `ALLOWED_SESSION_PERMISSIONS`.
 * - Display-media requests are auto-approved with the first screen source and
 *   system audio routed as loopback.
 *
 * NOTE(review): the handlers ignore the requesting webContents/origin, so every
 * page loaded in `targetSession` receives these grants without a prompt —
 * confirm that this is intended for sessions that load arbitrary web content.
 *
 * @param targetSession Electron session to configure.
 */
function configureSessionPermissions(targetSession: Session): void {
  targetSession.setPermissionCheckHandler((_webContents, permission) => {
    return ALLOWED_SESSION_PERMISSIONS.has(permission);
  });

  targetSession.setPermissionRequestHandler((_webContents, permission, callback) => {
    callback(ALLOWED_SESSION_PERMISSIONS.has(permission));
  });

  // Auto-approve display media requests and route system audio as loopback.
  // Electron requires a video source in the callback even if we only want audio.
  // We pass the first available screen source; the renderer discards the video track.
  targetSession.setDisplayMediaRequestHandler(async (_request, callback) => {
    let sources: Awaited<ReturnType<typeof desktopCapturer.getSources>> = [];
    try {
      sources = await desktopCapturer.getSources({ types: ['screen'] });
    } catch (error: unknown) {
      // Enumeration can fail (e.g. capture unavailable). Without this guard the
      // handler would reject and `callback` would never run, leaving the page's
      // getDisplayMedia() request unanswered. Fall through to the no-source path.
      console.error('Failed to enumerate screen sources for display capture:', error);
    }

    const [primaryScreen] = sources;
    if (!primaryScreen) {
      callback({});
      return;
    }
    callback({ video: primaryScreen, audio: 'loopback' });
  });
}
function createWindow() {
const win = new BrowserWindow({
width: 1280,
@ -135,26 +159,8 @@ function createWindow() {
},
});
// Grant microphone and display-capture permissions
session.defaultSession.setPermissionRequestHandler((_webContents, permission, callback) => {
if (permission === 'media' || permission === 'display-capture') {
callback(true);
} else {
callback(false);
}
});
// Auto-approve display media requests and route system audio as loopback.
// Electron requires a video source in the callback even if we only want audio.
// We pass the first available screen source; the renderer discards the video track.
session.defaultSession.setDisplayMediaRequestHandler(async (_request, callback) => {
const sources = await desktopCapturer.getSources({ types: ['screen'] });
if (sources.length === 0) {
callback({});
return;
}
callback({ video: sources[0], audio: 'loopback' });
});
configureSessionPermissions(session.defaultSession);
configureSessionPermissions(session.fromPartition(BROWSER_PARTITION));
// Show window when content is ready to prevent blank screen
win.once("ready-to-show", () => {

View file

@ -34,6 +34,18 @@ const EMPTY_STATE: BrowserState = {
}
const CHROME_HEIGHT = 40
// `data-slot` values that hasBlockingOverlay() treats as blocking overlays when the
// matching element is also `data-state="open"` and visible. While one is present,
// syncView() hides the browser view instead of positioning it.
// NOTE(review): presumably these are the overlay components of the UI kit in use —
// confirm the list stays in sync with the components that can cover the pane.
const BLOCKING_OVERLAY_SLOTS = new Set([
'alert-dialog-content',
'context-menu-content',
'context-menu-sub-content',
'dialog-content',
'dropdown-menu-content',
'dropdown-menu-sub-content',
'hover-card-content',
'popover-content',
'select-content',
'sheet-content',
])
interface BrowserPaneProps {
onClose: () => void
@ -42,6 +54,24 @@ interface BrowserPaneProps {
/** Returns the tab whose id equals `state.activeTabId`, or `null` when no tab matches. */
const getActiveTab = (state: BrowserState) => {
  for (const candidate of state.tabs) {
    if (candidate.id === state.activeTabId) return candidate
  }
  return null
}
/**
 * Reports whether `el` is actually rendered: its computed style must not be
 * `display: none`, `visibility: hidden` or `opacity: 0`, and its bounding box
 * must have a non-zero width and height.
 */
const isVisibleOverlayElement = (el: HTMLElement) => {
  const { display, visibility, opacity } = window.getComputedStyle(el)
  const hiddenByCss = display === 'none' || visibility === 'hidden' || opacity === '0'
  if (hiddenByCss) return false

  // Only measure layout once CSS has not already ruled the element out.
  const { width, height } = el.getBoundingClientRect()
  return width > 0 && height > 0
}
/**
 * Returns true when `doc` contains at least one open (`data-state="open"`)
 * element whose `data-slot` is listed in BLOCKING_OVERLAY_SLOTS and which
 * isVisibleOverlayElement() reports as visible.
 */
const hasBlockingOverlay = (doc: Document) => {
  const candidates = doc.querySelectorAll<HTMLElement>('[data-slot][data-state="open"]')
  for (const candidate of Array.from(candidates)) {
    const slotName = candidate.dataset.slot
    if (slotName && BLOCKING_OVERLAY_SLOTS.has(slotName) && isVisibleOverlayElement(candidate)) {
      return true
    }
  }
  return false
}
const getBrowserTabTitle = (tab: BrowserTabState) => {
const title = tab.title.trim()
if (title) return title
@ -145,6 +175,13 @@ export function BrowserPane({ onClose }: BrowserPaneProps) {
}, [])
const syncView = useCallback(() => {
const doc = viewportRef.current?.ownerDocument
if (doc && hasBlockingOverlay(doc)) {
lastBoundsRef.current = null
setViewVisible(false)
return null
}
const bounds = measureBounds()
if (!bounds) {
lastBoundsRef.current = null
@ -203,6 +240,33 @@ export function BrowserPane({ onClose }: BrowserPaneProps) {
}
}, [syncView])
// Watch the owning document for DOM changes that can open or close an overlay
// and re-run syncView() in response. Bursts of mutations are coalesced into at
// most one syncView() call per animation frame.
useEffect(() => {
  const ownerDoc = viewportRef.current?.ownerDocument
  const root = ownerDoc?.body
  if (!root) return

  let frameId: number | null = null

  const runSync = () => {
    frameId = null
    syncView()
  }

  const requestSync = () => {
    // A frame is already queued; it will pick up this mutation as well.
    if (frameId === null) {
      frameId = requestAnimationFrame(runSync)
    }
  }

  const observer = new MutationObserver(requestSync)
  observer.observe(root, {
    subtree: true,
    childList: true,
    attributes: true,
    attributeFilter: ['data-state', 'style', 'hidden', 'aria-hidden', 'open'],
  })

  return () => {
    if (frameId !== null) cancelAnimationFrame(frameId)
    observer.disconnect()
  }
}, [syncView])
const handleNewTab = useCallback(() => {
void window.ipc.invoke('browser:newTab', {}).then((res) => {
const result = res as { ok: boolean; error?: string }