This commit is contained in:
arkml 2026-04-24 10:32:11 +05:30 committed by GitHub
commit d0df50eb50
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 551 additions and 6 deletions

View file

@ -1,8 +1,24 @@
import type { IBrowserControlService } from '@x/core/dist/application/browser-control/service.js';
import type { BrowserControlAction, BrowserControlInput, BrowserControlResult } from '@x/shared/dist/browser-control.js';
import type { BrowserControlAction, BrowserControlInput, BrowserControlResult, SuggestedBrowserSkill } from '@x/shared/dist/browser-control.js';
import { ensureLoaded, matchSkillsForUrl } from '@x/core/dist/application/browser-skills/index.js';
import { browserViewManager } from './view.js';
import { normalizeNavigationTarget } from './navigation.js';
/**
 * Resolves the cached browser skills that match the given page URL.
 *
 * Returns `undefined` (never an empty array) when there is no URL, when the
 * loader reports anything other than `ready`/`stale`, when nothing matches,
 * or when the lookup throws. Lookup failures are logged and swallowed so a
 * skills problem cannot fail the browser action that triggered it.
 *
 * @param url URL of the page the action ended on, if one was read.
 * @returns `{ id, title, path }` for each matched skill, or `undefined`.
 */
async function getSuggestedSkills(url: string | undefined): Promise<SuggestedBrowserSkill[] | undefined> {
    if (!url) {
        return undefined;
    }

    try {
        const loader = await ensureLoaded();
        if (loader.status !== 'ready' && loader.status !== 'stale') {
            return undefined;
        }

        const hits = matchSkillsForUrl(loader.index, url);
        return hits.length > 0
            ? hits.map(({ id, title, path }) => ({ id, title, path }))
            : undefined;
    } catch (lookupError) {
        console.warn('[browser-control] suggestedSkills lookup failed:', lookupError);
        return undefined;
    }
}
function buildSuccessResult(
action: BrowserControlAction,
message: string,
@ -52,11 +68,13 @@ export class ElectronBrowserControlService implements IBrowserControlService {
}
await browserViewManager.ensureActiveTabReady(signal);
const page = await browserViewManager.readPageSummary(signal, { waitForReady: false }) ?? undefined;
return buildSuccessResult(
const suggestedSkills = await getSuggestedSkills(page?.url);
const success = buildSuccessResult(
'new-tab',
target ? `Opened a new tab for ${target}.` : 'Opened a new tab.',
page,
);
return suggestedSkills ? { ...success, suggestedSkills } : success;
}
case 'switch-tab': {
@ -99,7 +117,9 @@ export class ElectronBrowserControlService implements IBrowserControlService {
}
await browserViewManager.ensureActiveTabReady(signal);
const page = await browserViewManager.readPageSummary(signal, { waitForReady: false }) ?? undefined;
return buildSuccessResult('navigate', `Navigated to ${target}.`, page);
const suggestedSkills = await getSuggestedSkills(page?.url);
const success = buildSuccessResult('navigate', `Navigated to ${target}.`, page);
return suggestedSkills ? { ...success, suggestedSkills } : success;
}
case 'back': {
@ -140,7 +160,9 @@ export class ElectronBrowserControlService implements IBrowserControlService {
if (!result.ok || !result.page) {
return buildErrorResult('read-page', result.error ?? 'Failed to read the current page.');
}
return buildSuccessResult('read-page', 'Read the current page.', result.page);
const suggestedSkills = await getSuggestedSkills(result.page.url);
const success = buildSuccessResult('read-page', 'Read the current page.', result.page);
return suggestedSkills ? { ...success, suggestedSkills } : success;
}
case 'click': {
@ -232,6 +254,20 @@ export class ElectronBrowserControlService implements IBrowserControlService {
const page = await browserViewManager.readPageSummary(signal, { waitForReady: false }) ?? undefined;
return buildSuccessResult('wait', `Waited ${duration}ms for the page to settle.`, page);
}
case 'eval': {
const code = input.code;
if (!code) {
return buildErrorResult('eval', 'code is required for eval.');
}
await browserViewManager.ensureActiveTabReady(signal);
const result = await browserViewManager.executeScript(code, signal);
if (!result.ok) {
return buildErrorResult('eval', result.error);
}
const success = buildSuccessResult('eval', 'Evaluated script in the active tab.');
return { ...success, result: result.result };
}
}
} catch (error) {
return buildErrorResult(

View file

@ -78,6 +78,41 @@ function abortIfNeeded(signal?: AbortSignal): void {
throw signal.reason instanceof Error ? signal.reason : new Error('Browser action aborted');
}
/**
 * Upper bound on the JSON form of an eval result. It is compared against
 * `String.prototype.length`, i.e. UTF-16 code units rather than encoded bytes.
 */
const EVAL_RESULT_MAX_BYTES = 200_000;

/**
 * Converts an arbitrary script result into a value that survives JSON
 * serialization.
 *
 * - `null`, `undefined`, strings, numbers and booleans pass through.
 * - `bigint` becomes its decimal string; functions and symbols become the
 *   placeholders `'[function]'` / `'[symbol]'`.
 * - Arrays and objects are copied recursively over their own enumerable
 *   string keys. A reference back to an object that is currently being
 *   expanded (a true cycle) becomes `'[circular]'`.
 * - If the JSON text is longer than `EVAL_RESULT_MAX_BYTES`, the result is
 *   `{ truncated: true, preview }` where `preview` is the leading slice of
 *   that JSON text.
 *
 * @param value Raw value returned by the evaluated script.
 * @returns The coerced value, a truncation envelope, or `String(value)` if
 *          `JSON.stringify` throws.
 */
function safeSerialize(value: unknown): unknown {
    // Holds only the objects on the current descent path. An object is removed
    // again once its subtree is finished, so the same object reachable through
    // two sibling branches is serialized twice instead of being mislabelled as
    // circular on its second occurrence.
    const ancestors = new WeakSet<object>();

    const coerce = (v: unknown): unknown => {
        if (v === null || v === undefined) return v;
        const t = typeof v;
        if (t === 'string' || t === 'number' || t === 'boolean') return v;
        if (t === 'bigint') return (v as bigint).toString();
        if (t === 'function' || t === 'symbol') return `[${t}]`;
        if (typeof v === 'object') {
            const node = v as object;
            if (ancestors.has(node)) return '[circular]';
            ancestors.add(node);
            try {
                if (Array.isArray(node)) return node.map((item) => coerce(item));
                const out: Record<string, unknown> = {};
                for (const [k, val] of Object.entries(node)) {
                    out[k] = coerce(val);
                }
                return out;
            } finally {
                ancestors.delete(node);
            }
        }
        return String(v);
    };

    const coerced = coerce(value);
    try {
        // JSON.stringify yields undefined for an undefined root; the truthiness
        // check below covers that case.
        const json = JSON.stringify(coerced);
        if (json && json.length > EVAL_RESULT_MAX_BYTES) {
            return { truncated: true, preview: json.slice(0, EVAL_RESULT_MAX_BYTES) };
        }
    } catch {
        return String(value);
    }
    return coerced;
}
async function sleep(ms: number, signal?: AbortSignal): Promise<void> {
if (ms <= 0) return;
abortIfNeeded(signal);
@ -778,6 +813,17 @@ export class BrowserViewManager extends EventEmitter {
await this.waitForWebContentsSettle(activeTab, signal);
}
/**
 * Runs caller-supplied JavaScript on the active tab.
 *
 * The source is placed inside the body of an async arrow function that is
 * invoked immediately, so it may use `await` and hands a value back through
 * `return`. The value is passed through `safeSerialize` before being returned.
 *
 * @param code JavaScript statements to run.
 * @param signal Optional abort signal forwarded to `executeOnActiveTab`.
 * @returns `{ ok: true, result }` on success; `{ ok: false, error }` when
 *          evaluation or serialization throws.
 */
async executeScript(code: string, signal?: AbortSignal): Promise<{ ok: true; result: unknown } | { ok: false; error: string }> {
    try {
        const evaluated = await this.executeOnActiveTab<unknown>(`(async () => { ${code} \n})()`, signal);
        return { ok: true, result: safeSerialize(evaluated) };
    } catch (failure) {
        const error = failure instanceof Error ? failure.message : 'Script evaluation failed.';
        return { ok: false, error };
    }
}
/** Returns the browser state produced by `snapshotState()`. */
getState(): BrowserState {
    const state = this.snapshotState();
    return state;
}

View file

@ -14,8 +14,10 @@ Use this skill when the user asks you to open a website, browse in-app, search t
- page ` + "`url`" + ` and ` + "`title`" + `
- visible page text
- interactable elements with numbered ` + "`index`" + ` values
4. Prefer acting on those numbered indices with ` + "`click`" + ` / ` + "`type`" + ` / ` + "`press`" + `.
5. After each action, read the returned page snapshot before deciding the next step.
- ` + "`suggestedSkills`" + ` — site-specific and interaction-specific skill hints for the current page
4. **Always inspect ` + "`suggestedSkills`" + ` before acting.** If any skill in the list matches what the user asked for (site or task), call ` + "`load-browser-skill({ id: \"<id>\" })`" + ` *first*, read it in full, then plan your actions. These skills encode selectors, timing, and gotchas that would otherwise cost you several failed attempts to rediscover. If no skill matches, proceed — but do not skip this check.
5. Prefer acting on those numbered indices with ` + "`click`" + ` / ` + "`type`" + ` / ` + "`press`" + `.
6. After each action, read the returned page snapshot before deciding the next step — including re-checking ` + "`suggestedSkills`" + ` if the navigation landed you on a new domain.
## Actions
@ -92,12 +94,38 @@ Wait for the page to settle, useful after async UI changes.
Parameters:
- ` + "`ms`" + `: milliseconds to wait (optional)
### eval
Run arbitrary JavaScript in the active tab and return its value. Use this as an escape hatch when the structured actions above are insufficient — for example, submitting a form (` + "`form.submit()`" + `), reading DOM state (` + "`document.querySelector(...).textContent`" + `), or computing something that requires page-scoped APIs.
Parameters:
- ` + "`code`" + `: JavaScript source. The code runs inside an ` + "`async`" + ` IIFE, so you can ` + "`await`" + ` freely. Only a value you explicitly ` + "`return`" + ` is serialized back; a bare trailing expression yields ` + "`undefined`" + `. Non-serializable values (DOM nodes, functions) are coerced to placeholder strings. Large results are truncated.
Example:
- ` + "`{ action: \"eval\", code: \"return document.querySelector('meta[name=user-login]')?.content ?? null\" }`" + `
Security: ` + "`eval`" + ` runs in the active tab's origin with the user's cookies. Do not exfiltrate credentials, cookies, or localStorage contents to third-party origins.
## Companion Tools
### http-fetch
Use for **unauthenticated** API calls (e.g., ` + "`api.github.com`" + `, public REST endpoints) where you don't need the browser's logged-in session. Often faster and cleaner than DOM scraping — many sites expose a public API that returns the same data. For authenticated requests that require the user's browser cookies, use ` + "`browser-control`" + ` with ` + "`action: \"eval\"`" + ` and call ` + "`fetch()`" + ` inside the page context instead.
### load-browser-skill
Rowboat caches a library of browser skills (from ` + "`browser-use/browser-harness`" + `) indexed by both **domain** (github, linkedin, amazon, booking, …) and **interaction type** within a domain (e.g. ` + "`github/repo-actions`" + `, ` + "`github/scraping`" + `, ` + "`arxiv-bulk/*`" + `). Whenever ` + "`browser-control`" + ` returns a ` + "`suggestedSkills`" + ` array — which it does on ` + "`navigate`" + `, ` + "`new-tab`" + `, and ` + "`read-page`" + ` — treat it as a required reading step, not optional. Pick the entry that matches the current task (domain match first, then the interaction-specific variant if one exists) and call ` + "`load-browser-skill({ id: \"<id>\" })`" + ` before attempting the action.
You can also proactively call ` + "`load-browser-skill({ action: \"list\", site: \"<site>\" })`" + ` when you know you're about to work on a site, to see what skills exist even if ` + "`suggestedSkills`" + ` is empty (e.g. before navigating).
These skills are written against a Python harness, so treat them as **reference knowledge** — adapt the recipes into the actions above (especially ` + "`eval`" + ` + ` + "`http-fetch`" + ` for the ` + "`js(...)`" + ` and ` + "`http_get(...)`" + ` calls they use). The selectors, DOM gotchas, and sequencing are the durable part; the exact function names are not.
## Important Rules
- Prefer ` + "`read-page`" + ` before interacting.
- Prefer element ` + "`index`" + ` over CSS selectors.
- If the tool says the snapshot is stale, call ` + "`read-page`" + ` again.
- After navigation, clicking, typing, pressing, or scrolling, use the returned page snapshot instead of assuming the page state.
- **Always check ` + "`suggestedSkills`" + ` after ` + "`navigate`" + `, ` + "`new-tab`" + `, or ` + "`read-page`" + `, and load the matching domain or interaction skill before acting.** Skipping this step is the single most common way to waste a dozen failed clicks on a site whose quirks are already documented. If the array is empty, proceed normally but don't skip the check.
- Prefer structured actions (click/type/press) over ` + "`eval`" + ` when both work. Reach for ` + "`eval`" + ` when the site fights synthetic events, when you need to submit a form directly, or when you need to read DOM state the structured actions don't surface.
- For read-only data, check if ` + "`http-fetch`" + ` against the site's public API works before scraping the DOM.
- Use Rowboat's browser for live interaction. Use web search tools for research where a live session is unnecessary.
- Do not wrap browser URLs or browser pages in ` + "```filepath" + ` blocks. Filepath cards are only for real files on disk, not web pages or browser tabs.
- If you mention a page the browser opened, use plain text for the URL/title instead of trying to create a clickable file card.

View file

@ -0,0 +1,3 @@
export { ensureLoaded, readSkillContent, refreshFromRemote } from './loader.js';
export type { SkillEntry, SkillsIndex, LoaderStatus } from './loader.js';
export { matchSkillsForUrl } from './matcher.js';

View file

@ -0,0 +1,215 @@
import * as path from 'node:path';
import * as fs from 'node:fs/promises';
import { WorkDir } from '../../config/config.js';
// Upstream GitHub repository and branch that the skill library is fetched from.
const REPO_OWNER = 'browser-use';
const REPO_NAME = 'browser-harness';
const REPO_BRANCH = 'main';
// Repo-relative directory prefix; only `.md` blobs under it are treated as skills.
const DOMAIN_SKILLS_PREFIX = 'domain-skills/';
// A cached manifest older than this (24 hours) is considered stale.
const MANIFEST_TTL_MS = 24 * 60 * 60 * 1000;
// Abort delay, in milliseconds, applied to each request made by fetchWithTimeout.
const FETCH_TIMEOUT_MS = 20_000;
/** One cached skill file, as recorded in the manifest. */
export type SkillEntry = {
id: string; // e.g. "github/repo-actions"
site: string; // e.g. "github"
fileName: string; // e.g. "repo-actions.md"; keeps any sub-directories below the site folder
title: string; // first H1 from the markdown, or the skill id when no H1 is found
path: string; // relative repo path, e.g. "domain-skills/github/repo-actions.md"
localPath: string; // absolute path on disk
};
/** Contents of the cache manifest written after a refresh. */
export type SkillsIndex = {
// Date.now() at the time the refresh finished; compared against MANIFEST_TTL_MS.
fetchedAt: number;
// SHA of the upstream git tree the entries were listed from.
treeSha: string;
// Skill entries, sorted by id when written by refreshFromRemote.
entries: SkillEntry[];
};
/**
 * Result of asking the loader for the skill index.
 * - ready: an index is available and can be used as-is.
 * - stale: a cached index older than the TTL is returned; `refreshing` reports
 *   whether a refresh is underway.
 * - empty: no index is cached. NOTE(review): nothing visible in this file
 *   returns this variant, though callers handle it; confirm whether it is still needed.
 * - error: no index could be produced; `error` carries the message.
 */
export type LoaderStatus =
| { status: 'ready'; index: SkillsIndex }
| { status: 'stale'; index: SkillsIndex; refreshing: boolean }
| { status: 'empty' }
| { status: 'error'; error: string };
/** Directory under `WorkDir` that holds everything the skill cache writes. */
function cacheRoot(): string {
    return path.join(WorkDir, 'cache', 'browser-skills');
}

/** Directory under the cache root where downloaded skill markdown is stored. */
function skillsDir(): string {
    return path.join(cacheRoot(), 'domain-skills');
}

/** Location of the JSON manifest describing the cached skills. */
function manifestPath(): string {
    return path.join(cacheRoot(), 'manifest.json');
}

/** Creates the skills directory, and any missing parents, if absent. */
async function ensureCacheDir(): Promise<void> {
    const target = skillsDir();
    await fs.mkdir(target, { recursive: true });
}
/**
 * Loads the cached manifest from disk.
 *
 * @returns The parsed manifest, or `null` when the file cannot be read, is
 *          not valid JSON, or does not carry an `entries` array.
 */
async function readManifest(): Promise<SkillsIndex | null> {
    try {
        const manifest = JSON.parse(await fs.readFile(manifestPath(), 'utf8')) as SkillsIndex;
        return Array.isArray(manifest.entries) ? manifest : null;
    } catch {
        // Missing file, unreadable file and malformed JSON all mean "no usable cache".
        return null;
    }
}
/**
 * Persists the manifest as pretty-printed JSON, creating the cache
 * directories first.
 *
 * @param index Manifest to write.
 */
async function writeManifest(index: SkillsIndex): Promise<void> {
    const destination = manifestPath();
    await ensureCacheDir();
    const serialized = JSON.stringify(index, null, 2);
    await fs.writeFile(destination, serialized, 'utf8');
}
/**
 * Picks a display title for a skill document.
 *
 * @param markdown Markdown source of the skill.
 * @param fallback Title to use when no level-1 heading is found.
 * @returns The trimmed text of the first line that starts with `# `, or
 *          `fallback`.
 */
function extractTitle(markdown: string, fallback: string): string {
    const heading = /^#\s+(.+?)\s*$/m.exec(markdown);
    const captured = heading?.[1];
    return captured ? captured.trim() : fallback;
}
/**
 * `fetch` with a `FETCH_TIMEOUT_MS` deadline and default GitHub-friendly
 * headers.
 *
 * The deadline is an `AbortSignal.timeout` signal, so it stays armed while the
 * caller reads the response body. A hand-rolled timer that is cleared once
 * `fetch` resolves only covers the wait for response headers and lets a
 * stalled body download hang.
 *
 * Headers supplied in `init` override the defaults. They are merged through
 * `Headers`, so any `HeadersInit` form is accepted and names are matched
 * case-insensitively.
 *
 * @param url Absolute URL to request.
 * @param init Optional fetch options; `init.signal` is replaced by the timeout signal.
 * @returns The response, once headers have been received.
 * @throws If the request fails or the deadline passes.
 */
async function fetchWithTimeout(url: string, init?: RequestInit): Promise<Response> {
    const headers = new Headers({
        'User-Agent': 'rowboat-browser-skills',
        Accept: 'application/vnd.github+json',
    });
    new Headers(init?.headers).forEach((value, name) => {
        headers.set(name, value);
    });

    return fetch(url, {
        ...init,
        headers,
        signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
    });
}
type GithubTreeNode = { path: string; type: string; sha: string };

/**
 * Lists the skill markdown files present upstream.
 *
 * Resolves the branch head to its tree SHA, then fetches that tree
 * recursively and keeps the blobs under `DOMAIN_SKILLS_PREFIX` whose path
 * ends in `.md`.
 *
 * @returns The tree SHA and the repo-relative paths of the skill files.
 * @throws If either GitHub request fails or a response lacks the expected fields.
 */
async function fetchRepoTree(): Promise<{ treeSha: string; skillPaths: string[] }> {
    const branchUrl = `https://api.github.com/repos/${REPO_OWNER}/${REPO_NAME}/branches/${REPO_BRANCH}`;
    const branchRes = await fetchWithTimeout(branchUrl);
    if (!branchRes.ok) {
        throw new Error(`GitHub branch fetch failed: ${branchRes.status} ${branchRes.statusText}`);
    }
    const branch = (await branchRes.json()) as { commit?: { commit?: { tree?: { sha?: string } } } } | null;
    const treeSha = branch?.commit?.commit?.tree?.sha;
    if (!treeSha) throw new Error('Could not resolve tree SHA from branch response.');

    const treeUrl = `https://api.github.com/repos/${REPO_OWNER}/${REPO_NAME}/git/trees/${treeSha}?recursive=1`;
    const treeRes = await fetchWithTimeout(treeUrl);
    if (!treeRes.ok) {
        throw new Error(`GitHub tree fetch failed: ${treeRes.status} ${treeRes.statusText}`);
    }
    const tree = (await treeRes.json()) as { tree?: GithubTreeNode[]; truncated?: boolean } | null;
    if (!tree || !Array.isArray(tree.tree)) {
        // Fail with a readable message instead of a TypeError on `.filter`.
        throw new Error('GitHub tree response did not contain a tree listing.');
    }
    if (tree.truncated) {
        // GitHub caps recursive listings; past the cap the list is silently partial.
        console.warn('[browser-skills] GitHub tree listing was truncated; some skills may be missing.');
    }

    const skillPaths = tree.tree
        .filter((n) => n.type === 'blob' && n.path.startsWith(DOMAIN_SKILLS_PREFIX) && n.path.endsWith('.md'))
        .map((n) => n.path);
    return { treeSha, skillPaths };
}
/**
 * Downloads one file from the upstream branch via raw.githubusercontent.com.
 *
 * @param repoPath Repo-relative path of the file.
 * @returns The file body as text.
 * @throws If the response status is not OK.
 */
async function fetchRawFile(repoPath: string): Promise<string> {
    const rawUrl = ['https://raw.githubusercontent.com', REPO_OWNER, REPO_NAME, REPO_BRANCH, repoPath].join('/');
    const response = await fetchWithTimeout(rawUrl, { headers: { Accept: 'text/plain' } });
    if (response.ok) {
        return response.text();
    }
    throw new Error(`Raw file fetch failed for ${repoPath}: ${response.status} ${response.statusText}`);
}
/**
 * Splits a repo path under `DOMAIN_SKILLS_PREFIX` into its skill identity.
 *
 * The first segment after the prefix is the site; everything after it is the
 * file name (sub-directories included). The id is the path after the prefix
 * with a trailing `.md` removed.
 *
 * @param repoPath Repo-relative path, expected to start with the prefix.
 * @returns The parts, or `null` when the file sits directly under the prefix
 *          with no site folder.
 */
function parseRepoPath(repoPath: string): { id: string; site: string; fileName: string } | null {
    const relative = repoPath.slice(DOMAIN_SKILLS_PREFIX.length);
    const [site, ...rest] = relative.split('/');
    if (rest.length === 0) {
        return null;
    }
    return {
        id: relative.replace(/\.md$/, ''),
        site,
        fileName: rest.join('/'),
    };
}
/**
 * Downloads the upstream skill library into the local cache and rewrites the
 * manifest.
 *
 * Files are fetched concurrently. A file that fails to download is logged and
 * left out of the new manifest. If upstream lists skills but none of them
 * could be downloaded, the refresh is treated as failed and the manifest is
 * left untouched. Otherwise an empty manifest stamped with the current time
 * would replace a usable one and look fresh until the TTL expired.
 *
 * @returns The manifest that was written.
 * @throws If the tree listing fails, if no listed skill could be downloaded,
 *         or if the manifest cannot be written.
 */
export async function refreshFromRemote(): Promise<SkillsIndex> {
    await ensureCacheDir();
    const { treeSha, skillPaths } = await fetchRepoTree();

    let attempted = 0;
    const entries: SkillEntry[] = [];
    await Promise.all(skillPaths.map(async (repoPath) => {
        const parsed = parseRepoPath(repoPath);
        if (!parsed) return;
        attempted += 1;
        try {
            const content = await fetchRawFile(repoPath);
            const localRel = path.join(parsed.site, parsed.fileName);
            const localPath = path.join(skillsDir(), localRel);
            await fs.mkdir(path.dirname(localPath), { recursive: true });
            await fs.writeFile(localPath, content, 'utf8');
            entries.push({
                id: parsed.id,
                site: parsed.site,
                fileName: parsed.fileName,
                title: extractTitle(content, parsed.id),
                path: repoPath,
                localPath,
            });
        } catch (err) {
            console.warn(`[browser-skills] Failed to fetch ${repoPath}:`, err);
        }
    }));

    if (attempted > 0 && entries.length === 0) {
        throw new Error(`Failed to download any of the ${attempted} browser skills listed upstream.`);
    }

    // Downloads complete in arbitrary order; sort so the manifest is stable.
    entries.sort((a, b) => a.id.localeCompare(b.id));

    const index: SkillsIndex = {
        fetchedAt: Date.now(),
        treeSha,
        entries,
    };
    await writeManifest(index);
    return index;
}
/** Minimum wait after a failed refresh before another automatic attempt is made. */
const REFRESH_RETRY_COOLDOWN_MS = 5 * 60 * 1000;

/** The refresh currently running, shared by concurrent callers. */
let inFlightRefresh: Promise<SkillsIndex> | null = null;
/** When the most recent refresh failed and why; cleared by a successful refresh. */
let lastRefreshFailure: { at: number; message: string } | null = null;

/** Returns the running refresh, starting one if none is in progress, and records its outcome. */
function startRefresh(): Promise<SkillsIndex> {
    if (!inFlightRefresh) {
        inFlightRefresh = refreshFromRemote()
            .then(
                (index) => {
                    lastRefreshFailure = null;
                    return index;
                },
                (err: unknown) => {
                    lastRefreshFailure = {
                        at: Date.now(),
                        message: err instanceof Error ? err.message : 'Failed to load skills.',
                    };
                    throw err;
                },
            )
            .finally(() => { inFlightRefresh = null; });
    }
    return inFlightRefresh;
}

/**
 * Returns the skill index, refreshing it from upstream when needed.
 *
 * - A cached manifest younger than `MANIFEST_TTL_MS` is returned as `ready`.
 * - An older cached manifest is returned immediately as `stale` while a
 *   refresh runs in the background.
 * - With no cached manifest, or with `forceRefresh`, the call waits for a
 *   refresh and returns `ready` or `error`.
 *
 * After a failed refresh, automatic attempts are suppressed for
 * `REFRESH_RETRY_COOLDOWN_MS`. Without this, every call made while upstream is
 * unreachable would start a new round of requests, and with no cache each
 * call would also wait out the network timeout. `forceRefresh` bypasses the
 * cooldown.
 *
 * @param options.forceRefresh Refresh even if a fresh manifest is cached.
 * @returns The loader status; this function does not throw.
 */
export async function ensureLoaded(options?: { forceRefresh?: boolean }): Promise<LoaderStatus> {
    try {
        const force = Boolean(options?.forceRefresh);
        const existing = await readManifest();
        const failure = lastRefreshFailure;
        const coolingDown = failure !== null && Date.now() - failure.at < REFRESH_RETRY_COOLDOWN_MS;

        if (existing && !force) {
            if (Date.now() - existing.fetchedAt < MANIFEST_TTL_MS) {
                return { status: 'ready', index: existing };
            }
            if (!inFlightRefresh && !coolingDown) {
                // Fire and forget: the caller keeps using the stale index.
                startRefresh().catch((err: unknown) => {
                    console.warn('[browser-skills] Background refresh failed:', err);
                });
            }
            return { status: 'stale', index: existing, refreshing: inFlightRefresh !== null };
        }

        if (!force && failure !== null && coolingDown && !inFlightRefresh) {
            // Nothing cached and upstream failed moments ago: report that failure
            // instead of blocking on another attempt.
            return { status: 'error', error: failure.message };
        }

        try {
            const index = await startRefresh();
            return { status: 'ready', index };
        } catch (err) {
            return { status: 'error', error: err instanceof Error ? err.message : 'Failed to load skills.' };
        }
    } catch (err) {
        return { status: 'error', error: err instanceof Error ? err.message : 'Skill loader failed.' };
    }
}
/**
 * Reads the markdown of one cached skill.
 *
 * @param id Skill id as stored in the manifest (e.g. "github/repo-actions").
 * @returns `{ ok: true, content, entry }` on success; otherwise
 *          `{ ok: false, error }` when the index is unavailable, the id is
 *          unknown, or the cached file cannot be read.
 */
export async function readSkillContent(id: string): Promise<{ ok: true; content: string; entry: SkillEntry } | { ok: false; error: string }> {
    const loader = await ensureLoaded();
    if (loader.status === 'error') {
        return { ok: false, error: loader.error };
    }
    if (loader.status === 'empty') {
        return { ok: false, error: 'No skills cached yet.' };
    }

    const entry = loader.index.entries.find((candidate) => candidate.id === id);
    if (entry === undefined) {
        return { ok: false, error: `Skill '${id}' not found.` };
    }

    try {
        const content = await fs.readFile(entry.localPath, 'utf8');
        return { ok: true, content, entry };
    } catch (readError) {
        const error = readError instanceof Error ? readError.message : 'Failed to read skill file.';
        return { ok: false, error };
    }
}

View file

@ -0,0 +1,56 @@
import type { SkillEntry, SkillsIndex } from './loader.js';
/**
 * Map browser-harness `domain-skills/<site>/` folder names to hostname tokens we
 * match against the current tab's URL.
 *
 * Heuristic: for each site folder we generate candidate hostnames like
 *   "booking-com" -> ["booking-com", "bookingcom", "booking.com"]
 *   "github"      -> ["github"]
 *   "dev-to"      -> ["dev-to", "devto", "dev.to"]
 * A candidate matches when its dot-separated labels appear as a contiguous run
 * of whole labels in the tab hostname, so "github" matches "gist.github.com"
 * but not "notgithub.example.com".
 */
function siteCandidates(site: string): string[] {
    const candidates = new Set<string>();
    candidates.add(site);
    candidates.add(site.replace(/-/g, ''));
    candidates.add(site.replace(/-/g, '.'));
    // Only the trailing "-tld" becomes a dot, keeping earlier hyphens
    // ("my-site-com" -> "my-site.com").
    if (site.endsWith('-com')) {
        candidates.add(`${site.slice(0, -4)}.com`);
    }
    if (site.endsWith('-org')) {
        candidates.add(`${site.slice(0, -4)}.org`);
    }
    if (site.endsWith('-io')) {
        candidates.add(`${site.slice(0, -3)}.io`);
    }
    return Array.from(candidates);
}

/** Returns the lower-cased hostname of `url`, or `null` if it does not parse as a URL. */
function extractHostname(url: string): string | null {
    try {
        return new URL(url).hostname.toLowerCase();
    } catch {
        return null;
    }
}

/** True when `run` occurs as a contiguous sequence of whole labels inside `hostLabels`. */
function hasLabelRun(hostLabels: readonly string[], run: readonly string[]): boolean {
    if (run.length === 0 || run.length > hostLabels.length) return false;
    for (let start = 0; start + run.length <= hostLabels.length; start++) {
        if (run.every((label, offset) => hostLabels[start + offset] === label)) return true;
    }
    return false;
}

/**
 * Finds the cached skills whose site folder corresponds to the URL's hostname.
 *
 * Matching is done on whole hostname labels rather than raw substrings.
 * Substring matching let short or embedded site names match unrelated hosts
 * (site "x" matched "example.com").
 *
 * @param index Skill index to search.
 * @param url URL of the current tab.
 * @param limit Maximum number of entries to return (default 5).
 * @returns Matching entries grouped by site in index order, capped at `limit`;
 *          empty when the URL cannot be parsed or nothing matches.
 */
export function matchSkillsForUrl(index: SkillsIndex, url: string, limit = 5): SkillEntry[] {
    const hostname = extractHostname(url);
    if (!hostname) return [];
    const hostLabels = hostname.split('.').filter((label) => label.length > 0);

    const bySite = new Map<string, SkillEntry[]>();
    for (const entry of index.entries) {
        const bucket = bySite.get(entry.site);
        if (bucket) {
            bucket.push(entry);
        } else {
            bySite.set(entry.site, [entry]);
        }
    }

    const matched: SkillEntry[] = [];
    for (const [site, entries] of bySite) {
        const hit = siteCandidates(site).some((candidate) => {
            const run = candidate.toLowerCase().split('.').filter((label) => label.length > 0);
            return hasLabelRun(hostLabels, run);
        });
        if (hit) matched.push(...entries);
    }
    return matched.slice(0, limit);
}

View file

@ -18,6 +18,7 @@ import { composioAccountsRepo } from "../../composio/repo.js";
import { executeAction as executeComposioAction, isConfigured as isComposioConfigured, searchTools as searchComposioTools } from "../../composio/client.js";
import { CURATED_TOOLKITS, CURATED_TOOLKIT_SLUGS } from "@x/shared/dist/composio.js";
import { BrowserControlInputSchema, type BrowserControlInput } from "@x/shared/dist/browser-control.js";
import { ensureLoaded as ensureBrowserSkillsLoaded, readSkillContent as readBrowserSkillContent, refreshFromRemote as refreshBrowserSkills } from "../browser-skills/index.js";
import type { ToolContext } from "./exec-tool.js";
import { generateText } from "ai";
import { createProvider } from "../../models/models.js";
@ -994,6 +995,147 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
},
},
// ============================================================================
// HTTP Fetch
// ============================================================================
'http-fetch': {
    description: 'Make a plain HTTP request (GET/POST/etc.) and return the response. Use this for API calls that do not require a logged-in browser session. For authenticated requests that need the user\'s active browser cookies, use browser-control with action "eval" and call fetch() inside the page context instead.',
    inputSchema: z.object({
        url: z.string().url().describe('Absolute URL to fetch.'),
        method: z.enum(['GET', 'POST', 'PUT', 'PATCH', 'DELETE', 'HEAD']).optional().describe('HTTP method. Defaults to GET.'),
        headers: z.record(z.string(), z.string()).optional().describe('Request headers.'),
        body: z.string().optional().describe('Request body as a string. For JSON, stringify first and set Content-Type: application/json.'),
        responseType: z.enum(['text', 'json']).optional().describe('How to parse the response body. Defaults to text.'),
        timeoutMs: z.number().int().positive().max(60000).optional().describe('Request timeout in milliseconds. Defaults to 15000.'),
    }),
    /**
     * Performs the request and returns status, headers and body.
     *
     * Never throws: network errors and timeouts are returned as
     * `{ success: false, error, aborted }`. The timeout covers reading the
     * body as well as waiting for the response headers.
     *
     * The returned body is capped at `MAX_BODY_BYTES` characters. A JSON
     * response over the cap is returned as truncated raw text with
     * `truncated: true` and a `note`, so the cap applies to both response
     * types and `truncated` always describes the body actually returned.
     */
    execute: async (input: {
        url: string;
        method?: 'GET' | 'POST' | 'PUT' | 'PATCH' | 'DELETE' | 'HEAD';
        headers?: Record<string, string>;
        body?: string;
        responseType?: 'text' | 'json';
        timeoutMs?: number;
    }) => {
        // Compared against string length (UTF-16 code units), not encoded bytes.
        const MAX_BODY_BYTES = 500_000;
        const timeout = input.timeoutMs ?? 15000;
        const controller = new AbortController();
        const timer = setTimeout(() => controller.abort(), timeout);
        try {
            const response = await fetch(input.url, {
                method: input.method ?? 'GET',
                headers: input.headers,
                body: input.body,
                signal: controller.signal,
                redirect: 'follow',
            });

            const responseHeaders: Record<string, string> = {};
            response.headers.forEach((value, key) => { responseHeaders[key] = value; });

            const rawText = await response.text();
            const truncated = rawText.length > MAX_BODY_BYTES;
            const text = truncated ? rawText.slice(0, MAX_BODY_BYTES) : rawText;

            if (input.responseType !== 'json') {
                return {
                    success: response.ok,
                    status: response.status,
                    statusText: response.statusText,
                    url: response.url,
                    headers: responseHeaders,
                    body: text,
                    truncated,
                };
            }

            if (truncated) {
                // A cut-off JSON document cannot be parsed; return the capped text
                // rather than the full parsed value.
                return {
                    success: response.ok,
                    status: response.status,
                    statusText: response.statusText,
                    url: response.url,
                    headers: responseHeaders,
                    body: text,
                    truncated: true,
                    note: `JSON response exceeded ${MAX_BODY_BYTES} characters; returning truncated raw text instead of parsed JSON.`,
                };
            }

            let parsed: unknown = undefined;
            try {
                parsed = JSON.parse(rawText);
            } catch (err) {
                return {
                    success: false,
                    status: response.status,
                    statusText: response.statusText,
                    url: response.url,
                    headers: responseHeaders,
                    error: `Response was not valid JSON: ${err instanceof Error ? err.message : 'parse error'}`,
                    bodyPreview: text.slice(0, 2000),
                };
            }

            return {
                success: response.ok,
                status: response.status,
                statusText: response.statusText,
                url: response.url,
                headers: responseHeaders,
                body: parsed,
                truncated: false,
            };
        } catch (error) {
            return {
                success: false,
                error: error instanceof Error ? error.message : 'HTTP fetch failed.',
                aborted: controller.signal.aborted,
            };
        } finally {
            clearTimeout(timer);
        }
    },
},
// ============================================================================
// Browser Skills (browser-use/browser-harness domain-skills cache)
// ============================================================================
'load-browser-skill': {
    description: 'Load a site-specific browser skill (from the browser-use/browser-harness domain-skills library) by id. Returns the full markdown content with selectors, gotchas, and recipes for the target site. Call this after browser-control responses surface a matching skill in suggestedSkills. Pass action="list" to see all available skills. Skills are fetched on first use and cached locally; pass action="refresh" to force an update from upstream.',
    inputSchema: z.object({
        action: z.enum(['load', 'list', 'refresh']).optional().describe('load: fetch a skill by id (default). list: list all cached skills. refresh: re-fetch the library from upstream.'),
        id: z.string().optional().describe('Skill id (e.g., "github/repo-actions") — required for load.'),
        site: z.string().optional().describe('Filter list results to a single site (e.g., "github").'),
    }),
    /**
     * Dispatches on `action` (default `load`):
     * - `refresh` re-downloads the library and reports how many skills were fetched.
     * - `list` returns id/title/site for cached skills, optionally limited to one site.
     * - `load` returns the markdown of the skill named by `id`.
     * Every failure is reported as `{ success: false, error }`; nothing is thrown.
     */
    execute: async (input: { action?: 'load' | 'list' | 'refresh'; id?: string; site?: string }) => {
        const failure = (error: string) => ({ success: false as const, error });

        try {
            switch (input.action ?? 'load') {
                case 'refresh': {
                    const { entries, treeSha } = await refreshBrowserSkills();
                    const count = entries.length;
                    const plural = count === 1 ? '' : 's';
                    return {
                        success: true,
                        message: `Refreshed ${count} skill${plural} from upstream.`,
                        count,
                        treeSha,
                    };
                }

                case 'list': {
                    const loader = await ensureBrowserSkillsLoaded();
                    if (loader.status === 'error') {
                        return failure(loader.error);
                    }
                    if (loader.status === 'empty') {
                        return failure('No browser skills cached yet.');
                    }

                    const skills: Array<{ id: string; title: string; site: string }> = [];
                    for (const { id, title, site } of loader.index.entries) {
                        if (input.site && site !== input.site) continue;
                        skills.push({ id, title, site });
                    }
                    return {
                        success: true,
                        count: skills.length,
                        skills,
                        cacheAgeMs: Date.now() - loader.index.fetchedAt,
                        refreshing: loader.status === 'stale' ? loader.refreshing : false,
                    };
                }

                default: {
                    if (!input.id) {
                        return failure('id is required for load.');
                    }
                    const loaded = await readBrowserSkillContent(input.id);
                    if (!loaded.ok) {
                        return failure(loaded.error);
                    }
                    const { entry, content } = loaded;
                    return {
                        success: true,
                        id: entry.id,
                        title: entry.title,
                        site: entry.site,
                        path: entry.path,
                        content,
                    };
                }
            }
        } catch (err) {
            return failure(err instanceof Error ? err.message : 'Failed to load browser skill.');
        }
    },
},
// ============================================================================
// Browser Control
// ============================================================================

View file

@ -51,6 +51,7 @@ export const BrowserControlActionSchema = z.enum([
'press',
'scroll',
'wait',
'eval',
]);
const BrowserElementTargetFields = {
@ -70,6 +71,7 @@ export const BrowserControlInputSchema = z.object({
ms: z.number().int().positive().max(30000).optional(),
maxElements: z.number().int().positive().max(100).optional(),
maxTextLength: z.number().int().positive().max(20000).optional(),
code: z.string().min(1).max(50000).optional(),
...BrowserElementTargetFields,
}).strict().superRefine((value, ctx) => {
const needsElementTarget = value.action === 'click' || value.action === 'type';
@ -114,6 +116,20 @@ export const BrowserControlInputSchema = z.object({
message: 'Provide an element index or selector.',
});
}
if (value.action === 'eval' && !value.code) {
ctx.addIssue({
code: z.ZodIssueCode.custom,
path: ['code'],
message: 'code is required for eval.',
});
}
});
/**
 * A skill hint attached to a browser-control result under `suggestedSkills`.
 * All three fields are required strings.
 */
export const SuggestedBrowserSkillSchema = z.object({
// Skill identifier.
id: z.string(),
// Human-readable title of the skill.
title: z.string(),
// Path of the skill file.
path: z.string(),
});
export const BrowserControlResultSchema = z.object({
@ -123,6 +139,8 @@ export const BrowserControlResultSchema = z.object({
error: z.string().optional(),
browser: BrowserStateSchema,
page: BrowserPageSnapshotSchema.optional(),
result: z.unknown().optional(),
suggestedSkills: z.array(SuggestedBrowserSkillSchema).optional(),
});
export type BrowserTabState = z.infer<typeof BrowserTabStateSchema>;
@ -132,3 +150,4 @@ export type BrowserPageSnapshot = z.infer<typeof BrowserPageSnapshotSchema>;
export type BrowserControlAction = z.infer<typeof BrowserControlActionSchema>;
export type BrowserControlInput = z.infer<typeof BrowserControlInputSchema>;
export type BrowserControlResult = z.infer<typeof BrowserControlResultSchema>;
export type SuggestedBrowserSkill = z.infer<typeof SuggestedBrowserSkillSchema>;