mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-04-25 00:16:29 +02:00
Merge 684a45b9b6 into 0bb256879c
This commit is contained in:
commit
d0df50eb50
8 changed files with 551 additions and 6 deletions
|
|
@ -1,8 +1,24 @@
|
|||
import type { IBrowserControlService } from '@x/core/dist/application/browser-control/service.js';
|
||||
import type { BrowserControlAction, BrowserControlInput, BrowserControlResult } from '@x/shared/dist/browser-control.js';
|
||||
import type { BrowserControlAction, BrowserControlInput, BrowserControlResult, SuggestedBrowserSkill } from '@x/shared/dist/browser-control.js';
|
||||
import { ensureLoaded, matchSkillsForUrl } from '@x/core/dist/application/browser-skills/index.js';
|
||||
import { browserViewManager } from './view.js';
|
||||
import { normalizeNavigationTarget } from './navigation.js';
|
||||
|
||||
/**
 * Looks up cached browser skills whose site matches the given page URL.
 *
 * Best-effort: a missing URL, a loader status other than `ready`/`stale`, an
 * empty match list, or any thrown error all yield `undefined`. Errors are
 * logged, never propagated.
 *
 * @param url URL of the page the browser action ended on, if known.
 * @returns Matching skills reduced to `{ id, title, path }`, or `undefined`.
 */
async function getSuggestedSkills(url: string | undefined): Promise<SuggestedBrowserSkill[] | undefined> {
  if (!url) {
    return undefined;
  }

  try {
    const loader = await ensureLoaded();
    if (loader.status !== 'ready' && loader.status !== 'stale') {
      return undefined;
    }

    const suggestions: SuggestedBrowserSkill[] = [];
    for (const skill of matchSkillsForUrl(loader.index, url)) {
      suggestions.push({ id: skill.id, title: skill.title, path: skill.path });
    }
    return suggestions.length > 0 ? suggestions : undefined;
  } catch (lookupError) {
    console.warn('[browser-control] suggestedSkills lookup failed:', lookupError);
    return undefined;
  }
}
|
||||
|
||||
function buildSuccessResult(
|
||||
action: BrowserControlAction,
|
||||
message: string,
|
||||
|
|
@ -52,11 +68,13 @@ export class ElectronBrowserControlService implements IBrowserControlService {
|
|||
}
|
||||
await browserViewManager.ensureActiveTabReady(signal);
|
||||
const page = await browserViewManager.readPageSummary(signal, { waitForReady: false }) ?? undefined;
|
||||
return buildSuccessResult(
|
||||
const suggestedSkills = await getSuggestedSkills(page?.url);
|
||||
const success = buildSuccessResult(
|
||||
'new-tab',
|
||||
target ? `Opened a new tab for ${target}.` : 'Opened a new tab.',
|
||||
page,
|
||||
);
|
||||
return suggestedSkills ? { ...success, suggestedSkills } : success;
|
||||
}
|
||||
|
||||
case 'switch-tab': {
|
||||
|
|
@ -99,7 +117,9 @@ export class ElectronBrowserControlService implements IBrowserControlService {
|
|||
}
|
||||
await browserViewManager.ensureActiveTabReady(signal);
|
||||
const page = await browserViewManager.readPageSummary(signal, { waitForReady: false }) ?? undefined;
|
||||
return buildSuccessResult('navigate', `Navigated to ${target}.`, page);
|
||||
const suggestedSkills = await getSuggestedSkills(page?.url);
|
||||
const success = buildSuccessResult('navigate', `Navigated to ${target}.`, page);
|
||||
return suggestedSkills ? { ...success, suggestedSkills } : success;
|
||||
}
|
||||
|
||||
case 'back': {
|
||||
|
|
@ -140,7 +160,9 @@ export class ElectronBrowserControlService implements IBrowserControlService {
|
|||
if (!result.ok || !result.page) {
|
||||
return buildErrorResult('read-page', result.error ?? 'Failed to read the current page.');
|
||||
}
|
||||
return buildSuccessResult('read-page', 'Read the current page.', result.page);
|
||||
const suggestedSkills = await getSuggestedSkills(result.page.url);
|
||||
const success = buildSuccessResult('read-page', 'Read the current page.', result.page);
|
||||
return suggestedSkills ? { ...success, suggestedSkills } : success;
|
||||
}
|
||||
|
||||
case 'click': {
|
||||
|
|
@ -232,6 +254,20 @@ export class ElectronBrowserControlService implements IBrowserControlService {
|
|||
const page = await browserViewManager.readPageSummary(signal, { waitForReady: false }) ?? undefined;
|
||||
return buildSuccessResult('wait', `Waited ${duration}ms for the page to settle.`, page);
|
||||
}
|
||||
|
||||
case 'eval': {
|
||||
const code = input.code;
|
||||
if (!code) {
|
||||
return buildErrorResult('eval', 'code is required for eval.');
|
||||
}
|
||||
await browserViewManager.ensureActiveTabReady(signal);
|
||||
const result = await browserViewManager.executeScript(code, signal);
|
||||
if (!result.ok) {
|
||||
return buildErrorResult('eval', result.error);
|
||||
}
|
||||
const success = buildSuccessResult('eval', 'Evaluated script in the active tab.');
|
||||
return { ...success, result: result.result };
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
return buildErrorResult(
|
||||
|
|
|
|||
|
|
@ -78,6 +78,41 @@ function abortIfNeeded(signal?: AbortSignal): void {
|
|||
throw signal.reason instanceof Error ? signal.reason : new Error('Browser action aborted');
|
||||
}
|
||||
|
||||
/**
 * Upper bound on the size of a serialized `eval` result. Compared against the
 * length of the JSON string (UTF-16 code units), not an encoded byte count.
 */
const EVAL_RESULT_MAX_BYTES = 200_000;

/**
 * Converts an arbitrary script result into a JSON-friendly value.
 *
 * - `null`, `undefined`, strings, numbers and booleans pass through unchanged.
 * - `bigint` becomes its decimal string; functions and symbols become the
 *   placeholders `"[function]"` / `"[symbol]"`.
 * - `Date` becomes its ISO string (`null` when invalid), mirroring `JSON.stringify`.
 * - Arrays and objects are copied recursively over their own enumerable keys.
 *   An object that contains itself is replaced by `"[circular]"` at the point
 *   of recursion; the same object referenced from several places is copied
 *   each time.
 * - When the JSON form is longer than `EVAL_RESULT_MAX_BYTES`, the result is
 *   `{ truncated: true, preview }` where `preview` is the JSON prefix.
 *
 * @param value Raw value returned by the evaluated script.
 * @returns The coerced value, a truncation marker, or `String(value)` if
 *   coercion or serialization throws.
 */
function safeSerialize(value: unknown): unknown {
  // Holds only the objects on the current recursion path, so a repeated but
  // non-cyclic reference is not mistaken for a cycle.
  const ancestors = new WeakSet<object>();

  const coerce = (v: unknown): unknown => {
    if (v === null || v === undefined) return v;
    const kind = typeof v;
    if (kind === 'string' || kind === 'number' || kind === 'boolean') return v;
    if (typeof v === 'bigint') return v.toString();
    if (kind === 'function' || kind === 'symbol') return `[${kind}]`;
    if (typeof v !== 'object') return String(v);

    if (v instanceof Date) {
      return Number.isNaN(v.getTime()) ? null : v.toISOString();
    }
    if (ancestors.has(v)) return '[circular]';

    ancestors.add(v);
    try {
      if (Array.isArray(v)) return v.map((item) => coerce(item));
      const out: Record<string, unknown> = {};
      for (const [key, inner] of Object.entries(v)) {
        out[key] = coerce(inner);
      }
      return out;
    } finally {
      // Leaving this object: siblings may legitimately reference it again.
      ancestors.delete(v);
    }
  };

  try {
    const coerced = coerce(value);
    const json = JSON.stringify(coerced);
    if (json && json.length > EVAL_RESULT_MAX_BYTES) {
      return { truncated: true, preview: json.slice(0, EVAL_RESULT_MAX_BYTES) };
    }
    return coerced;
  } catch {
    // Throwing getters or pathological nesting: fall back to a plain string.
    return String(value);
  }
}
|
||||
|
||||
async function sleep(ms: number, signal?: AbortSignal): Promise<void> {
|
||||
if (ms <= 0) return;
|
||||
abortIfNeeded(signal);
|
||||
|
|
@ -778,6 +813,17 @@ export class BrowserViewManager extends EventEmitter {
|
|||
await this.waitForWebContentsSettle(activeTab, signal);
|
||||
}
|
||||
|
||||
/**
 * Evaluates `code` in the active tab and returns its serialized result.
 *
 * The source is spliced into the body of an async arrow function, so it may
 * use `await`. Because it is a block body, only a value passed to `return`
 * reaches the caller; a bare trailing expression yields `undefined`.
 *
 * @param code JavaScript source to run.
 * @param signal Optional abort signal, forwarded to `executeOnActiveTab`.
 * @returns `{ ok: true, result }` with the result passed through
 *   `safeSerialize`, or `{ ok: false, error }` describing what was thrown.
 */
async executeScript(code: string, signal?: AbortSignal): Promise<{ ok: true; result: unknown } | { ok: false; error: string }> {
  try {
    // The newline before the closing brace stops a trailing `//` comment in
    // `code` from commenting out the end of the wrapper.
    const wrapped = `(async () => { ${code} \n})()`;
    const raw = await this.executeOnActiveTab<unknown>(wrapped, signal);
    return { ok: true, result: safeSerialize(raw) };
  } catch (error) {
    // Keep the detail of thrown non-Error values, and never return an empty message.
    let detail = '';
    if (error instanceof Error) {
      detail = error.message;
    } else if (typeof error === 'string') {
      detail = error;
    }
    return { ok: false, error: detail || 'Script evaluation failed.' };
  }
}
|
||||
|
||||
/** Returns the browser state produced by `snapshotState()`. */
getState(): BrowserState {
  const state = this.snapshotState();
  return state;
}
|
||||
|
|
|
|||
|
|
@ -14,8 +14,10 @@ Use this skill when the user asks you to open a website, browse in-app, search t
|
|||
- page ` + "`url`" + ` and ` + "`title`" + `
|
||||
- visible page text
|
||||
- interactable elements with numbered ` + "`index`" + ` values
|
||||
4. Prefer acting on those numbered indices with ` + "`click`" + ` / ` + "`type`" + ` / ` + "`press`" + `.
|
||||
5. After each action, read the returned page snapshot before deciding the next step.
|
||||
- ` + "`suggestedSkills`" + ` — site-specific and interaction-specific skill hints for the current page
|
||||
4. **Always inspect ` + "`suggestedSkills`" + ` before acting.** If any skill in the list matches what the user asked for (site or task), call ` + "`load-browser-skill({ id: \"<id>\" })`" + ` *first*, read it in full, then plan your actions. These skills encode selectors, timing, and gotchas that would otherwise cost you several failed attempts to rediscover. If no skill matches, proceed — but do not skip this check.
|
||||
5. Prefer acting on those numbered indices with ` + "`click`" + ` / ` + "`type`" + ` / ` + "`press`" + `.
|
||||
6. After each action, read the returned page snapshot before deciding the next step — including re-checking ` + "`suggestedSkills`" + ` if the navigation landed you on a new domain.
|
||||
|
||||
## Actions
|
||||
|
||||
|
|
@ -92,12 +94,38 @@ Wait for the page to settle, useful after async UI changes.
|
|||
Parameters:
|
||||
- ` + "`ms`" + `: milliseconds to wait (optional)
|
||||
|
||||
### eval
|
||||
Run arbitrary JavaScript in the active tab and return its value. Use this as an escape hatch when the structured actions above are insufficient — for example, submitting a form (` + "`form.submit()`" + `), reading DOM state (` + "`document.querySelector(...).textContent`" + `), or computing something that requires page-scoped APIs.
|
||||
|
||||
Parameters:
|
||||
- ` + "`code`" + `: JavaScript source. The code runs as the body of an ` + "`async`" + ` function, so you can ` + "`await`" + ` freely. Only a value you explicitly ` + "`return`" + ` is serialized back; a bare final expression yields ` + "`undefined`" + `. Non-serializable values (DOM nodes, functions) are coerced to placeholder strings. Large results are truncated.
|
||||
|
||||
Example:
|
||||
- ` + "`{ action: \"eval\", code: \"return document.querySelector('meta[name=user-login]')?.content ?? null\" }`" + `
|
||||
|
||||
Security: ` + "`eval`" + ` runs in the active tab's origin with the user's cookies. Do not exfiltrate credentials, cookies, or localStorage contents to third-party origins.
|
||||
|
||||
## Companion Tools
|
||||
|
||||
### http-fetch
|
||||
Use for **unauthenticated** API calls (e.g., ` + "`api.github.com`" + `, public REST endpoints) where you don't need the browser's logged-in session. Often faster and cleaner than DOM scraping — many sites expose a public API that returns the same data. For authenticated requests that require the user's browser cookies, use ` + "`browser-control`" + ` with ` + "`action: \"eval\"`" + ` and call ` + "`fetch()`" + ` inside the page context instead.
|
||||
|
||||
### load-browser-skill
|
||||
Rowboat caches a library of browser skills (from ` + "`browser-use/browser-harness`" + `) indexed by both **domain** (github, linkedin, amazon, booking, …) and **interaction type** within a domain (e.g. ` + "`github/repo-actions`" + `, ` + "`github/scraping`" + `, ` + "`arxiv-bulk/*`" + `). Whenever ` + "`browser-control`" + ` returns a ` + "`suggestedSkills`" + ` array — which it does on ` + "`navigate`" + `, ` + "`new-tab`" + `, and ` + "`read-page`" + ` — treat it as a required reading step, not optional. Pick the entry that matches the current task (domain match first, then the interaction-specific variant if one exists) and call ` + "`load-browser-skill({ id: \"<id>\" })`" + ` before attempting the action.
|
||||
|
||||
You can also proactively call ` + "`load-browser-skill({ action: \"list\", site: \"<site>\" })`" + ` when you know you're about to work on a site, to see what skills exist even if ` + "`suggestedSkills`" + ` is empty (e.g. before navigating).
|
||||
|
||||
These skills are written against a Python harness, so treat them as **reference knowledge** — adapt the recipes into the actions above (especially ` + "`eval`" + ` + ` + "`http-fetch`" + ` for the ` + "`js(...)`" + ` and ` + "`http_get(...)`" + ` calls they use). The selectors, DOM gotchas, and sequencing are the durable part; the exact function names are not.
|
||||
|
||||
## Important Rules
|
||||
|
||||
- Prefer ` + "`read-page`" + ` before interacting.
|
||||
- Prefer element ` + "`index`" + ` over CSS selectors.
|
||||
- If the tool says the snapshot is stale, call ` + "`read-page`" + ` again.
|
||||
- After navigation, clicking, typing, pressing, or scrolling, use the returned page snapshot instead of assuming the page state.
|
||||
- **Always check ` + "`suggestedSkills`" + ` after ` + "`navigate`" + `, ` + "`new-tab`" + `, or ` + "`read-page`" + `, and load the matching domain or interaction skill before acting.** Skipping this step is the single most common way to waste a dozen failed clicks on a site whose quirks are already documented. If the array is empty, proceed normally — but don't skip the check.
|
||||
- Prefer structured actions (click/type/press) over ` + "`eval`" + ` when both work. Reach for ` + "`eval`" + ` when the site fights synthetic events, when you need to submit a form directly, or when you need to read DOM state the structured actions don't surface.
|
||||
- For read-only data, check if ` + "`http-fetch`" + ` against the site's public API works before scraping the DOM.
|
||||
- Use Rowboat's browser for live interaction. Use web search tools for research where a live session is unnecessary.
|
||||
- Do not wrap browser URLs or browser pages in ` + "```filepath" + ` blocks. Filepath cards are only for real files on disk, not web pages or browser tabs.
|
||||
- If you mention a page the browser opened, use plain text for the URL/title instead of trying to create a clickable file card.
|
||||
|
|
|
|||
|
|
@ -0,0 +1,3 @@
|
|||
export { ensureLoaded, readSkillContent, refreshFromRemote } from './loader.js';
|
||||
export type { SkillEntry, SkillsIndex, LoaderStatus } from './loader.js';
|
||||
export { matchSkillsForUrl } from './matcher.js';
|
||||
215
apps/x/packages/core/src/application/browser-skills/loader.ts
Normal file
215
apps/x/packages/core/src/application/browser-skills/loader.ts
Normal file
|
|
@ -0,0 +1,215 @@
|
|||
import * as path from 'node:path';
|
||||
import * as fs from 'node:fs/promises';
|
||||
import { WorkDir } from '../../config/config.js';
|
||||
|
||||
// GitHub repository and branch the skill library is fetched from.
const REPO_OWNER = 'browser-use';
|
||||
const REPO_NAME = 'browser-harness';
|
||||
const REPO_BRANCH = 'main';
|
||||
// Repo path prefix used to pick skill markdown files out of the tree listing.
const DOMAIN_SKILLS_PREFIX = 'domain-skills/';
|
||||
|
||||
// Manifest age (24 hours) beyond which ensureLoaded treats the cache as stale.
const MANIFEST_TTL_MS = 24 * 60 * 60 * 1000;
|
||||
// Abort timeout (20 seconds) applied by fetchWithTimeout.
const FETCH_TIMEOUT_MS = 20_000;
|
||||
|
||||
/** One skill markdown file from the upstream repo, as recorded in the local cache manifest. */
export type SkillEntry = {
|
||||
id: string; // e.g. "github/repo-actions"
|
||||
site: string; // e.g. "github"
|
||||
fileName: string; // e.g. "repo-actions.md"
|
||||
title: string; // first H1 from the markdown, or a derived title
|
||||
path: string; // relative repo path, e.g. "domain-skills/github/repo-actions.md"
|
||||
localPath: string; // absolute path on disk
|
||||
};
|
||||
|
||||
/**
 * Manifest persisted as `manifest.json`: `fetchedAt` is the `Date.now()` value
 * taken when the refresh finished, `treeSha` is the upstream git tree SHA, and
 * `entries` lists the skills that were downloaded.
 */
export type SkillsIndex = {
|
||||
fetchedAt: number;
|
||||
treeSha: string;
|
||||
entries: SkillEntry[];
|
||||
};
|
||||
|
||||
/**
 * Outcome of `ensureLoaded`, discriminated by `status`.
 * NOTE(review): no code in this module appears to produce `'empty'`; confirm
 * whether the variant is still needed.
 */
export type LoaderStatus =
|
||||
| { status: 'ready'; index: SkillsIndex }
|
||||
| { status: 'stale'; index: SkillsIndex; refreshing: boolean }
|
||||
| { status: 'empty' }
|
||||
| { status: 'error'; error: string };
|
||||
|
||||
/** Root directory of the browser-skills cache inside the work dir. */
const cacheRoot = (): string => {
  return path.join(WorkDir, 'cache', 'browser-skills');
};

/** Directory under the cache root that holds the downloaded skill markdown. */
const skillsDir = (): string => {
  return path.join(cacheRoot(), 'domain-skills');
};

/** Location of the JSON manifest describing the cached skills. */
const manifestPath = (): string => {
  return path.join(cacheRoot(), 'manifest.json');
};
|
||||
|
||||
/** Creates the skills directory (and any missing parents) if it does not exist yet. */
async function ensureCacheDir(): Promise<void> {
  const target = skillsDir();
  await fs.mkdir(target, { recursive: true });
}
|
||||
|
||||
/**
 * Loads the cached manifest from disk and checks its shape.
 *
 * @returns The manifest, or `null` when the file is missing, unreadable, not
 *   valid JSON, or does not look like a `SkillsIndex` (non-numeric
 *   `fetchedAt`, `entries` not an array, or an entry missing a string field).
 *   Callers treat `null` as "no cache".
 */
async function readManifest(): Promise<SkillsIndex | null> {
  let parsed: unknown;
  try {
    parsed = JSON.parse(await fs.readFile(manifestPath(), 'utf8'));
  } catch {
    return null;
  }
  if (typeof parsed !== 'object' || parsed === null) return null;

  const candidate = parsed as Record<string, unknown>;
  const { fetchedAt, treeSha, entries } = candidate;
  if (typeof fetchedAt !== 'number' || !Number.isFinite(fetchedAt)) return null;
  if (!Array.isArray(entries)) return null;

  const entryFields = ['id', 'site', 'fileName', 'title', 'path', 'localPath'] as const;
  const isEntry = (value: unknown): value is SkillEntry => {
    if (typeof value !== 'object' || value === null) return false;
    const record = value as Record<string, unknown>;
    return entryFields.every((field) => typeof record[field] === 'string');
  };
  if (!entries.every(isEntry)) return null;

  return {
    fetchedAt,
    // Older or hand-edited manifests may lack the SHA; it is informational only.
    treeSha: typeof treeSha === 'string' ? treeSha : '',
    entries,
  };
}
|
||||
|
||||
/**
 * Persists the manifest as pretty-printed JSON.
 *
 * The data is written to a temporary sibling file and then renamed over the
 * real manifest, so a concurrent `readManifest` never observes a half-written
 * file.
 *
 * @param index Manifest to store.
 * @throws Whatever `fs.writeFile` / `fs.rename` reject with; the temporary
 *   file is removed on failure.
 */
async function writeManifest(index: SkillsIndex): Promise<void> {
  await ensureCacheDir();
  const target = manifestPath();
  const scratch = `${target}.${process.pid}.${Date.now()}.tmp`;
  try {
    await fs.writeFile(scratch, JSON.stringify(index, null, 2), 'utf8');
    await fs.rename(scratch, target);
  } catch (err) {
    // Best-effort cleanup; the original failure is what the caller needs to see.
    await fs.rm(scratch, { force: true }).catch(() => undefined);
    throw err;
  }
}
|
||||
|
||||
/**
 * Returns the text of the first level-1 ATX heading (`# Title`) in a markdown
 * document.
 *
 * Lines inside fenced code blocks (``` or ~~~) are ignored so that shell or
 * Python comments are not mistaken for the title, and a leading byte-order
 * mark is stripped before matching.
 *
 * @param markdown Markdown source.
 * @param fallback Value returned when no level-1 heading is found.
 * @returns The trimmed heading text, or `fallback`.
 */
function extractTitle(markdown: string, fallback: string): string {
  const text = markdown.charCodeAt(0) === 0xfeff ? markdown.slice(1) : markdown;
  // Marker of the fence currently open, or null when outside any fence.
  let openFence: string | null = null;

  for (const line of text.split(/\r?\n/)) {
    const fence = /^ {0,3}(`{3,}|~{3,})/.exec(line)?.[1];
    if (fence !== undefined) {
      if (openFence === null) {
        openFence = fence;
      } else if (fence[0] === openFence[0] && fence.length >= openFence.length) {
        openFence = null;
      }
      continue;
    }
    if (openFence !== null) continue;

    const heading = /^#\s+(.+?)\s*$/.exec(line)?.[1];
    if (heading) return heading.trim();
  }
  return fallback;
}
|
||||
|
||||
/**
 * `fetch` wrapper that aborts after `FETCH_TIMEOUT_MS` and applies default
 * `User-Agent` / `Accept` headers unless the caller supplies its own.
 *
 * Caller headers may be given in any `HeadersInit` form (plain object,
 * `Headers`, or tuple array). If the caller passes a `signal`, aborting it
 * also aborts the request. The timer is cleared as soon as `fetch` settles,
 * so it does not cover reading the response body afterwards.
 *
 * @param url Absolute URL to request.
 * @param init Optional request options.
 * @returns The response as resolved by `fetch`.
 * @throws Whatever `fetch` rejects with, including the abort error on timeout.
 */
async function fetchWithTimeout(url: string, init?: RequestInit): Promise<Response> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

  // Chain the caller's signal instead of discarding it.
  const callerSignal = init?.signal ?? undefined;
  const forwardAbort = (): void => controller.abort();
  if (callerSignal?.aborted) {
    controller.abort();
  } else {
    callerSignal?.addEventListener('abort', forwardAbort, { once: true });
  }

  // Headers normalizes every HeadersInit shape and matches names case-insensitively.
  const headers = new Headers(init?.headers);
  if (!headers.has('User-Agent')) headers.set('User-Agent', 'rowboat-browser-skills');
  if (!headers.has('Accept')) headers.set('Accept', 'application/vnd.github+json');

  try {
    return await fetch(url, { ...init, signal: controller.signal, headers });
  } finally {
    clearTimeout(timer);
    callerSignal?.removeEventListener('abort', forwardAbort);
  }
}
|
||||
|
||||
/** One node of a GitHub git-tree listing; only the fields this module reads. */
type GithubTreeNode = { path: string; type: string; sha: string };

/**
 * Resolves the tree SHA of the configured branch and lists every markdown
 * blob under `DOMAIN_SKILLS_PREFIX`.
 *
 * @returns The tree SHA and the repo-relative paths of the skill files.
 * @throws Error when either GitHub request fails, the branch response lacks a
 *   tree SHA, or the tree response has no `tree` array.
 */
async function fetchRepoTree(): Promise<{ treeSha: string; skillPaths: string[] }> {
  const branchUrl = `https://api.github.com/repos/${REPO_OWNER}/${REPO_NAME}/branches/${REPO_BRANCH}`;
  const branchRes = await fetchWithTimeout(branchUrl);
  if (!branchRes.ok) {
    throw new Error(`GitHub branch fetch failed: ${branchRes.status} ${branchRes.statusText}`);
  }
  const branch = (await branchRes.json()) as { commit?: { commit?: { tree?: { sha?: string } } } } | null;
  const treeSha = branch?.commit?.commit?.tree?.sha;
  if (!treeSha) throw new Error('Could not resolve tree SHA from branch response.');

  const treeUrl = `https://api.github.com/repos/${REPO_OWNER}/${REPO_NAME}/git/trees/${treeSha}?recursive=1`;
  const treeRes = await fetchWithTimeout(treeUrl);
  if (!treeRes.ok) {
    throw new Error(`GitHub tree fetch failed: ${treeRes.status} ${treeRes.statusText}`);
  }
  const listing = (await treeRes.json()) as { tree?: GithubTreeNode[]; truncated?: boolean } | null;
  if (!listing || !Array.isArray(listing.tree)) {
    throw new Error('GitHub tree response did not contain a tree listing.');
  }
  if (listing.truncated) {
    // A truncated recursive listing is incomplete; surface that rather than silently missing skills.
    console.warn('[browser-skills] GitHub tree listing was truncated; some skills may be missing.');
  }

  const skillPaths = listing.tree
    .filter((node) =>
      node.type === 'blob'
      && typeof node.path === 'string'
      && node.path.startsWith(DOMAIN_SKILLS_PREFIX)
      && node.path.endsWith('.md'))
    .map((node) => node.path);

  return { treeSha, skillPaths };
}
|
||||
|
||||
/**
 * Downloads one file from the configured branch via raw.githubusercontent.com.
 *
 * Each path segment is percent-encoded so names containing spaces, `#`, `?`
 * or similar characters still form a valid URL.
 *
 * @param repoPath Repo-relative path using `/` separators.
 * @returns The file body as text.
 * @throws Error when the response status is not OK, or whatever the fetch rejects with.
 */
async function fetchRawFile(repoPath: string): Promise<string> {
  const encodedPath = repoPath
    .split('/')
    .map((segment) => encodeURIComponent(segment))
    .join('/');
  const url = `https://raw.githubusercontent.com/${REPO_OWNER}/${REPO_NAME}/${REPO_BRANCH}/${encodedPath}`;
  const res = await fetchWithTimeout(url, { headers: { Accept: 'text/plain' } });
  if (!res.ok) {
    throw new Error(`Raw file fetch failed for ${repoPath}: ${res.status} ${res.statusText}`);
  }
  return res.text();
}
|
||||
|
||||
/**
 * Splits a repo path such as `domain-skills/github/repo-actions.md` into its
 * skill id (`github/repo-actions`), site folder (`github`) and file name
 * relative to the site folder (`repo-actions.md`).
 *
 * The result is later joined into a filesystem path, so paths are rejected
 * when they do not start with `DOMAIN_SKILLS_PREFIX`, sit directly under the
 * prefix without a site folder, or contain an empty, `.`, `..` or
 * backslash-bearing segment.
 *
 * @param repoPath Repo-relative path using `/` separators.
 * @returns The parsed parts, or `null` when the path is not an acceptable skill path.
 */
function parseRepoPath(repoPath: string): { id: string; site: string; fileName: string } | null {
  if (!repoPath.startsWith(DOMAIN_SKILLS_PREFIX)) return null;

  const rel = repoPath.slice(DOMAIN_SKILLS_PREFIX.length);
  const parts = rel.split('/');
  if (parts.length < 2) return null;

  const unsafe = parts.some((segment) =>
    segment === '' || segment === '.' || segment === '..' || segment.includes('\\'));
  if (unsafe) return null;

  const [site, ...rest] = parts;
  if (site === undefined) return null;

  return {
    id: rel.replace(/\.md$/, ''),
    site,
    fileName: rest.join('/'),
  };
}
|
||||
|
||||
/**
 * Re-downloads the whole skill library and rewrites the manifest.
 *
 * Files are fetched with bounded concurrency. A file that fails to download,
 * has an unacceptable path, or would land outside the cache directory is
 * skipped with a warning. If files were listed but none could be downloaded,
 * the refresh fails instead of replacing the manifest with an empty one.
 *
 * @returns The freshly written index, entries sorted by id.
 * @throws Error when the tree listing cannot be fetched, when every download
 *   fails, or when the manifest cannot be written.
 */
export async function refreshFromRemote(): Promise<SkillsIndex> {
  const MAX_PARALLEL_DOWNLOADS = 8;

  await ensureCacheDir();
  const { treeSha, skillPaths } = await fetchRepoTree();
  const cacheDir = path.resolve(skillsDir());

  const download = async (repoPath: string): Promise<SkillEntry | null> => {
    const parsed = parseRepoPath(repoPath);
    if (!parsed) return null;

    const localPath = path.resolve(cacheDir, parsed.site, parsed.fileName);
    // The path comes from a remote listing: never write outside the cache directory.
    if (!localPath.startsWith(cacheDir + path.sep)) {
      console.warn(`[browser-skills] Skipping ${repoPath}: resolves outside the cache directory.`);
      return null;
    }

    try {
      const content = await fetchRawFile(repoPath);
      await fs.mkdir(path.dirname(localPath), { recursive: true });
      await fs.writeFile(localPath, content, 'utf8');
      return {
        id: parsed.id,
        site: parsed.site,
        fileName: parsed.fileName,
        title: extractTitle(content, parsed.id),
        path: repoPath,
        localPath,
      };
    } catch (err) {
      console.warn(`[browser-skills] Failed to fetch ${repoPath}:`, err);
      return null;
    }
  };

  // Small worker pool: each worker pulls the next unclaimed path until none remain.
  const entries: SkillEntry[] = [];
  let cursor = 0;
  const worker = async (): Promise<void> => {
    while (cursor < skillPaths.length) {
      const repoPath = skillPaths[cursor];
      cursor += 1;
      if (repoPath === undefined) continue;
      const entry = await download(repoPath);
      if (entry) entries.push(entry);
    }
  };
  const workerCount = Math.min(MAX_PARALLEL_DOWNLOADS, skillPaths.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));

  if (skillPaths.length > 0 && entries.length === 0) {
    // Total failure (offline, rate-limited, ...): keep whatever manifest is already on disk.
    throw new Error('No browser skills could be downloaded; keeping the existing cache.');
  }

  entries.sort((a, b) => a.id.localeCompare(b.id));

  const index: SkillsIndex = {
    fetchedAt: Date.now(),
    treeSha,
    entries,
  };
  await writeManifest(index);
  return index;
}
|
||||
|
||||
/** The refresh currently running, shared by concurrent callers; `null` when idle. */
let inFlightRefresh: Promise<SkillsIndex> | null = null;

/** `Date.now()` of the most recent failed background refresh; `0` when the last one succeeded. */
let lastBackgroundFailureAt = 0;

/** Minimum wait after a failed background refresh before another one is started. */
const BACKGROUND_RETRY_BACKOFF_MS = 5 * 60 * 1000;

/**
 * Returns the skills index, refreshing it from upstream when needed.
 *
 * - Fresh manifest (younger than `MANIFEST_TTL_MS`) and no `forceRefresh`:
 *   `ready` with the cached index.
 * - Stale manifest and no `forceRefresh`: `stale` with the cached index, and a
 *   background refresh is started unless one is already running or the last
 *   one failed within `BACKGROUND_RETRY_BACKOFF_MS`. `refreshing` reports
 *   whether a refresh is actually in flight.
 * - No usable manifest, or `forceRefresh`: waits for a refresh (joining one
 *   already in flight) and returns `ready` or `error`.
 *
 * Never throws; failures are reported as `{ status: 'error' }`.
 *
 * @param options `forceRefresh` skips the cached manifest and waits for a refresh.
 */
export async function ensureLoaded(options?: { forceRefresh?: boolean }): Promise<LoaderStatus> {
  try {
    const existing = await readManifest();
    const forceRefresh = Boolean(options?.forceRefresh);

    if (existing && !forceRefresh) {
      const fresh = Date.now() - existing.fetchedAt < MANIFEST_TTL_MS;
      if (fresh) {
        return { status: 'ready', index: existing };
      }

      // Without this back-off, every call made while upstream is unreachable
      // or rate-limited would start another full refresh.
      const coolingDown = Date.now() - lastBackgroundFailureAt < BACKGROUND_RETRY_BACKOFF_MS;
      if (!inFlightRefresh && !coolingDown) {
        inFlightRefresh = refreshFromRemote()
          .then((index) => {
            lastBackgroundFailureAt = 0;
            return index;
          })
          .catch((err) => {
            lastBackgroundFailureAt = Date.now();
            console.warn('[browser-skills] Background refresh failed:', err);
            return existing;
          })
          .finally(() => { inFlightRefresh = null; });
      }
      return { status: 'stale', index: existing, refreshing: inFlightRefresh !== null };
    }

    if (!inFlightRefresh) {
      inFlightRefresh = refreshFromRemote().finally(() => { inFlightRefresh = null; });
    }
    try {
      const index = await inFlightRefresh;
      return { status: 'ready', index };
    } catch (err) {
      return { status: 'error', error: err instanceof Error ? err.message : 'Failed to load skills.' };
    }
  } catch (err) {
    return { status: 'error', error: err instanceof Error ? err.message : 'Skill loader failed.' };
  }
}
|
||||
|
||||
/**
 * Reads the cached markdown for a single skill.
 *
 * @param id Skill id as stored in the index, e.g. `"github/repo-actions"`.
 * @returns `{ ok: true, content, entry }` on success; otherwise
 *   `{ ok: false, error }` when the loader reports `error`/`empty`, the id is
 *   not in the index, or the cached file cannot be read.
 */
export async function readSkillContent(id: string): Promise<{ ok: true; content: string; entry: SkillEntry } | { ok: false; error: string }> {
  const loader = await ensureLoaded();
  if (loader.status === 'error') {
    return { ok: false, error: loader.error };
  }
  if (loader.status === 'empty') {
    return { ok: false, error: 'No skills cached yet.' };
  }

  const wanted = loader.index.entries.find((candidate) => candidate.id === id);
  if (wanted === undefined) {
    return { ok: false, error: `Skill '${id}' not found.` };
  }

  try {
    const markdown = await fs.readFile(wanted.localPath, 'utf8');
    return { ok: true, content: markdown, entry: wanted };
  } catch (readError) {
    const message = readError instanceof Error ? readError.message : 'Failed to read skill file.';
    return { ok: false, error: message };
  }
}
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
import type { SkillEntry, SkillsIndex } from './loader.js';
|
||||
|
||||
/**
 * Derives the hostname fragments a `domain-skills/<site>/` folder name may
 * correspond to. The folder name is lower-cased, then offered as-is, with
 * hyphens removed, and with hyphens turned into dots; a trailing `-com`,
 * `-org` or `-io` is additionally turned into a real TLD suffix:
 *   "booking-com" -> ["booking-com", "bookingcom", "booking.com"]
 *   "github"      -> ["github"]
 *   "dev-to"      -> ["dev-to", "devto", "dev.to"]
 *   "foo-bar-com" -> ["foo-bar-com", "foobarcom", "foo.bar.com", "foo-bar.com"]
 */
function siteCandidates(site: string): string[] {
  const name = site.toLowerCase();
  const candidates = new Set<string>([
    name,
    name.replace(/-/g, ''),
    name.replace(/-/g, '.'),
  ]);
  for (const tld of ['com', 'org', 'io']) {
    const suffix = `-${tld}`;
    // Require something before the suffix so "-com" never becomes a bare ".com".
    if (name.endsWith(suffix) && name.length > suffix.length) {
      candidates.add(`${name.slice(0, -suffix.length)}.${tld}`);
    }
  }
  candidates.delete('');
  return Array.from(candidates);
}

/** Returns the lower-cased hostname of `url`, or `null` when it cannot be parsed. */
function extractHostname(url: string): string | null {
  try {
    return new URL(url).hostname.toLowerCase();
  } catch {
    return null;
  }
}

/**
 * Returns the skills whose site folder matches the hostname of `url`.
 *
 * A site matches when one of its candidates (see `siteCandidates`) equals one
 * or more consecutive dot-separated labels of the hostname: `github` matches
 * `github.com` and `gist.github.com` but not `notgithub.com`, and a
 * one-letter site such as `x` matches `x.com` but not `example.com`.
 *
 * @param index Skills index to search.
 * @param url URL of the current page.
 * @param limit Maximum number of entries returned (default 5).
 * @returns Matching entries grouped by site in index order, cut to `limit`;
 *   empty when the URL is invalid or nothing matches.
 */
export function matchSkillsForUrl(index: SkillsIndex, url: string, limit = 5): SkillEntry[] {
  const hostname = extractHostname(url);
  if (!hostname) return [];

  const bySite = new Map<string, SkillEntry[]>();
  for (const entry of index.entries) {
    const bucket = bySite.get(entry.site);
    if (bucket) {
      bucket.push(entry);
    } else {
      bySite.set(entry.site, [entry]);
    }
  }

  // Padding with dots lets one `includes` check enforce label boundaries on both sides.
  const padded = `.${hostname}.`;
  const matched: SkillEntry[] = [];
  for (const [site, entries] of bySite) {
    const hit = siteCandidates(site).some((candidate) => padded.includes(`.${candidate}.`));
    if (hit) matched.push(...entries);
  }

  return matched.slice(0, limit);
}
|
||||
|
|
@ -18,6 +18,7 @@ import { composioAccountsRepo } from "../../composio/repo.js";
|
|||
import { executeAction as executeComposioAction, isConfigured as isComposioConfigured, searchTools as searchComposioTools } from "../../composio/client.js";
|
||||
import { CURATED_TOOLKITS, CURATED_TOOLKIT_SLUGS } from "@x/shared/dist/composio.js";
|
||||
import { BrowserControlInputSchema, type BrowserControlInput } from "@x/shared/dist/browser-control.js";
|
||||
import { ensureLoaded as ensureBrowserSkillsLoaded, readSkillContent as readBrowserSkillContent, refreshFromRemote as refreshBrowserSkills } from "../browser-skills/index.js";
|
||||
import type { ToolContext } from "./exec-tool.js";
|
||||
import { generateText } from "ai";
|
||||
import { createProvider } from "../../models/models.js";
|
||||
|
|
@ -994,6 +995,147 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
|
|||
},
|
||||
},
|
||||
|
||||
// ============================================================================
|
||||
// HTTP Fetch
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Plain HTTP request tool. It does not use the browser session; the only
 * request headers sent are the ones the caller passes in.
 *
 * The response body is read as a stream and cut off after `MAX_BODY_BYTES`
 * bytes, so an oversized response is never fully buffered. With
 * `responseType: 'json'` a response that exceeds the cap is reported as a
 * failure with a preview, since a cut-off document cannot be parsed. The
 * timeout covers both the request and reading the body.
 */
'http-fetch': {
  description: 'Make a plain HTTP request (GET/POST/etc.) and return the response. Use this for API calls that do not require a logged-in browser session. For authenticated requests that need the user\'s active browser cookies, use browser-control with action "eval" and call fetch() inside the page context instead.',
  inputSchema: z.object({
    url: z.string().url().describe('Absolute URL to fetch.'),
    method: z.enum(['GET', 'POST', 'PUT', 'PATCH', 'DELETE', 'HEAD']).optional().describe('HTTP method. Defaults to GET.'),
    headers: z.record(z.string(), z.string()).optional().describe('Request headers.'),
    body: z.string().optional().describe('Request body as a string. For JSON, stringify first and set Content-Type: application/json.'),
    responseType: z.enum(['text', 'json']).optional().describe('How to parse the response body. Defaults to text.'),
    timeoutMs: z.number().int().positive().max(60000).optional().describe('Request timeout in milliseconds. Defaults to 15000.'),
  }),
  execute: async (input: {
    url: string;
    method?: 'GET' | 'POST' | 'PUT' | 'PATCH' | 'DELETE' | 'HEAD';
    headers?: Record<string, string>;
    body?: string;
    responseType?: 'text' | 'json';
    timeoutMs?: number;
  }) => {
    const MAX_BODY_BYTES = 500_000;
    const timeout = input.timeoutMs ?? 15000;
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeout);
    try {
      const response = await fetch(input.url, {
        method: input.method ?? 'GET',
        headers: input.headers,
        body: input.body,
        signal: controller.signal,
        redirect: 'follow',
      });
      const responseHeaders: Record<string, string> = {};
      response.headers.forEach((value, key) => { responseHeaders[key] = value; });

      // Read at most MAX_BODY_BYTES, then cancel the stream.
      let text = '';
      let truncated = false;
      if (response.body) {
        const reader = response.body.getReader();
        const decoder = new TextDecoder();
        let received = 0;
        for (;;) {
          const { done, value } = await reader.read();
          if (done) {
            text += decoder.decode();
            break;
          }
          const room = MAX_BODY_BYTES - received;
          if (value.byteLength > room) {
            text += decoder.decode(value.subarray(0, room), { stream: true });
            truncated = true;
            await reader.cancel();
            break;
          }
          received += value.byteLength;
          text += decoder.decode(value, { stream: true });
        }
      }

      let parsed: unknown = undefined;
      if (input.responseType === 'json') {
        if (truncated) {
          return {
            success: false,
            status: response.status,
            statusText: response.statusText,
            url: response.url,
            headers: responseHeaders,
            error: `Response exceeded ${MAX_BODY_BYTES} bytes and cannot be parsed as JSON.`,
            bodyPreview: text.slice(0, 2000),
            truncated: true,
          };
        }
        try {
          parsed = JSON.parse(text);
        } catch (err) {
          return {
            success: false,
            status: response.status,
            statusText: response.statusText,
            url: response.url,
            headers: responseHeaders,
            error: `Response was not valid JSON: ${err instanceof Error ? err.message : 'parse error'}`,
            bodyPreview: text.slice(0, 2000),
          };
        }
      }
      return {
        success: response.ok,
        status: response.status,
        statusText: response.statusText,
        url: response.url,
        headers: responseHeaders,
        body: input.responseType === 'json' ? parsed : text,
        truncated,
      };
    } catch (error) {
      return {
        success: false,
        error: error instanceof Error ? error.message : 'HTTP fetch failed.',
        aborted: controller.signal.aborted,
      };
    } finally {
      clearTimeout(timer);
    }
  },
},
|
||||
|
||||
// ============================================================================
|
||||
// Browser Skills (browser-use/browser-harness domain-skills cache)
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Tool over the cached browser-skills library. `action` selects the mode:
 * `load` (default) returns one skill's markdown by `id`, `list` enumerates
 * cached skills (optionally filtered by `site`), and `refresh` re-downloads
 * the library. Every failure is returned as `{ success: false, error }`.
 */
'load-browser-skill': {
  description: 'Load a site-specific browser skill (from the browser-use/browser-harness domain-skills library) by id. Returns the full markdown content with selectors, gotchas, and recipes for the target site. Call this after browser-control responses surface a matching skill in suggestedSkills. Pass action="list" to see all available skills. Skills are fetched on first use and cached locally; pass action="refresh" to force an update from upstream.',
  inputSchema: z.object({
    action: z.enum(['load', 'list', 'refresh']).optional().describe('load: fetch a skill by id (default). list: list all cached skills. refresh: re-fetch the library from upstream.'),
    id: z.string().optional().describe('Skill id (e.g., "github/repo-actions") — required for load.'),
    site: z.string().optional().describe('Filter list results to a single site (e.g., "github").'),
  }),
  execute: async (input: { action?: 'load' | 'list' | 'refresh'; id?: string; site?: string }) => {
    const mode = input.action ?? 'load';
    try {
      switch (mode) {
        case 'refresh': {
          const refreshed = await refreshBrowserSkills();
          const total = refreshed.entries.length;
          const plural = total === 1 ? '' : 's';
          return {
            success: true,
            message: `Refreshed ${total} skill${plural} from upstream.`,
            count: total,
            treeSha: refreshed.treeSha,
          };
        }

        case 'list': {
          const loader = await ensureBrowserSkillsLoaded();
          if (loader.status === 'error') {
            return { success: false, error: loader.error };
          }
          if (loader.status === 'empty') {
            return { success: false, error: 'No browser skills cached yet.' };
          }
          const skills: Array<{ id: string; title: string; site: string }> = [];
          for (const entry of loader.index.entries) {
            if (input.site && entry.site !== input.site) continue;
            skills.push({ id: entry.id, title: entry.title, site: entry.site });
          }
          return {
            success: true,
            count: skills.length,
            skills,
            cacheAgeMs: Date.now() - loader.index.fetchedAt,
            refreshing: loader.status === 'stale' ? loader.refreshing : false,
          };
        }

        default: {
          if (!input.id) {
            return { success: false, error: 'id is required for load.' };
          }
          const loaded = await readBrowserSkillContent(input.id);
          if (!loaded.ok) {
            return { success: false, error: loaded.error };
          }
          const { entry, content } = loaded;
          return {
            success: true,
            id: entry.id,
            title: entry.title,
            site: entry.site,
            path: entry.path,
            content,
          };
        }
      }
    } catch (failure) {
      const message = failure instanceof Error ? failure.message : 'Failed to load browser skill.';
      return { success: false, error: message };
    }
  },
},
|
||||
|
||||
// ============================================================================
|
||||
// Browser Control
|
||||
// ============================================================================
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ export const BrowserControlActionSchema = z.enum([
|
|||
'press',
|
||||
'scroll',
|
||||
'wait',
|
||||
'eval',
|
||||
]);
|
||||
|
||||
const BrowserElementTargetFields = {
|
||||
|
|
@ -70,6 +71,7 @@ export const BrowserControlInputSchema = z.object({
|
|||
ms: z.number().int().positive().max(30000).optional(),
|
||||
maxElements: z.number().int().positive().max(100).optional(),
|
||||
maxTextLength: z.number().int().positive().max(20000).optional(),
|
||||
code: z.string().min(1).max(50000).optional(),
|
||||
...BrowserElementTargetFields,
|
||||
}).strict().superRefine((value, ctx) => {
|
||||
const needsElementTarget = value.action === 'click' || value.action === 'type';
|
||||
|
|
@ -114,6 +116,20 @@ export const BrowserControlInputSchema = z.object({
|
|||
message: 'Provide an element index or selector.',
|
||||
});
|
||||
}
|
||||
|
||||
if (value.action === 'eval' && !value.code) {
|
||||
ctx.addIssue({
|
||||
code: z.ZodIssueCode.custom,
|
||||
path: ['code'],
|
||||
message: 'code is required for eval.',
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * Shape of one skill hint attached to a browser-control result: the skill id,
 * its title, and its path. All three are plain strings.
 */
export const SuggestedBrowserSkillSchema = z.object({
|
||||
id: z.string(),
|
||||
title: z.string(),
|
||||
path: z.string(),
|
||||
});
|
||||
|
||||
export const BrowserControlResultSchema = z.object({
|
||||
|
|
@ -123,6 +139,8 @@ export const BrowserControlResultSchema = z.object({
|
|||
error: z.string().optional(),
|
||||
browser: BrowserStateSchema,
|
||||
page: BrowserPageSnapshotSchema.optional(),
|
||||
result: z.unknown().optional(),
|
||||
suggestedSkills: z.array(SuggestedBrowserSkillSchema).optional(),
|
||||
});
|
||||
|
||||
export type BrowserTabState = z.infer<typeof BrowserTabStateSchema>;
|
||||
|
|
@ -132,3 +150,4 @@ export type BrowserPageSnapshot = z.infer<typeof BrowserPageSnapshotSchema>;
|
|||
// Static types inferred from the zod schemas declared in this module.
export type BrowserControlAction = z.infer<typeof BrowserControlActionSchema>;
|
||||
export type BrowserControlInput = z.infer<typeof BrowserControlInputSchema>;
|
||||
export type BrowserControlResult = z.infer<typeof BrowserControlResultSchema>;
|
||||
// One entry of the optional `suggestedSkills` array on BrowserControlResult.
export type SuggestedBrowserSkill = z.infer<typeof SuggestedBrowserSkillSchema>;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue