mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-04-25 00:06:21 +02:00
Wires the vertical extractor catalog into both the CLI and the MCP
server so users don't have to hit the HTTP API to invoke them. Same
semantics as `/v1/scrape/{vertical}` + `/v1/extractors`.
CLI (webclaw-cli):
- New subcommand `webclaw extractors` lists all 28 extractors with
name, label, and sample URL. `--json` flag emits the full catalog
as machine-readable JSON.
- New subcommand `webclaw vertical <name> <url>` runs a specific
extractor and prints typed JSON. Pretty-printed by default; `--raw`
for single-line. Exits 1 with a clear "URL does not match" error
on mismatch.
- FetchClient built with Firefox profile + cloud fallback attached
when WEBCLAW_API_KEY is set, so antibot-gated verticals escalate.
MCP (webclaw-mcp):
- New tool `list_extractors` (no args) returns the catalog as
pretty-printed JSON for in-session discovery.
- New tool `vertical_scrape` takes `{name, url}` and returns typed
JSON. Reuses the long-lived self.fetch_client.
- Tool count goes from 10 to 12. Server-info instruction string
updated accordingly.
Tests: 215 passing, clippy clean. Manual surface-tested end-to-end:
CLI prints real Reddit/github/pypi data; MCP JSON-RPC session returns
28-entry catalog + typed responses for pypi/requests + rust-lang/rust
in 200-400ms.
Version bumped to 0.5.2 (minor for API additions, backwards compatible).
24 lines
679 B
TOML
24 lines
679 B
TOML
[workspace]
|
|
resolver = "2"
|
|
members = ["crates/*"]
|
|
|
|
[workspace.package]
|
|
version = "0.5.2"
|
|
edition = "2024"
|
|
license = "AGPL-3.0"
|
|
repository = "https://github.com/0xMassi/webclaw"
|
|
|
|
[workspace.dependencies]
|
|
webclaw-core = { path = "crates/webclaw-core" }
|
|
webclaw-fetch = { path = "crates/webclaw-fetch" }
|
|
webclaw-llm = { path = "crates/webclaw-llm" }
|
|
webclaw-pdf = { path = "crates/webclaw-pdf" }
|
|
tokio = { version = "1", features = ["full"] }
|
|
serde = { version = "1", features = ["derive"] }
|
|
serde_json = "1"
|
|
thiserror = "2"
|
|
tracing = "0.1"
|
|
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
|
clap = { version = "4", features = ["derive", "env"] }
|
|
dotenvy = "0.15"
|
|
|