mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-04-25 00:06:21 +02:00
Reddit's .json API rejects the wreq-Chrome TLS fingerprint with a 403, even from residential IPs: their block list includes the fingerprints of known browser-emulation libraries. wreq-Firefox passes.

The CLI `vertical` subcommand already forced Firefox, but MCP `vertical_scrape` was still falling back to the long-lived `self.fetch_client`, which defaults to Chrome. As a result reddit failed on MCP, and nobody noticed because the earlier test runs all had an API key set, which masked the issue.

Switched `vertical_scrape` to reuse `self.firefox_or_build()`, which gives us the cached Firefox client (the same pattern the scrape tool uses when the caller requests `browser: firefox`). Firefox is strictly safer than Chrome for every vertical in the catalog, so making it the hard default for `vertical_scrape` is the right call.

Verified end-to-end from a clean shell with no WEBCLAW_API_KEY:
- MCP reddit: 679ms, post/author/6 comments correct
- MCP instagram_profile: 1157ms, 18471 followers

No change to the `scrape` tool -- it keeps the user-selectable browser param.

Bumps version to 0.5.3.
24 lines
679 B
TOML
24 lines
679 B
TOML
# Workspace definition: which crates belong to the workspace and how Cargo
# resolves features across them.
[workspace]
# Pin Cargo's feature resolver to version 2 for every member.
resolver = "2"
# Glob pattern: each direct subdirectory of crates/ is a workspace member.
members = ["crates/*"]

# Package metadata declared once for the whole workspace. A member crate
# inherits a key by writing `<key>.workspace = true` in its own manifest.
[workspace.package]
version = "0.5.3"
edition = "2024"
# NOTE(review): SPDX marks the bare `AGPL-3.0` identifier as deprecated in
# favour of `AGPL-3.0-only` / `AGPL-3.0-or-later` -- confirm which is intended
# before changing it.
license = "AGPL-3.0"
repository = "https://github.com/0xMassi/webclaw"

# Dependency requirements declared once for the whole workspace. A member crate
# pulls one in with `<name> = { workspace = true }` (or `<name>.workspace = true`).
[workspace.dependencies]
# In-repository crates, referenced by path relative to this manifest.
webclaw-core = { path = "crates/webclaw-core" }
webclaw-fetch = { path = "crates/webclaw-fetch" }
webclaw-llm = { path = "crates/webclaw-llm" }
webclaw-pdf = { path = "crates/webclaw-pdf" }
# External crates; a bare version such as "1" is a caret (SemVer-compatible)
# requirement.
tokio = { version = "1", features = ["full"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
thiserror = "2"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
clap = { version = "4", features = ["derive", "env"] }
dotenvy = "0.15"