mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-04-25 00:06:21 +02:00
Embeds QuickJS (rquickjs) to execute inline <script> tags and extract data hidden in JavaScript variable assignments. Captures window.__* objects like __preloadedData (NYTimes), __PRELOADED_STATE__ (Wired), and self.__next_f (Next.js RSC flight data). Results: - NYTimes: 1,552 → 4,162 words (+168%) - Wired: 1,459 → 9,937 words (+580%) - Zero measurable performance overhead (<15ms per page) - Feature-gated: disable with --no-default-features for WASM. Smart text filtering rejects CSS, base64, file paths, and code strings. Only readable prose is appended under "## Additional Content". Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
30 lines
1.1 KiB
TOML
30 lines
1.1 KiB
TOML
# Workspace root: declares which crates build together and how features resolve.
[workspace]
|
|
# Explicitly selects version "2" of Cargo's feature resolver for the workspace.
resolver = "2"
|
|
# Glob pattern: every directory directly under crates/ is a workspace member.
members = ["crates/*"]
|
|
|
|
# Package metadata defined once at the workspace level. Member crates can
# inherit a field with `<field>.workspace = true` (member manifests are not
# visible here — confirm which fields each crate actually inherits).
[workspace.package]
|
|
version = "0.1.4"
|
|
# Rust edition shared by crates that inherit this field.
edition = "2024"
|
|
license = "MIT"
|
|
repository = "https://github.com/0xMassi/webclaw"
|
|
|
|
# Dependency versions declared once for the workspace. Member crates can
# reference an entry with `<dep>.workspace = true` instead of repeating it.
[workspace.dependencies]
|
|
# Internal crates, referenced by path relative to this manifest.
webclaw-core = { path = "crates/webclaw-core" }
|
|
webclaw-fetch = { path = "crates/webclaw-fetch" }
|
|
webclaw-llm = { path = "crates/webclaw-llm" }
|
|
webclaw-pdf = { path = "crates/webclaw-pdf" }
|
|
# Third-party crates from the registry (caret-compatible version requirements).
tokio = { version = "1", features = ["full"] }
|
|
serde = { version = "1", features = ["derive"] }
|
|
serde_json = "1"
|
|
thiserror = "2"
|
|
tracing = "0.1"
|
|
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
|
clap = { version = "4", features = ["derive", "env"] }
|
|
dotenvy = "0.15"
|
|
|
|
# primp requires patched forks with TLS impersonation support
#
# Each entry below replaces the crates.io release of that crate, for the whole
# dependency graph, with a fork taken from the deedy5/primp git repository.
#
# NOTE(review): no `rev`, `tag`, or `branch` is given on any entry, so the
# commit used is not pinned in this manifest — presumably only the lockfile
# fixes it. Consider pinning a `rev` for reproducible builds.
# NOTE(review): Cargo's documented git-dependency keys are `git`, `branch`,
# `tag`, and `rev`; confirm that `subdirectory` is honored by the Cargo
# version in use rather than ignored with an "unused manifest key" warning.
|
|
[patch.crates-io]
|
|
rustls = { git = "https://github.com/deedy5/primp", subdirectory = "crates/primp-rustls/rustls" }
|
|
h2 = { git = "https://github.com/deedy5/primp", subdirectory = "crates/primp-h2" }
|
|
hyper = { git = "https://github.com/deedy5/primp", subdirectory = "crates/primp-hyper" }
|
|
hyper-util = { git = "https://github.com/deedy5/primp", subdirectory = "crates/primp-hyper-util" }
|