# webclaw/crates/webclaw-core/Cargo.toml

[package]
name = "webclaw-core"
version.workspace = true
edition.workspace = true
# The Reddit regression fixtures are real old.reddit.com pages that the tests
# read at run time. They are large and only useful when running the suite from
# the repository, so they are left out of the published crate.
exclude = ["testdata/reddit/*.html"]
license.workspace = true
description = "Pure HTML content extraction engine for LLMs"

[features]
# `quickjs` is on by default; build with `--no-default-features` to drop it.
default = ["quickjs"]
# Enables the optional `rquickjs` dependency declared further down.
quickjs = ["rquickjs"]

[dependencies]
# Kept in alphabetical order. Entries marked `workspace = true` inherit their
# specification from the workspace's `[workspace.dependencies]` table.
# NOTE(review): ego-tree is presumably pinned to match the version scraper is
# built against -- confirm before bumping either one independently.
ego-tree = "0.10"
once_cell = "1"
regex = "1"
scraper = "0.22"
serde = { workspace = true }
serde_json = { workspace = true }
similar = "2"
thiserror = { workspace = true }
tracing = { workspace = true }
url = { version = "2", features = ["serde"] }

# rquickjs links a C library and cannot build for wasm32. Gating it per
# target keeps the `quickjs` feature usable on native while leaving the
# crate WASM-safe even with default features enabled.
#
# Because the dependency is absent on wasm32 even when `quickjs` is on, code
# that uses rquickjs has to be gated on the target as well as the feature,
# e.g. `#[cfg(all(feature = "quickjs", not(target_arch = "wasm32")))]`.
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
rquickjs = { version = "0.9", features = ["classes", "properties"], optional = true }

[dev-dependencies]
# Only built for tests, examples, and benchmarks; the specification is
# inherited from the workspace manifest.
tokio = { workspace = true }