[package]
name = "webclaw-core"
version.workspace = true
edition.workspace = true
# The Reddit regression fixtures are real old.reddit.com pages that the
# tests read at run time. They are large and only needed when running the
# test suite from the repo, so they are left out of the published crate.
exclude = ["testdata/reddit/*.html"]
license.workspace = true
description = "Pure HTML content extraction engine for LLMs"

[features]
default = ["quickjs"]
quickjs = ["rquickjs"]

[dependencies]
ego-tree = "0.10"
once_cell = "1"
regex = "1"
scraper = "0.22"
serde = { workspace = true }
serde_json = { workspace = true }
similar = "2"
thiserror = { workspace = true }
tracing = { workspace = true }
url = { version = "2", features = ["serde"] }

# rquickjs links a C library and cannot be built for wasm32. Declaring it
# only for non-wasm32 targets keeps the `quickjs` feature usable on native
# builds while the crate stays WASM-safe even with default features on.
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
rquickjs = { version = "0.9", features = ["classes", "properties"], optional = true }

[dev-dependencies]
tokio = { workspace = true }