webclaw/deny.toml
webclaw 02302e7a1d perf(core): hot-path extraction speedups + senior-grade hardening
Extraction ~22% faster on the corpus benchmark with byte-identical output:
- hoist recompiled CSS selectors in the markdown noise path
- single-pass shared og() meta parsing across vertical extractors
- output-safe QuickJS gating (skip the JS VM when no candidate data) +
  reuse the already-parsed document instead of re-parsing
- wreq connect_timeout + connection-pool tuning; dedup the retry loop

Reliability + correctness:
- char-boundary-safe truncation of LLM error bodies (shared helper)
- HTTP connect/read timeouts on all LLM provider clients
- isolate pdf-extract behind catch_unwind + spawn_blocking
- OSS server: crawl inherits the shared fetch profile; ProviderChain built
  once in AppState; request TimeoutLayer

API / safety / docs:
- #[non_exhaustive] on public enums + result structs (+ builders)
- #![forbid(unsafe_code)] on pure crates, deny on llm
- //! crate docs + doctests; scrub bypass/vendor/target specifics from
  public crate docs and comments

Tooling: [profile.release] lto/codegen-units/strip, MSRV pin, deny.toml +
cargo-deny CI, macOS test matrix. CLI main.rs split into focused modules.
2026-06-04 20:22:00 +02:00

59 lines
1.9 KiB
TOML

# cargo-deny configuration — supply-chain gate for the webclaw workspace.
# Run locally with `cargo deny check`; CI runs it via EmbarkStudios/cargo-deny-action.
#
# Scope of enforcement:
# advisories — fail on known RUSTSEC vulnerabilities / unmaintained crates
# bans — keep the dep tree lean and free of disallowed crates
# licenses — allow the AGPL-3.0 workspace plus permissive (and MPL-2.0 weak-copyleft) deps only
# sources — only crates.io and our own GitHub org
[graph]
# Resolve the dependency graph with every Cargo feature enabled, so optional,
# feature-gated dependencies are checked as well.
# No `targets` filter is set, so cargo-deny considers crates for every target
# platform and a vuln gated behind a non-host platform still trips the gate.
# If a `targets` list is ever added, keep it in sync with the platforms we
# actually ship.
all-features = true
[advisories]
version = 2
# With the v2 schema, `vulnerability`, `unsound`, and `unmaintained` advisories
# error by default unless explicitly ignored below.
# Yanked crate versions are the exception: cargo-deny only warns on them by
# default, so raise that to `deny` to keep the gate fail-closed.
yanked = "deny"
# Add specific RUSTSEC ids here, each with a justification, only when a fix is
# not yet available upstream.
ignore = []
[bans]
# Warn (don't hard-fail) on duplicate versions of the same crate — common and
# usually benign in a tree this size; revisit if a duplicate becomes a problem.
multiple-versions = "warn"
# Reject dependencies declared with a `*` version requirement. The cargo-deny
# key for this check is `wildcards`.
# NOTE(review): path/git dependencies declared without a `version` are also
# treated as wildcards; if internal workspace crates trip this, consider
# `allow-wildcard-paths = true` (only relaxes the check for private crates).
wildcards = "deny"
# Crates that must never enter the tree. Empty for now; this is where a banned
# transitive dep (e.g. an unmaintained TLS or crypto crate) would be listed.
deny = []
[licenses]
version = 2
# SPDX identifiers accepted anywhere in the tree: AGPL-3.0 covers the workspace
# crates themselves; everything else is what we tolerate on dependencies
# (permissive licenses, plus MPL-2.0's file-level copyleft).
allow = [
    "AGPL-3.0",
    "MIT",
    "Apache-2.0",
    "Apache-2.0 WITH LLVM-exception",
    "BSD-2-Clause",
    "BSD-3-Clause",
    "MPL-2.0",
    "ISC",
    "Unicode-3.0",
    "Unicode-DFS-2016",
    "Zlib",
    "CC0-1.0",
]
# Minimum match score (0.0–1.0) when cargo-deny has to infer a license from a
# crate's license text because its manifest carries no SPDX expression. Crates
# that still cannot be resolved fail the check until a clarification is added
# for them in this section.
confidence-threshold = 0.8
[sources]
# Allow-list: the crates.io index is the only registry we pull from, and git
# dependencies are accepted only from repositories under our own GitHub org.
allow-registry = ["https://github.com/rust-lang/crates.io-index"]
allow-org.github = ["0xMassi"]
# Anything outside the allow-list above is a hard failure.
unknown-registry = "deny"
unknown-git = "deny"