mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-04-25 00:06:21 +02:00
Replaces the previous benchmarks/README.md, which claimed specific numbers (94.2% accuracy, 0.8ms extraction, 97% Cloudflare bypass, etc.) with no reproducing code committed to the repo. The `webclaw-bench` crate and `benchmarks/fixtures`, `benchmarks/ground-truth` directories it referenced never existed. This is what #18 was calling out. New benchmarks/ is fully reproducible. Every number ships with the script that produced it. `./benchmarks/run.sh` regenerates everything. Results (18 sites, 90 hand-curated facts, median of 3 runs, webclaw 0.3.18, cl100k_base tokenizer): tool reduction_mean fidelity latency_mean webclaw 92.5% 76/90 (84.4%) 0.41s firecrawl 92.4% 70/90 (77.8%) 0.99s trafilatura 97.8% 45/90 (50.0%) 0.21s webclaw matches or beats both competitors on fidelity on all 18 sites while running 2.4x faster than Firecrawl's hosted API. Includes: - README.md — headline table + per-site breakdown - methodology.md — tokenizer, fact selection, run rationale - sites.txt — 18 canonical URLs - facts.json — 90 curated facts (PRs welcome to add sites) - scripts/bench.py — the runner - results/2026-04-17.json — today's raw data, median of 3 runs - run.sh — one-command reproduction Closes #18 Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
397 lines
No EOL
8.5 KiB
JSON
397 lines
No EOL
8.5 KiB
JSON
{
|
|
"timestamp": "2026-04-17 14:28:42",
|
|
"webclaw_version": "0.3.18",
|
|
"trafilatura_version": "2.0.0",
|
|
"tokenizer": "cl100k_base",
|
|
"runs_per_site": 3,
|
|
"site_count": 18,
|
|
"total_facts": 90,
|
|
"aggregates": {
|
|
"webclaw": {
|
|
"reduction_mean": 92.5,
|
|
"reduction_median": 97.8,
|
|
"facts_preserved": 76,
|
|
"total_facts": 90,
|
|
"fidelity_pct": 84.4,
|
|
"latency_mean": 0.41
|
|
},
|
|
"trafilatura": {
|
|
"reduction_mean": 97.8,
|
|
"reduction_median": 99.7,
|
|
"facts_preserved": 45,
|
|
"total_facts": 90,
|
|
"fidelity_pct": 50.0,
|
|
"latency_mean": 0.2
|
|
},
|
|
"firecrawl": {
|
|
"reduction_mean": 92.4,
|
|
"reduction_median": 96.2,
|
|
"facts_preserved": 70,
|
|
"total_facts": 90,
|
|
"fidelity_pct": 77.8,
|
|
"latency_mean": 0.99
|
|
}
|
|
},
|
|
"per_site": [
|
|
{
|
|
"url": "https://openai.com",
|
|
"facts_count": 5,
|
|
"raw_tokens": 170510,
|
|
"webclaw": {
|
|
"tokens_med": 1238,
|
|
"facts_med": 3,
|
|
"seconds_med": 0.49
|
|
},
|
|
"trafilatura": {
|
|
"tokens_med": 0,
|
|
"facts_med": 0,
|
|
"seconds_med": 0.12
|
|
},
|
|
"firecrawl": {
|
|
"tokens_med": 3139,
|
|
"facts_med": 2,
|
|
"seconds_med": 1.14
|
|
}
|
|
},
|
|
{
|
|
"url": "https://vercel.com",
|
|
"facts_count": 5,
|
|
"raw_tokens": 380172,
|
|
"webclaw": {
|
|
"tokens_med": 1076,
|
|
"facts_med": 3,
|
|
"seconds_med": 0.31
|
|
},
|
|
"trafilatura": {
|
|
"tokens_med": 585,
|
|
"facts_med": 3,
|
|
"seconds_med": 0.23
|
|
},
|
|
"firecrawl": {
|
|
"tokens_med": 4029,
|
|
"facts_med": 3,
|
|
"seconds_med": 0.99
|
|
}
|
|
},
|
|
{
|
|
"url": "https://anthropic.com",
|
|
"facts_count": 5,
|
|
"raw_tokens": 102911,
|
|
"webclaw": {
|
|
"tokens_med": 672,
|
|
"facts_med": 5,
|
|
"seconds_med": 0.31
|
|
},
|
|
"trafilatura": {
|
|
"tokens_med": 96,
|
|
"facts_med": 4,
|
|
"seconds_med": 0.21
|
|
},
|
|
"firecrawl": {
|
|
"tokens_med": 560,
|
|
"facts_med": 5,
|
|
"seconds_med": 0.81
|
|
}
|
|
},
|
|
{
|
|
"url": "https://www.notion.com",
|
|
"facts_count": 5,
|
|
"raw_tokens": 109312,
|
|
"webclaw": {
|
|
"tokens_med": 13416,
|
|
"facts_med": 5,
|
|
"seconds_med": 0.93
|
|
},
|
|
"trafilatura": {
|
|
"tokens_med": 91,
|
|
"facts_med": 2,
|
|
"seconds_med": 0.65
|
|
},
|
|
"firecrawl": {
|
|
"tokens_med": 5261,
|
|
"facts_med": 5,
|
|
"seconds_med": 0.99
|
|
}
|
|
},
|
|
{
|
|
"url": "https://stripe.com",
|
|
"facts_count": 5,
|
|
"raw_tokens": 243465,
|
|
"webclaw": {
|
|
"tokens_med": 81974,
|
|
"facts_med": 5,
|
|
"seconds_med": 0.71
|
|
},
|
|
"trafilatura": {
|
|
"tokens_med": 2418,
|
|
"facts_med": 0,
|
|
"seconds_med": 0.39
|
|
},
|
|
"firecrawl": {
|
|
"tokens_med": 8922,
|
|
"facts_med": 5,
|
|
"seconds_med": 1.04
|
|
}
|
|
},
|
|
{
|
|
"url": "https://tavily.com",
|
|
"facts_count": 5,
|
|
"raw_tokens": 29964,
|
|
"webclaw": {
|
|
"tokens_med": 1361,
|
|
"facts_med": 5,
|
|
"seconds_med": 0.33
|
|
},
|
|
"trafilatura": {
|
|
"tokens_med": 182,
|
|
"facts_med": 3,
|
|
"seconds_med": 0.18
|
|
},
|
|
"firecrawl": {
|
|
"tokens_med": 1969,
|
|
"facts_med": 4,
|
|
"seconds_med": 0.75
|
|
}
|
|
},
|
|
{
|
|
"url": "https://www.shopify.com",
|
|
"facts_count": 5,
|
|
"raw_tokens": 183738,
|
|
"webclaw": {
|
|
"tokens_med": 1939,
|
|
"facts_med": 3,
|
|
"seconds_med": 0.29
|
|
},
|
|
"trafilatura": {
|
|
"tokens_med": 595,
|
|
"facts_med": 3,
|
|
"seconds_med": 0.22
|
|
},
|
|
"firecrawl": {
|
|
"tokens_med": 5384,
|
|
"facts_med": 3,
|
|
"seconds_med": 0.98
|
|
}
|
|
},
|
|
{
|
|
"url": "https://docs.python.org/3/",
|
|
"facts_count": 5,
|
|
"raw_tokens": 5275,
|
|
"webclaw": {
|
|
"tokens_med": 689,
|
|
"facts_med": 4,
|
|
"seconds_med": 0.12
|
|
},
|
|
"trafilatura": {
|
|
"tokens_med": 347,
|
|
"facts_med": 4,
|
|
"seconds_med": 0.04
|
|
},
|
|
"firecrawl": {
|
|
"tokens_med": 1623,
|
|
"facts_med": 4,
|
|
"seconds_med": 0.79
|
|
}
|
|
},
|
|
{
|
|
"url": "https://react.dev",
|
|
"facts_count": 5,
|
|
"raw_tokens": 107406,
|
|
"webclaw": {
|
|
"tokens_med": 3332,
|
|
"facts_med": 5,
|
|
"seconds_med": 0.23
|
|
},
|
|
"trafilatura": {
|
|
"tokens_med": 763,
|
|
"facts_med": 3,
|
|
"seconds_med": 0.17
|
|
},
|
|
"firecrawl": {
|
|
"tokens_med": 4959,
|
|
"facts_med": 5,
|
|
"seconds_med": 0.92
|
|
}
|
|
},
|
|
{
|
|
"url": "https://tailwindcss.com/docs/installation",
|
|
"facts_count": 5,
|
|
"raw_tokens": 113258,
|
|
"webclaw": {
|
|
"tokens_med": 779,
|
|
"facts_med": 4,
|
|
"seconds_med": 0.27
|
|
},
|
|
"trafilatura": {
|
|
"tokens_med": 430,
|
|
"facts_med": 2,
|
|
"seconds_med": 0.2
|
|
},
|
|
"firecrawl": {
|
|
"tokens_med": 813,
|
|
"facts_med": 4,
|
|
"seconds_med": 1.02
|
|
}
|
|
},
|
|
{
|
|
"url": "https://nextjs.org/docs",
|
|
"facts_count": 5,
|
|
"raw_tokens": 228196,
|
|
"webclaw": {
|
|
"tokens_med": 968,
|
|
"facts_med": 4,
|
|
"seconds_med": 0.24
|
|
},
|
|
"trafilatura": {
|
|
"tokens_med": 631,
|
|
"facts_med": 4,
|
|
"seconds_med": 0.17
|
|
},
|
|
"firecrawl": {
|
|
"tokens_med": 885,
|
|
"facts_med": 4,
|
|
"seconds_med": 0.88
|
|
}
|
|
},
|
|
{
|
|
"url": "https://github.com",
|
|
"facts_count": 5,
|
|
"raw_tokens": 234232,
|
|
"webclaw": {
|
|
"tokens_med": 1438,
|
|
"facts_med": 5,
|
|
"seconds_med": 0.33
|
|
},
|
|
"trafilatura": {
|
|
"tokens_med": 486,
|
|
"facts_med": 3,
|
|
"seconds_med": 0.09
|
|
},
|
|
"firecrawl": {
|
|
"tokens_med": 3058,
|
|
"facts_med": 4,
|
|
"seconds_med": 0.92
|
|
}
|
|
},
|
|
{
|
|
"url": "https://en.wikipedia.org/wiki/Rust_(programming_language)",
|
|
"facts_count": 5,
|
|
"raw_tokens": 189406,
|
|
"webclaw": {
|
|
"tokens_med": 47823,
|
|
"facts_med": 5,
|
|
"seconds_med": 0.36
|
|
},
|
|
"trafilatura": {
|
|
"tokens_med": 37427,
|
|
"facts_med": 5,
|
|
"seconds_med": 0.28
|
|
},
|
|
"firecrawl": {
|
|
"tokens_med": 59326,
|
|
"facts_med": 5,
|
|
"seconds_med": 1.49
|
|
}
|
|
},
|
|
{
|
|
"url": "https://simonwillison.net/2026/Mar/15/latent-reasoning/",
|
|
"facts_count": 5,
|
|
"raw_tokens": 3212,
|
|
"webclaw": {
|
|
"tokens_med": 724,
|
|
"facts_med": 4,
|
|
"seconds_med": 0.12
|
|
},
|
|
"trafilatura": {
|
|
"tokens_med": 0,
|
|
"facts_med": 0,
|
|
"seconds_med": 0.03
|
|
},
|
|
"firecrawl": {
|
|
"tokens_med": 525,
|
|
"facts_med": 2,
|
|
"seconds_med": 0.89
|
|
}
|
|
},
|
|
{
|
|
"url": "https://paulgraham.com/essays.html",
|
|
"facts_count": 5,
|
|
"raw_tokens": 1786,
|
|
"webclaw": {
|
|
"tokens_med": 169,
|
|
"facts_med": 2,
|
|
"seconds_med": 0.9
|
|
},
|
|
"trafilatura": {
|
|
"tokens_med": 0,
|
|
"facts_med": 0,
|
|
"seconds_med": 0.22
|
|
},
|
|
"firecrawl": {
|
|
"tokens_med": 295,
|
|
"facts_med": 1,
|
|
"seconds_med": 0.71
|
|
}
|
|
},
|
|
{
|
|
"url": "https://techcrunch.com",
|
|
"facts_count": 5,
|
|
"raw_tokens": 143309,
|
|
"webclaw": {
|
|
"tokens_med": 7265,
|
|
"facts_med": 5,
|
|
"seconds_med": 0.25
|
|
},
|
|
"trafilatura": {
|
|
"tokens_med": 397,
|
|
"facts_med": 5,
|
|
"seconds_med": 0.2
|
|
},
|
|
"firecrawl": {
|
|
"tokens_med": 11408,
|
|
"facts_med": 5,
|
|
"seconds_med": 1.21
|
|
}
|
|
},
|
|
{
|
|
"url": "https://www.databricks.com",
|
|
"facts_count": 5,
|
|
"raw_tokens": 274051,
|
|
"webclaw": {
|
|
"tokens_med": 2001,
|
|
"facts_med": 4,
|
|
"seconds_med": 0.31
|
|
},
|
|
"trafilatura": {
|
|
"tokens_med": 311,
|
|
"facts_med": 4,
|
|
"seconds_med": 0.2
|
|
},
|
|
"firecrawl": {
|
|
"tokens_med": 5471,
|
|
"facts_med": 4,
|
|
"seconds_med": 1.34
|
|
}
|
|
},
|
|
{
|
|
"url": "https://www.hashicorp.com",
|
|
"facts_count": 5,
|
|
"raw_tokens": 108510,
|
|
"webclaw": {
|
|
"tokens_med": 1501,
|
|
"facts_med": 5,
|
|
"seconds_med": 0.91
|
|
},
|
|
"trafilatura": {
|
|
"tokens_med": 0,
|
|
"facts_med": 0,
|
|
"seconds_med": 0.03
|
|
},
|
|
"firecrawl": {
|
|
"tokens_med": 4289,
|
|
"facts_med": 5,
|
|
"seconds_med": 0.91
|
|
}
|
|
}
|
|
]
|
|
} |