fix: clean llm output noise

Port the valid PR #43 LLM cleanup fixes onto current main without stale branch regressions.\n\nIncludes comment-count link cleanup, bare numeric paragraph cleanup, pagination leftover cleanup, JSON-LD article body scrubbing, clearer CLI consent-wall warnings, and quieter parser logs by default.\n\nThanks to @devnen for the report and patch work.
This commit is contained in:
Valerio 2026-05-18 18:39:33 +02:00 committed by GitHub
parent 5eef8358b0
commit 3fabdc1d02
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 348 additions and 18 deletions

14
Cargo.lock generated
View file

@ -3219,7 +3219,7 @@ dependencies = [
[[package]]
name = "webclaw-cli"
version = "0.6.1"
version = "0.6.2"
dependencies = [
"clap",
"dotenvy",
@ -3240,7 +3240,7 @@ dependencies = [
[[package]]
name = "webclaw-core"
version = "0.6.1"
version = "0.6.2"
dependencies = [
"ego-tree",
"once_cell",
@ -3258,7 +3258,7 @@ dependencies = [
[[package]]
name = "webclaw-fetch"
version = "0.6.1"
version = "0.6.2"
dependencies = [
"async-trait",
"bytes",
@ -3284,7 +3284,7 @@ dependencies = [
[[package]]
name = "webclaw-llm"
version = "0.6.1"
version = "0.6.2"
dependencies = [
"async-trait",
"reqwest",
@ -3297,7 +3297,7 @@ dependencies = [
[[package]]
name = "webclaw-mcp"
version = "0.6.1"
version = "0.6.2"
dependencies = [
"dirs",
"dotenvy",
@ -3317,7 +3317,7 @@ dependencies = [
[[package]]
name = "webclaw-pdf"
version = "0.6.1"
version = "0.6.2"
dependencies = [
"pdf-extract",
"thiserror",
@ -3326,7 +3326,7 @@ dependencies = [
[[package]]
name = "webclaw-server"
version = "0.6.1"
version = "0.6.2"
dependencies = [
"anyhow",
"axum",