mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-04-25 00:06:21 +02:00
feat: add fallback sitemap paths for broader discovery
Try /sitemap_index.xml, /wp-sitemap.xml, and /sitemap/sitemap-index.xml after the standard /sitemap.xml. WordPress 5.5+ and many CMS platforms use non-standard paths that were previously missed. Paths found via robots.txt are deduplicated to avoid double-fetching. Bump to 0.3.11. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
25b6282d5f
commit
a4c351d5ae
4 changed files with 39 additions and 13 deletions
12
Cargo.lock
generated
12
Cargo.lock
generated
|
|
@ -3102,7 +3102,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-cli"
|
||||
version = "0.3.9"
|
||||
version = "0.3.11"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"dotenvy",
|
||||
|
|
@ -3122,7 +3122,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-core"
|
||||
version = "0.3.9"
|
||||
version = "0.3.11"
|
||||
dependencies = [
|
||||
"ego-tree",
|
||||
"once_cell",
|
||||
|
|
@ -3140,7 +3140,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-fetch"
|
||||
version = "0.3.9"
|
||||
version = "0.3.11"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"calamine",
|
||||
|
|
@ -3162,7 +3162,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-llm"
|
||||
version = "0.3.9"
|
||||
version = "0.3.11"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"reqwest",
|
||||
|
|
@ -3175,7 +3175,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-mcp"
|
||||
version = "0.3.9"
|
||||
version = "0.3.11"
|
||||
dependencies = [
|
||||
"dirs",
|
||||
"dotenvy",
|
||||
|
|
@ -3196,7 +3196,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-pdf"
|
||||
version = "0.3.9"
|
||||
version = "0.3.11"
|
||||
dependencies = [
|
||||
"pdf-extract",
|
||||
"thiserror",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue