mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-04-25 00:06:21 +02:00
`search_from = abs_pos + 1` landed mid-char when a rejected match started on a multi-byte UTF-8 character, panicking on the next `markdown[search_from..]` slice. Advance by `needle.len()` instead — always a valid char boundary, and skips the whole rejected match instead of re-scanning inside it. Repro: webclaw https://bruler.ru/about_brand -f json Before: panic "byte index 782 is not a char boundary; it is inside 'Ч'" After: extracts 2.3KB of clean Cyrillic markdown with 7 sections Two regression tests cover multi-byte rejected matches and all-rejected cycles in Cyrillic text. Closes #16 Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
095ae5d4b1
commit
7f0420bbf0
4 changed files with 49 additions and 8 deletions
12
Cargo.lock
generated
12
Cargo.lock
generated
|
|
@ -3102,7 +3102,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-cli"
|
||||
version = "0.3.17"
|
||||
version = "0.3.18"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"dotenvy",
|
||||
|
|
@ -3123,7 +3123,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-core"
|
||||
version = "0.3.17"
|
||||
version = "0.3.18"
|
||||
dependencies = [
|
||||
"ego-tree",
|
||||
"once_cell",
|
||||
|
|
@ -3141,7 +3141,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-fetch"
|
||||
version = "0.3.17"
|
||||
version = "0.3.18"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"calamine",
|
||||
|
|
@ -3163,7 +3163,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-llm"
|
||||
version = "0.3.17"
|
||||
version = "0.3.18"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"reqwest",
|
||||
|
|
@ -3176,7 +3176,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-mcp"
|
||||
version = "0.3.17"
|
||||
version = "0.3.18"
|
||||
dependencies = [
|
||||
"dirs",
|
||||
"dotenvy",
|
||||
|
|
@ -3197,7 +3197,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-pdf"
|
||||
version = "0.3.17"
|
||||
version = "0.3.18"
|
||||
dependencies = [
|
||||
"pdf-extract",
|
||||
"thiserror",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue