mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-06-27 03:19:38 +02:00
fix: harden LLM providers, UTF-8 handling, and webhook/batch reliability
- webclaw-llm: add explicit request + connect timeouts to the reqwest client in every provider (anthropic, openai, ollama) with a shorter timeout on the ollama health check, so a stalled provider fails fast.
- webclaw-llm: fix a panic when truncating a provider error body that contains multibyte characters near the 500-char cut (char-safe take).
- webclaw-core: snap the endpoint-scan budget cut to a UTF-8 char boundary so oversized scripts with non-ASCII content no longer panic.
- webclaw-core: rewrite js_literal_to_json to copy raw bytes instead of `byte as char`, preserving multibyte UTF-8 in SvelteKit string values rather than producing Latin-1 mojibake.
- webclaw-cli: have fire_webhook return its JoinHandle and await it at the crawl/batch/batch-llm call sites, removing the fixed 500ms sleeps.
- webclaw-mcp: drop the up-front DNS pre-validation loop in batch that aborted the whole request on one bad URL; the fetch layer already applies the same SSRF guard per URL and reports per-URL errors.
- webclaw-fetch: include the port in the warmup homepage URL so hosts on a non-default port are warmed correctly.

Adds regression tests for the UTF-8 endpoint-scan and SvelteKit cases.
This commit is contained in:
parent
d0d7b835f2
commit
499345046c
9 changed files with 117 additions and 51 deletions
|
|
@@ -801,11 +801,17 @@ fn is_challenge_html(html: &str) -> bool {
|
|||
false
|
||||
}
|
||||
|
||||
/// Extract the homepage URL (scheme + host) from a full URL.
|
||||
/// Extract the homepage URL (scheme + host[:port]) from a full URL.
|
||||
fn extract_homepage(url: &str) -> Option<String> {
|
||||
url::Url::parse(url)
|
||||
.ok()
|
||||
.map(|u| format!("{}://{}/", u.scheme(), u.host_str().unwrap_or("")))
|
||||
url::Url::parse(url).ok().map(|u| {
|
||||
let host = u.host_str().unwrap_or("");
|
||||
// `port()` is `Some` only for a non-default port; include it so a
|
||||
// host like example.com:8443 is warmed on the right port.
|
||||
match u.port() {
|
||||
Some(port) => format!("{}://{}:{}/", u.scheme(), host, port),
|
||||
None => format!("{}://{}/", u.scheme(), host),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Convert a webclaw-pdf PdfResult into a webclaw-core ExtractionResult.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue