fix: harden LLM providers, UTF-8 handling, and webhook/batch reliability

- webclaw-llm: add explicit request + connect timeouts to the reqwest
  client in every provider (anthropic, openai, ollama) with a shorter
  timeout on the ollama health check, so a stalled provider fails fast.
- webclaw-llm: fix a panic when truncating a provider error body whose
  500-byte cut point falls inside a multibyte character; truncation now
  takes the first 500 chars instead of slicing at a byte offset.
- webclaw-core: snap the endpoint-scan budget cut to a UTF-8 char
  boundary so oversized scripts with non-ASCII content no longer panic.
- webclaw-core: rewrite js_literal_to_json to copy raw bytes instead of
  `byte as char`, preserving multibyte UTF-8 in SvelteKit string values
  rather than producing Latin-1 mojibake.
- webclaw-cli: have fire_webhook return its JoinHandle and await it at
  the crawl/batch/batch-llm call sites, removing the fixed 500ms sleeps.
- webclaw-mcp: drop the up-front DNS pre-validation loop in batch that
  aborted the whole request on one bad URL; the fetch layer already
  applies the same SSRF guard per URL and reports per-URL errors.
- webclaw-fetch: include the port in the warmup homepage URL so hosts
  on a non-default port are warmed correctly.

Adds regression tests for the UTF-8 endpoint-scan and SvelteKit cases.
This commit is contained in:
Valerio 2026-06-09 21:10:15 +02:00
parent d0d7b835f2
commit 499345046c
9 changed files with 117 additions and 51 deletions

View file

@ -1,6 +1,8 @@
/// Anthropic provider — Claude models via api.anthropic.com.
/// Anthropic's API differs from OpenAI: system message is a top-level param,
/// not part of the messages array.
use std::time::Duration;
use async_trait::async_trait;
use serde_json::json;
@ -35,7 +37,11 @@ impl AnthropicProvider {
let key = load_api_key(key_override, "ANTHROPIC_API_KEY")?;
Some(Self {
client: reqwest::Client::new(),
client: reqwest::Client::builder()
.timeout(Duration::from_secs(120))
.connect_timeout(Duration::from_secs(10))
.build()
.unwrap_or_else(|_| reqwest::Client::new()),
key,
base_url: base_url
.or_else(|| std::env::var("ANTHROPIC_BASE_URL").ok())
@ -108,11 +114,7 @@ impl LlmProvider for AnthropicProvider {
if !resp.status().is_success() {
let status = resp.status();
let text = resp.text().await.unwrap_or_default();
let safe_text = if text.len() > 500 {
&text[..500]
} else {
&text
};
let safe_text = text.chars().take(500).collect::<String>();
return Err(LlmError::ProviderError(format!(
"anthropic returned {status}: {safe_text}"
)));

View file

@ -1,5 +1,7 @@
/// Ollama provider — talks to a local Ollama instance (default localhost:11434).
/// First choice in the provider chain: free, private, fast on Apple Silicon.
use std::time::Duration;
use async_trait::async_trait;
use serde_json::json;
@ -24,7 +26,11 @@ impl OllamaProvider {
.unwrap_or_else(|| "qwen3:8b".into());
Self {
client: reqwest::Client::new(),
client: reqwest::Client::builder()
.timeout(Duration::from_secs(120))
.connect_timeout(Duration::from_secs(10))
.build()
.unwrap_or_else(|_| reqwest::Client::new()),
base_url,
default_model,
}
@ -70,11 +76,7 @@ impl LlmProvider for OllamaProvider {
if !resp.status().is_success() {
let status = resp.status();
let text = resp.text().await.unwrap_or_default();
let safe_text = if text.len() > 500 {
&text[..500]
} else {
&text
};
let safe_text = text.chars().take(500).collect::<String>();
return Err(LlmError::ProviderError(format!(
"ollama returned {status}: {safe_text}"
)));
@ -98,7 +100,8 @@ impl LlmProvider for OllamaProvider {
async fn is_available(&self) -> bool {
let url = format!("{}/api/tags", self.base_url);
matches!(self.client.get(&url).send().await, Ok(r) if r.status().is_success())
let req = self.client.get(&url).timeout(Duration::from_secs(10));
matches!(req.send().await, Ok(r) if r.status().is_success())
}
fn name(&self) -> &str {

View file

@ -1,4 +1,6 @@
/// OpenAI provider — works with api.openai.com and any OpenAI-compatible endpoint.
use std::time::Duration;
use async_trait::async_trait;
use serde_json::json;
@ -69,7 +71,11 @@ impl OpenAiProvider {
let key = load_api_key(key_override, "OPENAI_API_KEY")?;
Some(Self {
client: reqwest::Client::new(),
client: reqwest::Client::builder()
.timeout(Duration::from_secs(120))
.connect_timeout(Duration::from_secs(10))
.build()
.unwrap_or_else(|_| reqwest::Client::new()),
key,
base_url: base_url
.or_else(|| std::env::var("OPENAI_BASE_URL").ok())
@ -132,11 +138,7 @@ impl LlmProvider for OpenAiProvider {
if !resp.status().is_success() {
let status = resp.status();
let text = resp.text().await.unwrap_or_default();
let safe_text = if text.len() > 500 {
&text[..500]
} else {
&text
};
let safe_text = text.chars().take(500).collect::<String>();
return Err(LlmError::ProviderError(format!(
"openai returned {status}: {safe_text}"
)));