mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-06-30 03:49:37 +02:00
fix: harden LLM providers, UTF-8 handling, and webhook/batch reliability
- webclaw-llm: add explicit request + connect timeouts to the reqwest client in every provider (anthropic, openai, ollama) with a shorter timeout on the ollama health check, so a stalled provider fails fast.
- webclaw-llm: fix a panic when truncating a provider error body that contains multibyte characters near the 500-char cut (char-safe take).
- webclaw-core: snap the endpoint-scan budget cut to a UTF-8 char boundary so oversized scripts with non-ASCII content no longer panic.
- webclaw-core: rewrite js_literal_to_json to copy raw bytes instead of `byte as char`, preserving multibyte UTF-8 in SvelteKit string values rather than producing Latin-1 mojibake.
- webclaw-cli: have fire_webhook return its JoinHandle and await it at the crawl/batch/batch-llm call sites, removing the fixed 500ms sleeps.
- webclaw-mcp: drop the up-front DNS pre-validation loop in batch that aborted the whole request on one bad URL; the fetch layer already applies the same SSRF guard per URL and reports per-URL errors.
- webclaw-fetch: include the port in the warmup homepage URL so hosts on a non-default port are warmed correctly.

Adds regression tests for the UTF-8 endpoint-scan and SvelteKit cases.
This commit is contained in:
parent
d0d7b835f2
commit
499345046c
9 changed files with 117 additions and 51 deletions
|
|
@ -1,6 +1,8 @@
|
|||
/// Anthropic provider — Claude models via api.anthropic.com.
|
||||
/// Anthropic's API differs from OpenAI: system message is a top-level param,
|
||||
/// not part of the messages array.
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde_json::json;
|
||||
|
||||
|
|
@ -35,7 +37,11 @@ impl AnthropicProvider {
|
|||
let key = load_api_key(key_override, "ANTHROPIC_API_KEY")?;
|
||||
|
||||
Some(Self {
|
||||
client: reqwest::Client::new(),
|
||||
client: reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(120))
|
||||
.connect_timeout(Duration::from_secs(10))
|
||||
.build()
|
||||
.unwrap_or_else(|_| reqwest::Client::new()),
|
||||
key,
|
||||
base_url: base_url
|
||||
.or_else(|| std::env::var("ANTHROPIC_BASE_URL").ok())
|
||||
|
|
@ -108,11 +114,7 @@ impl LlmProvider for AnthropicProvider {
|
|||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let text = resp.text().await.unwrap_or_default();
|
||||
let safe_text = if text.len() > 500 {
|
||||
&text[..500]
|
||||
} else {
|
||||
&text
|
||||
};
|
||||
let safe_text = text.chars().take(500).collect::<String>();
|
||||
return Err(LlmError::ProviderError(format!(
|
||||
"anthropic returned {status}: {safe_text}"
|
||||
)));
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
/// Ollama provider — talks to a local Ollama instance (default localhost:11434).
|
||||
/// First choice in the provider chain: free, private, fast on Apple Silicon.
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde_json::json;
|
||||
|
||||
|
|
@ -24,7 +26,11 @@ impl OllamaProvider {
|
|||
.unwrap_or_else(|| "qwen3:8b".into());
|
||||
|
||||
Self {
|
||||
client: reqwest::Client::new(),
|
||||
client: reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(120))
|
||||
.connect_timeout(Duration::from_secs(10))
|
||||
.build()
|
||||
.unwrap_or_else(|_| reqwest::Client::new()),
|
||||
base_url,
|
||||
default_model,
|
||||
}
|
||||
|
|
@ -70,11 +76,7 @@ impl LlmProvider for OllamaProvider {
|
|||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let text = resp.text().await.unwrap_or_default();
|
||||
let safe_text = if text.len() > 500 {
|
||||
&text[..500]
|
||||
} else {
|
||||
&text
|
||||
};
|
||||
let safe_text = text.chars().take(500).collect::<String>();
|
||||
return Err(LlmError::ProviderError(format!(
|
||||
"ollama returned {status}: {safe_text}"
|
||||
)));
|
||||
|
|
@ -98,7 +100,8 @@ impl LlmProvider for OllamaProvider {
|
|||
|
||||
async fn is_available(&self) -> bool {
|
||||
let url = format!("{}/api/tags", self.base_url);
|
||||
matches!(self.client.get(&url).send().await, Ok(r) if r.status().is_success())
|
||||
let req = self.client.get(&url).timeout(Duration::from_secs(10));
|
||||
matches!(req.send().await, Ok(r) if r.status().is_success())
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
/// OpenAI provider — works with api.openai.com and any OpenAI-compatible endpoint.
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde_json::json;
|
||||
|
||||
|
|
@ -69,7 +71,11 @@ impl OpenAiProvider {
|
|||
let key = load_api_key(key_override, "OPENAI_API_KEY")?;
|
||||
|
||||
Some(Self {
|
||||
client: reqwest::Client::new(),
|
||||
client: reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(120))
|
||||
.connect_timeout(Duration::from_secs(10))
|
||||
.build()
|
||||
.unwrap_or_else(|_| reqwest::Client::new()),
|
||||
key,
|
||||
base_url: base_url
|
||||
.or_else(|| std::env::var("OPENAI_BASE_URL").ok())
|
||||
|
|
@ -132,11 +138,7 @@ impl LlmProvider for OpenAiProvider {
|
|||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let text = resp.text().await.unwrap_or_default();
|
||||
let safe_text = if text.len() > 500 {
|
||||
&text[..500]
|
||||
} else {
|
||||
&text
|
||||
};
|
||||
let safe_text = text.chars().take(500).collect::<String>();
|
||||
return Err(LlmError::ProviderError(format!(
|
||||
"openai returned {status}: {safe_text}"
|
||||
)));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue