mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-06-11 22:55:13 +02:00
fix: harden LLM providers, UTF-8 handling, and webhook/batch reliability
- webclaw-llm: add explicit request and connect timeouts to the reqwest client in every provider (anthropic, openai, ollama), with a shorter timeout on the ollama health check, so a stalled provider fails fast.
- webclaw-llm: fix a panic when truncating a provider error body that contains multibyte characters near the 500-char cut (char-safe take).
- webclaw-core: snap the endpoint-scan budget cut to a UTF-8 char boundary so oversized scripts with non-ASCII content no longer panic.
- webclaw-core: rewrite js_literal_to_json to copy raw bytes instead of `byte as char`, preserving multibyte UTF-8 in SvelteKit string values rather than producing Latin-1 mojibake.
- webclaw-cli: have fire_webhook return its JoinHandle and await it at the crawl/batch/batch-llm call sites, removing the fixed 500ms sleeps.
- webclaw-mcp: drop the up-front DNS pre-validation loop in batch that aborted the whole request on one bad URL; the fetch layer already applies the same SSRF guard per URL and reports per-URL errors.
- webclaw-fetch: include the port in the warmup homepage URL so hosts on a non-default port are warmed correctly.

Adds regression tests for the UTF-8 endpoint-scan and SvelteKit cases.
This commit is contained in:
parent
d0d7b835f2
commit
499345046c
9 changed files with 117 additions and 51 deletions
|
|
@@ -1548,7 +1548,7 @@ async fn run_crawl(cli: &Cli) -> Result<(), String> {
|
|||
// Fire webhook on crawl complete
|
||||
if let Some(ref webhook_url) = cli.webhook {
|
||||
let urls: Vec<&str> = result.pages.iter().map(|p| p.url.as_str()).collect();
|
||||
fire_webhook(
|
||||
let handle = fire_webhook(
|
||||
webhook_url,
|
||||
&serde_json::json!({
|
||||
"event": "crawl_complete",
|
||||
|
|
@@ -1559,8 +1559,8 @@ async fn run_crawl(cli: &Cli) -> Result<(), String> {
|
|||
"urls": urls,
|
||||
}),
|
||||
);
|
||||
// Brief pause so the async webhook has time to fire
|
||||
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
|
||||
// Wait for the webhook to finish so the process doesn't exit mid-send.
|
||||
let _ = handle.await;
|
||||
}
|
||||
|
||||
if result.errors > 0 {
|
||||
|
|
@@ -1658,7 +1658,7 @@ async fn run_batch(cli: &Cli, entries: &[(String, Option<String>)]) -> Result<()
|
|||
// Fire webhook on batch complete
|
||||
if let Some(ref webhook_url) = cli.webhook {
|
||||
let urls: Vec<&str> = results.iter().map(|r| r.url.as_str()).collect();
|
||||
fire_webhook(
|
||||
let handle = fire_webhook(
|
||||
webhook_url,
|
||||
&serde_json::json!({
|
||||
"event": "batch_complete",
|
||||
|
|
@@ -1668,7 +1668,7 @@ async fn run_batch(cli: &Cli, entries: &[(String, Option<String>)]) -> Result<()
|
|||
"urls": urls,
|
||||
}),
|
||||
);
|
||||
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
|
||||
let _ = handle.await;
|
||||
}
|
||||
|
||||
if errors > 0 {
|
||||
|
|
@@ -1742,9 +1742,12 @@ async fn spawn_on_change(cmd: &str, stdin_payload: &[u8]) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Fire a webhook POST with a JSON payload. Non-blocking — errors logged to stderr.
|
||||
/// Auto-detects Discord and Slack webhook URLs and wraps the payload accordingly.
|
||||
fn fire_webhook(url: &str, payload: &serde_json::Value) {
|
||||
/// Fire a webhook POST with a JSON payload. Spawns the send on a background task
|
||||
/// and returns its `JoinHandle` so callers that need delivery (e.g. one-shot
|
||||
/// crawl/batch runs that exit immediately after) can `.await` it; long-running
|
||||
/// loops can drop the handle and let it run fire-and-forget. Errors are logged
|
||||
/// to stderr. Auto-detects Discord and Slack webhook URLs and wraps the payload.
|
||||
fn fire_webhook(url: &str, payload: &serde_json::Value) -> tokio::task::JoinHandle<()> {
|
||||
let url = url.to_string();
|
||||
let is_discord = url.contains("discord.com/api/webhooks");
|
||||
let is_slack = url.contains("hooks.slack.com");
|
||||
|
|
@@ -1806,7 +1809,7 @@ fn fire_webhook(url: &str, payload: &serde_json::Value) {
|
|||
},
|
||||
Err(e) => eprintln!("[webhook] client error: {e}"),
|
||||
}
|
||||
});
|
||||
})
|
||||
}
|
||||
|
||||
async fn run_watch(cli: &Cli, urls: &[String]) -> Result<(), String> {
|
||||
|
|
@@ -2318,7 +2321,7 @@ async fn run_batch_llm(cli: &Cli, entries: &[(String, Option<String>)]) -> Resul
|
|||
eprintln!("Processed {total} URLs ({ok} ok, {errors} errors)");
|
||||
|
||||
if let Some(ref webhook_url) = cli.webhook {
|
||||
fire_webhook(
|
||||
let handle = fire_webhook(
|
||||
webhook_url,
|
||||
&serde_json::json!({
|
||||
"event": "batch_llm_complete",
|
||||
|
|
@@ -2327,7 +2330,7 @@ async fn run_batch_llm(cli: &Cli, entries: &[(String, Option<String>)]) -> Resul
|
|||
"errors": errors,
|
||||
}),
|
||||
);
|
||||
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
|
||||
let _ = handle.await;
|
||||
}
|
||||
|
||||
if errors > 0 {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue