diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f0477d..96ed417 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,13 @@ All notable changes to webclaw are documented here. Format follows [Keep a Changelog](https://keepachangelog.com/). +## [0.3.15] — 2026-04-16 + +### Fixed +- **Batch/crawl no longer panics on semaphore close (P1).** Three `permit.acquire().await.expect("semaphore closed")` call sites in `webclaw-fetch` (`client::fetch_batch`, `client::fetch_and_extract_batch_with_options`, `crawler` inner loop) now surface a typed `FetchError::Build("semaphore closed before acquire")` or a failed `PageResult` instead of panicking the spawned task. Under normal operation fetch results are unchanged (one visible difference: the crawler's per-page `elapsed` now also counts the permit wait and the `delay` sleep, because the timer starts before `acquire()`); under shutdown-race or adversarial runtime state, the caller sees one failed entry in the batch instead of losing the task silently to the runtime's panic handler. Surfaced by the 2026-04-16 workspace audit. + +--- + ## [0.3.14] — 2026-04-16 ### Security diff --git a/Cargo.lock b/Cargo.lock index 2d67588..4bf0ec4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3102,7 +3102,7 @@ dependencies = [ [[package]] name = "webclaw-cli" -version = "0.3.14" +version = "0.3.15" dependencies = [ "clap", "dotenvy", @@ -3123,7 +3123,7 @@ dependencies = [ [[package]] name = "webclaw-core" -version = "0.3.14" +version = "0.3.15" dependencies = [ "ego-tree", "once_cell", @@ -3141,7 +3141,7 @@ dependencies = [ [[package]] name = "webclaw-fetch" -version = "0.3.14" +version = "0.3.15" dependencies = [ "bytes", "calamine", @@ -3163,7 +3163,7 @@ dependencies = [ [[package]] name = "webclaw-llm" -version = "0.3.14" +version = "0.3.15" dependencies = [ "async-trait", "reqwest", @@ -3176,7 +3176,7 @@ dependencies = [ [[package]] name = "webclaw-mcp" -version = "0.3.14" +version = "0.3.15" dependencies = [ "dirs", "dotenvy", @@ -3197,7 +3197,7 @@ dependencies = [ [[package]] name = "webclaw-pdf" -version = "0.3.14" +version = "0.3.15" dependencies = [ "pdf-extract", "thiserror", diff --git a/Cargo.toml 
b/Cargo.toml index 4c2061c..97ead31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ resolver = "2" members = ["crates/*"] [workspace.package] -version = "0.3.14" +version = "0.3.15" edition = "2024" license = "AGPL-3.0" repository = "https://github.com/0xMassi/webclaw" diff --git a/crates/webclaw-fetch/src/client.rs b/crates/webclaw-fetch/src/client.rs index f89343c..2bee533 100644 --- a/crates/webclaw-fetch/src/client.rs +++ b/crates/webclaw-fetch/src/client.rs @@ -390,8 +390,14 @@ impl FetchClient { let url = url.to_string(); handles.push(tokio::spawn(async move { - let _permit = permit.acquire().await.expect("semaphore closed"); - let result = client.fetch(&url).await; + // Don't panic if the semaphore has been closed under us + // (adversarial runtime state or shutdown race). Surface a + // typed error instead so the caller sees one failed URL in + // the batch instead of a silently-dropped task. + let result = match permit.acquire().await { + Ok(_permit) => client.fetch(&url).await, + Err(_) => Err(FetchError::Build("semaphore closed before acquire".into())), + }; (idx, BatchResult { url, result }) })); } @@ -430,8 +436,10 @@ impl FetchClient { let opts = options.clone(); handles.push(tokio::spawn(async move { - let _permit = permit.acquire().await.expect("semaphore closed"); - let result = client.fetch_and_extract_with_options(&url, &opts).await; + let result = match permit.acquire().await { + Ok(_permit) => client.fetch_and_extract_with_options(&url, &opts).await, + Err(_) => Err(FetchError::Build("semaphore closed before acquire".into())), + }; (idx, BatchExtractResult { url, result }) })); } diff --git a/crates/webclaw-fetch/src/crawler.rs b/crates/webclaw-fetch/src/crawler.rs index 74eb30f..bfb86a6 100644 --- a/crates/webclaw-fetch/src/crawler.rs +++ b/crates/webclaw-fetch/src/crawler.rs @@ -294,12 +294,27 @@ impl Crawler { let delay = self.config.delay; handles.push(tokio::spawn(async move { - // Acquire permit -- blocks if concurrency limit 
reached - let _permit = permit.acquire().await.expect("semaphore closed"); - tokio::time::sleep(delay).await; - + // Acquire permit -- blocks if concurrency limit reached. + // Surface semaphore-closed as a failed PageResult rather + // than panicking the spawned task and silently dropping + // it from the batch. let page_start = Instant::now(); - let result = client.fetch_and_extract(&url).await; + let result = match permit.acquire().await { + Ok(_permit) => { + tokio::time::sleep(delay).await; + client.fetch_and_extract(&url).await + } + Err(_) => { + warn!(url = %url, depth, "semaphore closed before acquire"); + return PageResult { + url, + depth, + extraction: None, + error: Some("semaphore closed before acquire".into()), + elapsed: page_start.elapsed(), + }; + } + }; let elapsed = page_start.elapsed(); match result {