mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-04-25 00:06:21 +02:00
fix(fetch): surface semaphore-closed as typed error instead of panic (P1) (#21)
Three call sites in webclaw-fetch used .expect("semaphore closed") on
`Semaphore::acquire()`. Under normal operation they never fire, but
under a shutdown race or adversarial runtime state the spawned task
would panic and be silently dropped from the batch / crawl run — the
caller would see fewer results than URLs with no indication why.
Rewritten to match on the acquire result:
- client::fetch_batch and client::fetch_and_extract_batch_with_options
now emit BatchResult/BatchExtractResult carrying
FetchError::Build("semaphore closed before acquire").
- crawler's inner loop emits a failed PageResult with the same error
string instead of panicking.
Behaviorally a no-op for the happy path. Fixes the silent-dropped-task
class of bug noted in the 2026-04-16 audit.
Version: 0.3.14 -> 0.3.15
CHANGELOG updated.
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
1352f48e05
commit
7773c8af2a
5 changed files with 46 additions and 16 deletions
|
|
@ -3,6 +3,13 @@
|
|||
All notable changes to webclaw are documented here.
|
||||
Format follows [Keep a Changelog](https://keepachangelog.com/).
|
||||
|
||||
## [0.3.15] — 2026-04-16
|
||||
|
||||
### Fixed
|
||||
- **Batch/crawl no longer panics on semaphore close (P1).** Three `permit.acquire().await.expect("semaphore closed")` call sites in `webclaw-fetch` (`client::fetch_batch`, `client::fetch_and_extract_batch_with_options`, `crawler` inner loop) now surface a typed `FetchError::Build("semaphore closed before acquire")` or a failed `PageResult` instead of panicking the spawned task. Under normal operation nothing changes; under shutdown-race or adversarial runtime state, the caller sees one failed entry in the batch instead of losing the task silently to the runtime's panic handler. Surfaced by the 2026-04-16 workspace audit.
|
||||
|
||||
---
|
||||
|
||||
## [0.3.14] — 2026-04-16
|
||||
|
||||
### Security
|
||||
|
|
|
|||
12
Cargo.lock
generated
12
Cargo.lock
generated
|
|
@ -3102,7 +3102,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-cli"
|
||||
version = "0.3.14"
|
||||
version = "0.3.15"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"dotenvy",
|
||||
|
|
@ -3123,7 +3123,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-core"
|
||||
version = "0.3.14"
|
||||
version = "0.3.15"
|
||||
dependencies = [
|
||||
"ego-tree",
|
||||
"once_cell",
|
||||
|
|
@ -3141,7 +3141,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-fetch"
|
||||
version = "0.3.14"
|
||||
version = "0.3.15"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"calamine",
|
||||
|
|
@ -3163,7 +3163,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-llm"
|
||||
version = "0.3.14"
|
||||
version = "0.3.15"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"reqwest",
|
||||
|
|
@ -3176,7 +3176,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-mcp"
|
||||
version = "0.3.14"
|
||||
version = "0.3.15"
|
||||
dependencies = [
|
||||
"dirs",
|
||||
"dotenvy",
|
||||
|
|
@ -3197,7 +3197,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-pdf"
|
||||
version = "0.3.14"
|
||||
version = "0.3.15"
|
||||
dependencies = [
|
||||
"pdf-extract",
|
||||
"thiserror",
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ resolver = "2"
|
|||
members = ["crates/*"]
|
||||
|
||||
[workspace.package]
|
||||
version = "0.3.14"
|
||||
version = "0.3.15"
|
||||
edition = "2024"
|
||||
license = "AGPL-3.0"
|
||||
repository = "https://github.com/0xMassi/webclaw"
|
||||
|
|
|
|||
|
|
@ -390,8 +390,14 @@ impl FetchClient {
|
|||
let url = url.to_string();
|
||||
|
||||
handles.push(tokio::spawn(async move {
|
||||
let _permit = permit.acquire().await.expect("semaphore closed");
|
||||
let result = client.fetch(&url).await;
|
||||
// Don't panic if the semaphore has been closed under us
|
||||
// (adversarial runtime state or shutdown race). Surface a
|
||||
// typed error instead so the caller sees one failed URL in
|
||||
// the batch instead of a silently-dropped task.
|
||||
let result = match permit.acquire().await {
|
||||
Ok(_permit) => client.fetch(&url).await,
|
||||
Err(_) => Err(FetchError::Build("semaphore closed before acquire".into())),
|
||||
};
|
||||
(idx, BatchResult { url, result })
|
||||
}));
|
||||
}
|
||||
|
|
@ -430,8 +436,10 @@ impl FetchClient {
|
|||
let opts = options.clone();
|
||||
|
||||
handles.push(tokio::spawn(async move {
|
||||
let _permit = permit.acquire().await.expect("semaphore closed");
|
||||
let result = client.fetch_and_extract_with_options(&url, &opts).await;
|
||||
let result = match permit.acquire().await {
|
||||
Ok(_permit) => client.fetch_and_extract_with_options(&url, &opts).await,
|
||||
Err(_) => Err(FetchError::Build("semaphore closed before acquire".into())),
|
||||
};
|
||||
(idx, BatchExtractResult { url, result })
|
||||
}));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -294,12 +294,27 @@ impl Crawler {
|
|||
let delay = self.config.delay;
|
||||
|
||||
handles.push(tokio::spawn(async move {
|
||||
// Acquire permit -- blocks if concurrency limit reached
|
||||
let _permit = permit.acquire().await.expect("semaphore closed");
|
||||
tokio::time::sleep(delay).await;
|
||||
|
||||
// Acquire permit -- blocks if concurrency limit reached.
|
||||
// Surface semaphore-closed as a failed PageResult rather
|
||||
// than panicking the spawned task and silently dropping
|
||||
// it from the batch.
|
||||
let page_start = Instant::now();
|
||||
let result = client.fetch_and_extract(&url).await;
|
||||
let result = match permit.acquire().await {
|
||||
Ok(_permit) => {
|
||||
tokio::time::sleep(delay).await;
|
||||
client.fetch_and_extract(&url).await
|
||||
}
|
||||
Err(_) => {
|
||||
warn!(url = %url, depth, "semaphore closed before acquire");
|
||||
return PageResult {
|
||||
url,
|
||||
depth,
|
||||
extraction: None,
|
||||
error: Some("semaphore closed before acquire".into()),
|
||||
elapsed: page_start.elapsed(),
|
||||
};
|
||||
}
|
||||
};
|
||||
let elapsed = page_start.elapsed();
|
||||
|
||||
match result {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue