diff --git a/CHANGELOG.md b/CHANGELOG.md index c338a3b..1f0477d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,13 @@ All notable changes to webclaw are documented here. Format follows [Keep a Changelog](https://keepachangelog.com/). +## [0.3.14] — 2026-04-16 + +### Security +- **`--on-change` command injection closed (P0).** The `--on-change` flag on `webclaw watch` and its multi-URL variant used to pipe the whole user-supplied string through `sh -c`. Anyone (or any LLM driving the MCP surface, or any config file parsed on the user's behalf) that could influence the flag value could execute arbitrary shell. The command is now tokenized with `shlex` and executed directly via `Command::new(prog).args(args)`, so metacharacters like `;`, `&&`, `|`, `$()`, `<(...)`, and env expansion no longer fire. A `WEBCLAW_ALLOW_SHELL=1` escape hatch is available for users who genuinely need pipelines; it logs a warning on every invocation so it can't slip in silently. Surfaced by the 2026-04-16 workspace audit. + +--- + ## [0.3.13] — 2026-04-10 ### Fixed diff --git a/Cargo.lock b/Cargo.lock index 6470d59..2d67588 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3102,7 +3102,7 @@ dependencies = [ [[package]] name = "webclaw-cli" -version = "0.3.13" +version = "0.3.14" dependencies = [ "clap", "dotenvy", @@ -3110,6 +3110,7 @@ dependencies = [ "regex", "reqwest", "serde_json", + "shlex", "tokio", "tracing", "tracing-subscriber", @@ -3122,7 +3123,7 @@ dependencies = [ [[package]] name = "webclaw-core" -version = "0.3.13" +version = "0.3.14" dependencies = [ "ego-tree", "once_cell", @@ -3140,7 +3141,7 @@ dependencies = [ [[package]] name = "webclaw-fetch" -version = "0.3.13" +version = "0.3.14" dependencies = [ "bytes", "calamine", @@ -3162,7 +3163,7 @@ dependencies = [ [[package]] name = "webclaw-llm" -version = "0.3.13" +version = "0.3.14" dependencies = [ "async-trait", "reqwest", @@ -3175,7 +3176,7 @@ dependencies = [ [[package]] name = "webclaw-mcp" -version = "0.3.13" +version = "0.3.14" dependencies = [ "dirs", "dotenvy", @@ -3196,7 +3197,7 @@ dependencies = [ [[package]] name = "webclaw-pdf" -version = "0.3.13" +version = "0.3.14" dependencies = [ "pdf-extract", "thiserror", diff --git a/Cargo.toml b/Cargo.toml index 89a8aff..4c2061c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ resolver = "2" members = ["crates/*"] [workspace.package] -version = "0.3.13" +version = "0.3.14" edition = "2024" license = "AGPL-3.0" repository = "https://github.com/0xMassi/webclaw" diff --git a/crates/webclaw-cli/Cargo.toml b/crates/webclaw-cli/Cargo.toml index 189e9cf..adce50f 100644 --- a/crates/webclaw-cli/Cargo.toml +++ b/crates/webclaw-cli/Cargo.toml @@ -24,3 +24,4 @@ tracing-subscriber = { workspace = true } reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } regex = "1" url = "2" +shlex = "1" diff --git a/crates/webclaw-cli/src/main.rs b/crates/webclaw-cli/src/main.rs index d5af713..e520d4f 100644 --- a/crates/webclaw-cli/src/main.rs +++ b/crates/webclaw-cli/src/main.rs @@ -1456,6 +1456,59 @@ fn timestamp() -> String { format!("{hours:02}:{minutes:02}:{seconds:02}") } +/// Spawn the `--on-change` command with `payload` on stdin. +/// +/// Previously this passed the entire user-provided string to `sh -c`, which +/// made `--on-change 'notify "$URL"; rm -rf /'` a plausible disaster the +/// moment an untrusted config file or MCP-driven agent fed us a command. +/// The MCP surface specifically is prompt-injection-exposed: an LLM that +/// controls CLI args can escalate into arbitrary shell on the host. +/// +/// We now parse the command with `shlex` (POSIX-ish tokenization with proper +/// quoting) and exec the program directly without an intermediate shell, so +/// metacharacters like `;`, `&&`, `|`, `$()`, and env expansion can't fire. +/// Users who genuinely need a pipeline can set the whole chain behind a +/// script they've written, or opt in per-call via `WEBCLAW_ALLOW_SHELL=1` +/// (documented escape hatch, noisy by design). +async fn spawn_on_change(cmd: &str, stdin_payload: &[u8]) { + eprintln!("[watch] Running: {cmd}"); + + let allow_shell = std::env::var("WEBCLAW_ALLOW_SHELL") + .map(|v| v == "1" || v.eq_ignore_ascii_case("true")) + .unwrap_or(false); + + let mut command = if allow_shell { + eprintln!("[watch] WEBCLAW_ALLOW_SHELL=1 — executing via sh -c (unsafe)"); + let mut c = tokio::process::Command::new("sh"); + c.arg("-c").arg(cmd); + c + } else { + let Some(argv) = shlex::split(cmd) else { + eprintln!("[watch] Failed to parse --on-change command (unbalanced quotes?)"); + return; + }; + let Some((program, args)) = argv.split_first() else { + eprintln!("[watch] --on-change command is empty"); + return; + }; + let mut c = tokio::process::Command::new(program); + c.args(args); + c + }; + + command.stdin(std::process::Stdio::piped()); + + match command.spawn() { + Ok(mut child) => { + if let Some(mut stdin) = child.stdin.take() { + use tokio::io::AsyncWriteExt; + let _ = stdin.write_all(stdin_payload).await; + } + } + Err(e) => eprintln!("[watch] Failed to run command: {e}"), + } +} + /// Fire a webhook POST with a JSON payload. Non-blocking — errors logged to stderr. /// Auto-detects Discord and Slack webhook URLs and wraps the payload accordingly. fn fire_webhook(url: &str, payload: &serde_json::Value) { @@ -1587,21 +1640,7 @@ async fn run_watch_single( if let Some(ref cmd) = cli.on_change { let diff_json = serde_json::to_string(&diff).unwrap_or_default(); - eprintln!("[watch] Running: {cmd}"); - match tokio::process::Command::new("sh") - .arg("-c") - .arg(cmd) - .stdin(std::process::Stdio::piped()) - .spawn() - { - Ok(mut child) => { - if let Some(mut stdin) = child.stdin.take() { - use tokio::io::AsyncWriteExt; - let _ = stdin.write_all(diff_json.as_bytes()).await; - } - } - Err(e) => eprintln!("[watch] Failed to run command: {e}"), - } + spawn_on_change(cmd, diff_json.as_bytes()).await; } if let Some(ref webhook_url) = cli.webhook { @@ -1745,21 +1784,7 @@ async fn run_watch_multi( "changes": changed, }); let payload_json = serde_json::to_string(&payload).unwrap_or_default(); - eprintln!("[watch] Running: {cmd}"); - match tokio::process::Command::new("sh") - .arg("-c") - .arg(cmd) - .stdin(std::process::Stdio::piped()) - .spawn() - { - Ok(mut child) => { - if let Some(mut stdin) = child.stdin.take() { - use tokio::io::AsyncWriteExt; - let _ = stdin.write_all(payload_json.as_bytes()).await; - } - } - Err(e) => eprintln!("[watch] Failed to run command: {e}"), - } + spawn_on_change(cmd, payload_json.as_bytes()).await; } // Fire webhook once with aggregate payload diff --git a/crates/webclaw-core/src/brand.rs b/crates/webclaw-core/src/brand.rs index 6b874e7..52eb1b7 100644 --- a/crates/webclaw-core/src/brand.rs +++ b/crates/webclaw-core/src/brand.rs @@ -427,7 +427,7 @@ fn extract_colors(decls: &[CssDecl]) -> Vec { .collect(); // Sort by frequency (descending) - colors.sort_by(|a, b| b.count.cmp(&a.count)); + colors.sort_by_key(|c| std::cmp::Reverse(c.count)); // Promote top non-white/black to Primary/Secondary if they're still Unknown let mut assigned_primary = colors.iter().any(|c| c.usage == ColorUsage::Primary); @@ -615,7 +615,7 @@ fn extract_fonts(decls: &[CssDecl]) -> Vec { } let mut fonts: Vec<(String, usize)> = freq.into_iter().collect(); - fonts.sort_by(|a, b| b.1.cmp(&a.1)); + fonts.sort_by_key(|f| std::cmp::Reverse(f.1)); fonts.into_iter().map(|(name, _)| name).collect() }