From b5ee838d5f06ad86ffd6b33125a9579f21cc192f Mon Sep 17 00:00:00 2001 From: Charles Rossi Date: Mon, 15 Jun 2026 01:04:35 -0300 Subject: [PATCH 1/2] fix(tools): accept numeric params as JSON strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MCP clients (Claude Desktop, VS Code Copilot, etc.) serialize numeric tool arguments as JSON strings ("3" instead of 3). serde's built-in u32/usize deserialisers reject these with: invalid type: string "N", expected u32 Add two private coercion helpers — `deser_opt_u32_or_str` and `deser_opt_usize_or_str` — that accept both JSON number and JSON string representations, falling back to `str::parse` for the string form and returning a clear custom error for non-numeric strings. Annotate the six affected optional fields: CrawlParams: depth (u32), max_pages (usize), concurrency (usize) BatchParams: concurrency (usize) SearchParams: num_results (u32) SummarizeParams: max_sentences (usize) Add 24 unit tests (4 per field: numeric string → value, native number → value, absent → None, non-numeric string → Err) verified green via an isolated serde-only crate. Fixes #58 --- crates/webclaw-mcp/src/tools.rs | 244 ++++++++++++++++++++++++++++++++ 1 file changed, 244 insertions(+) diff --git a/crates/webclaw-mcp/src/tools.rs b/crates/webclaw-mcp/src/tools.rs index 02bf534..83bbba1 100644 --- a/crates/webclaw-mcp/src/tools.rs +++ b/crates/webclaw-mcp/src/tools.rs @@ -4,6 +4,63 @@ use schemars::JsonSchema; use serde::Deserialize; +// ── Coercion helpers ──────────────────────────────────────────────────────── +// +// MCP clients (Claude Desktop, VS Code extension, etc.) sometimes pass numeric +// parameters as JSON strings (e.g. `"depth": "3"` instead of `"depth": 3`). 
+// serde's default u32/usize deserialisers reject strings with: +// +// "invalid type: string \"3\", expected u32" +// +// These two helpers accept both forms transparently so callers never see that +// error regardless of which representation their client sends. + +fn deser_opt_u32_or_str<'de, D>(d: D) -> Result<Option<u32>, D::Error> +where + D: serde::Deserializer<'de>, +{ + #[derive(serde::Deserialize)] + #[serde(untagged)] + enum NumOrStr { + Num(u32), + Str(String), + } + match Option::<NumOrStr>::deserialize(d)? { + None => Ok(None), + Some(NumOrStr::Num(n)) => Ok(Some(n)), + Some(NumOrStr::Str(s)) => s + .trim() + .parse::<u32>() + .map(Some) + .map_err(|_| serde::de::Error::custom(format!("expected a u32, got string \"{s}\""))), + } +} + +fn deser_opt_usize_or_str<'de, D>(d: D) -> Result<Option<usize>, D::Error> +where + D: serde::Deserializer<'de>, +{ + #[derive(serde::Deserialize)] + #[serde(untagged)] + enum NumOrStr { + Num(usize), + Str(String), + } + match Option::<NumOrStr>::deserialize(d)? { + None => Ok(None), + Some(NumOrStr::Num(n)) => Ok(Some(n)), + Some(NumOrStr::Str(s)) => s + .trim() + .parse::<usize>() + .map(Some) + .map_err(|_| { + serde::de::Error::custom(format!("expected a usize, got string \"{s}\"")) + }), + } +} + +// ── Parameter structs ─────────────────────────────────────────────────────── + #[derive(Debug, Deserialize, JsonSchema)] pub struct ScrapeParams { /// URL to scrape @@ -27,10 +84,13 @@ pub struct CrawlParams { /// Seed URL to start crawling from pub url: String, /// Maximum link depth to follow (default: 2) + #[serde(default, deserialize_with = "deser_opt_u32_or_str")] pub depth: Option<u32>, /// Maximum number of pages to crawl (default: 50) + #[serde(default, deserialize_with = "deser_opt_usize_or_str")] pub max_pages: Option<usize>, /// Number of concurrent requests (default: 5) + #[serde(default, deserialize_with = "deser_opt_usize_or_str")] pub concurrency: Option<usize>, /// Seed the frontier from sitemap discovery before crawling pub use_sitemap: Option, @@ -51,6 +111,7 @@ pub struct
BatchParams { /// Output format: "markdown" (default), "llm", "text" pub format: Option, /// Number of concurrent requests (default: 5) + #[serde(default, deserialize_with = "deser_opt_usize_or_str")] pub concurrency: Option<usize>, } @@ -69,6 +130,7 @@ pub struct SummarizeParams { /// URL to fetch and summarize pub url: String, /// Number of sentences in the summary (default: 3) + #[serde(default, deserialize_with = "deser_opt_usize_or_str")] pub max_sentences: Option<usize>, } @@ -101,6 +163,7 @@ pub struct SearchParams { /// Search query pub query: String, /// Number of results to return (default: 10) + #[serde(default, deserialize_with = "deser_opt_u32_or_str")] pub num_results: Option<u32>, } @@ -120,3 +183,184 @@ pub struct VerticalParams { /// so rmcp can generate a schema and parse the (empty) JSON-RPC params. #[derive(Debug, Deserialize, JsonSchema)] pub struct ListExtractorsParams {} + +#[cfg(test)] +mod tests { + use super::*; + + // ── CrawlParams.depth (u32) ────────────────────────────────────────────── + + #[test] + fn crawl_depth_from_numeric_string() { + let v: CrawlParams = + serde_json::from_str(r#"{"url":"https://x.com","depth":"3"}"#).unwrap(); + assert_eq!(v.depth, Some(3)); + } + + #[test] + fn crawl_depth_from_number() { + let v: CrawlParams = + serde_json::from_str(r#"{"url":"https://x.com","depth":3}"#).unwrap(); + assert_eq!(v.depth, Some(3)); + } + + #[test] + fn crawl_depth_absent_is_none() { + let v: CrawlParams = serde_json::from_str(r#"{"url":"https://x.com"}"#).unwrap(); + assert_eq!(v.depth, None); + } + + #[test] + fn crawl_depth_non_numeric_string_errors() { + let e = serde_json::from_str::<CrawlParams>(r#"{"url":"https://x.com","depth":"abc"}"#); + assert!(e.is_err(), "expected Err, got {e:?}"); + } + + // ── CrawlParams.max_pages (usize) ──────────────────────────────────────── + + #[test] + fn crawl_max_pages_from_numeric_string() { + let v: CrawlParams = + serde_json::from_str(r#"{"url":"https://x.com","max_pages":"50"}"#).unwrap(); +
assert_eq!(v.max_pages, Some(50)); + } + + #[test] + fn crawl_max_pages_from_number() { + let v: CrawlParams = + serde_json::from_str(r#"{"url":"https://x.com","max_pages":50}"#).unwrap(); + assert_eq!(v.max_pages, Some(50)); + } + + #[test] + fn crawl_max_pages_absent_is_none() { + let v: CrawlParams = serde_json::from_str(r#"{"url":"https://x.com"}"#).unwrap(); + assert_eq!(v.max_pages, None); + } + + #[test] + fn crawl_max_pages_non_numeric_string_errors() { + let e = + serde_json::from_str::<CrawlParams>(r#"{"url":"https://x.com","max_pages":"abc"}"#); + assert!(e.is_err(), "expected Err, got {e:?}"); + } + + // ── CrawlParams.concurrency (usize) ────────────────────────────────────── + + #[test] + fn crawl_concurrency_from_numeric_string() { + let v: CrawlParams = + serde_json::from_str(r#"{"url":"https://x.com","concurrency":"5"}"#).unwrap(); + assert_eq!(v.concurrency, Some(5)); + } + + #[test] + fn crawl_concurrency_from_number() { + let v: CrawlParams = + serde_json::from_str(r#"{"url":"https://x.com","concurrency":5}"#).unwrap(); + assert_eq!(v.concurrency, Some(5)); + } + + #[test] + fn crawl_concurrency_absent_is_none() { + let v: CrawlParams = serde_json::from_str(r#"{"url":"https://x.com"}"#).unwrap(); + assert_eq!(v.concurrency, None); + } + + #[test] + fn crawl_concurrency_non_numeric_string_errors() { + let e = serde_json::from_str::<CrawlParams>( + r#"{"url":"https://x.com","concurrency":"abc"}"#, + ); + assert!(e.is_err(), "expected Err, got {e:?}"); + } + + // ── BatchParams.concurrency (usize) ────────────────────────────────────── + + #[test] + fn batch_concurrency_from_numeric_string() { + let v: BatchParams = + serde_json::from_str(r#"{"urls":["https://x.com"],"concurrency":"5"}"#).unwrap(); + assert_eq!(v.concurrency, Some(5)); + } + + #[test] + fn batch_concurrency_from_number() { + let v: BatchParams = + serde_json::from_str(r#"{"urls":["https://x.com"],"concurrency":5}"#).unwrap(); + assert_eq!(v.concurrency, Some(5)); + } + + #[test] + fn
batch_concurrency_absent_is_none() { + let v: BatchParams = serde_json::from_str(r#"{"urls":["https://x.com"]}"#).unwrap(); + assert_eq!(v.concurrency, None); + } + + #[test] + fn batch_concurrency_non_numeric_string_errors() { + let e = serde_json::from_str::<BatchParams>( + r#"{"urls":["https://x.com"],"concurrency":"abc"}"#, + ); + assert!(e.is_err(), "expected Err, got {e:?}"); + } + + // ── SearchParams.num_results (u32) ─────────────────────────────────────── + + #[test] + fn search_num_results_from_numeric_string() { + let v: SearchParams = + serde_json::from_str(r#"{"query":"rust","num_results":"10"}"#).unwrap(); + assert_eq!(v.num_results, Some(10)); + } + + #[test] + fn search_num_results_from_number() { + let v: SearchParams = + serde_json::from_str(r#"{"query":"rust","num_results":10}"#).unwrap(); + assert_eq!(v.num_results, Some(10)); + } + + #[test] + fn search_num_results_absent_is_none() { + let v: SearchParams = serde_json::from_str(r#"{"query":"rust"}"#).unwrap(); + assert_eq!(v.num_results, None); + } + + #[test] + fn search_num_results_non_numeric_string_errors() { + let e = + serde_json::from_str::<SearchParams>(r#"{"query":"rust","num_results":"abc"}"#); + assert!(e.is_err(), "expected Err, got {e:?}"); + } + + // ── SummarizeParams.max_sentences (usize) ──────────────────────────────── + + #[test] + fn summarize_max_sentences_from_numeric_string() { + let v: SummarizeParams = + serde_json::from_str(r#"{"url":"https://x.com","max_sentences":"3"}"#).unwrap(); + assert_eq!(v.max_sentences, Some(3)); + } + + #[test] + fn summarize_max_sentences_from_number() { + let v: SummarizeParams = + serde_json::from_str(r#"{"url":"https://x.com","max_sentences":3}"#).unwrap(); + assert_eq!(v.max_sentences, Some(3)); + } + + #[test] + fn summarize_max_sentences_absent_is_none() { + let v: SummarizeParams = serde_json::from_str(r#"{"url":"https://x.com"}"#).unwrap(); + assert_eq!(v.max_sentences, None); + } + + #[test] + fn summarize_max_sentences_non_numeric_string_errors() { + let e
= serde_json::from_str::<SummarizeParams>( + r#"{"url":"https://x.com","max_sentences":"abc"}"#, + ); + assert!(e.is_err(), "expected Err, got {e:?}"); + } +} From 24ae3a7af218e5009806d438a8bcfadf3fce68f5 Mon Sep 17 00:00:00 2001 From: Valerio Date: Mon, 15 Jun 2026 11:25:55 +0200 Subject: [PATCH 2/2] style(mcp): apply rustfmt to numeric param coercion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reformat the string-or-number deserialize helpers and tests to satisfy `cargo fmt --check` (style_edition 2024), which the lint CI job enforces. Formatting only — no behavior change. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/webclaw-mcp/src/tools.rs | 37 +++++++++++++-------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/crates/webclaw-mcp/src/tools.rs b/crates/webclaw-mcp/src/tools.rs index 83bbba1..e4dc310 100644 --- a/crates/webclaw-mcp/src/tools.rs +++ b/crates/webclaw-mcp/src/tools.rs @@ -28,11 +28,11 @@ where match Option::<NumOrStr>::deserialize(d)? { None => Ok(None), Some(NumOrStr::Num(n)) => Ok(Some(n)), - Some(NumOrStr::Str(s)) => s - .trim() - .parse::<u32>() - .map(Some) - .map_err(|_| serde::de::Error::custom(format!("expected a u32, got string \"{s}\""))), + Some(NumOrStr::Str(s)) => { + s.trim().parse::<u32>().map(Some).map_err(|_| { + serde::de::Error::custom(format!("expected a u32, got string \"{s}\"")) + }) + } } } @@ -49,13 +49,11 @@ where match Option::<NumOrStr>::deserialize(d)?
{ None => Ok(None), Some(NumOrStr::Num(n)) => Ok(Some(n)), - Some(NumOrStr::Str(s)) => s - .trim() - .parse::<usize>() - .map(Some) - .map_err(|_| { + Some(NumOrStr::Str(s)) => { + s.trim().parse::<usize>().map(Some).map_err(|_| { serde::de::Error::custom(format!("expected a usize, got string \"{s}\"")) - }), + }) + } } } @@ -199,8 +197,7 @@ mod tests { #[test] fn crawl_depth_from_number() { - let v: CrawlParams = - serde_json::from_str(r#"{"url":"https://x.com","depth":3}"#).unwrap(); + let v: CrawlParams = serde_json::from_str(r#"{"url":"https://x.com","depth":3}"#).unwrap(); assert_eq!(v.depth, Some(3)); } @@ -240,8 +237,7 @@ mod tests { #[test] fn crawl_max_pages_non_numeric_string_errors() { - let e = - serde_json::from_str::<CrawlParams>(r#"{"url":"https://x.com","max_pages":"abc"}"#); + let e = serde_json::from_str::<CrawlParams>(r#"{"url":"https://x.com","max_pages":"abc"}"#); assert!(e.is_err(), "expected Err, got {e:?}"); } @@ -269,9 +265,8 @@ mod tests { #[test] fn crawl_concurrency_non_numeric_string_errors() { - let e = serde_json::from_str::<CrawlParams>( - r#"{"url":"https://x.com","concurrency":"abc"}"#, - ); + let e = + serde_json::from_str::<CrawlParams>(r#"{"url":"https://x.com","concurrency":"abc"}"#); assert!(e.is_err(), "expected Err, got {e:?}"); } @@ -316,8 +311,7 @@ mod tests { #[test] fn search_num_results_from_number() { - let v: SearchParams = - serde_json::from_str(r#"{"query":"rust","num_results":10}"#).unwrap(); + let v: SearchParams = serde_json::from_str(r#"{"query":"rust","num_results":10}"#).unwrap(); assert_eq!(v.num_results, Some(10)); } @@ -329,8 +323,7 @@ mod tests { #[test] fn search_num_results_non_numeric_string_errors() { - let e = - serde_json::from_str::<SearchParams>(r#"{"query":"rust","num_results":"abc"}"#); + let e = serde_json::from_str::<SearchParams>(r#"{"query":"rust","num_results":"abc"}"#); assert!(e.is_err(), "expected Err, got {e:?}"); }