fix: MCP research saves to file, returns compact response

Research results saved to ~/.webclaw/research/ (report.md + full.json).
MCP returns file paths + findings instead of the full report, preventing
"exceeds maximum allowed tokens" errors in Claude/Cursor.

Same query returns cached result instantly without spending credits.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Valerio 2026-04-03 16:05:45 +02:00
parent f7cc0cc5cf
commit 1d2018c98e
5 changed files with 188 additions and 28 deletions

View file

@ -3,6 +3,17 @@
All notable changes to webclaw are documented here.
Format follows [Keep a Changelog](https://keepachangelog.com/).
## [0.3.8] — 2026-04-03
### Fixed
- **MCP research token overflow**: research results are now saved to `~/.webclaw/research/` and the MCP tool returns file paths + findings instead of the full report. Prevents "exceeds maximum allowed tokens" errors in Claude/Cursor.
- **Research caching**: same query returns cached result instantly without spending credits.
- **Anthropic rate limit throttling**: 60s delay between LLM calls in research to stay under Tier 1 limits (50K input tokens/min).
### Added
- **`dirs` dependency** for `~/.webclaw/research/` path resolution.
---
## [0.3.7] — 2026-04-03
### Added

60
Cargo.lock generated
View file

@ -655,6 +655,27 @@ dependencies = [
"subtle",
]
[[package]]
name = "dirs"
version = "6.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e"
dependencies = [
"dirs-sys",
]
[[package]]
name = "dirs-sys"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab"
dependencies = [
"libc",
"option-ext",
"redox_users",
"windows-sys 0.61.2",
]
[[package]]
name = "displaydoc"
version = "0.2.5"
@ -1417,6 +1438,15 @@ dependencies = [
"windows-link",
]
[[package]]
name = "libredox"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ddbf48fd451246b1f8c2610bd3b4ac0cc6e149d89832867093ab69a17194f08"
dependencies = [
"libc",
]
[[package]]
name = "linux-raw-sys"
version = "0.12.1"
@ -1635,6 +1665,12 @@ dependencies = [
"syn",
]
[[package]]
name = "option-ext"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
[[package]]
name = "parking_lot"
version = "0.12.5"
@ -1990,6 +2026,17 @@ dependencies = [
"bitflags",
]
[[package]]
name = "redox_users"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac"
dependencies = [
"getrandom 0.2.17",
"libredox",
"thiserror",
]
[[package]]
name = "ref-cast"
version = "1.0.25"
@ -3055,7 +3102,7 @@ dependencies = [
[[package]]
name = "webclaw-cli"
version = "0.3.7"
version = "0.3.8"
dependencies = [
"clap",
"dotenvy",
@ -3075,7 +3122,7 @@ dependencies = [
[[package]]
name = "webclaw-core"
version = "0.3.7"
version = "0.3.8"
dependencies = [
"ego-tree",
"once_cell",
@ -3093,7 +3140,7 @@ dependencies = [
[[package]]
name = "webclaw-fetch"
version = "0.3.7"
version = "0.3.8"
dependencies = [
"bytes",
"calamine",
@ -3115,7 +3162,7 @@ dependencies = [
[[package]]
name = "webclaw-llm"
version = "0.3.7"
version = "0.3.8"
dependencies = [
"async-trait",
"reqwest",
@ -3128,8 +3175,9 @@ dependencies = [
[[package]]
name = "webclaw-mcp"
version = "0.3.7"
version = "0.3.8"
dependencies = [
"dirs",
"dotenvy",
"reqwest",
"rmcp",
@ -3148,7 +3196,7 @@ dependencies = [
[[package]]
name = "webclaw-pdf"
version = "0.3.7"
version = "0.3.8"
dependencies = [
"pdf-extract",
"thiserror",

View file

@ -3,7 +3,7 @@ resolver = "2"
members = ["crates/*"]
[workspace.package]
version = "0.3.7"
version = "0.3.8"
edition = "2024"
license = "AGPL-3.0"
repository = "https://github.com/0xMassi/webclaw"

View file

@ -24,3 +24,4 @@ tracing = { workspace = true }
tracing-subscriber = { workspace = true }
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
url = "2"
dirs = "6.0.0"

View file

@ -522,7 +522,8 @@ impl WebclawMcp {
}
/// Run a deep research investigation on a topic or question. Requires WEBCLAW_API_KEY.
/// Starts an async research job on the webclaw cloud API, then polls until complete.
/// Saves full result to ~/.webclaw/research/ and returns the file path + key findings.
/// Checks cache first — same query returns the cached result without spending credits.
#[tool]
async fn research(
&self,
@ -533,6 +534,15 @@ impl WebclawMcp {
.as_ref()
.ok_or("Research requires WEBCLAW_API_KEY. Get a key at https://webclaw.io")?;
let research_dir = research_dir();
let slug = slugify(&params.query);
// Check cache first
if let Some(cached) = load_cached_research(&research_dir, &slug) {
info!(query = %params.query, "returning cached research");
return Ok(cached);
}
let mut body = json!({ "query": params.query });
if let Some(deep) = params.deep {
body["deep"] = json!(deep);
@ -551,7 +561,7 @@ impl WebclawMcp {
info!(job_id = %job_id, "research job started, polling for completion");
// Poll until completed or failed, with a max iteration cap (~10 minutes)
// Poll until completed or failed
for poll in 0..RESEARCH_MAX_POLLS {
tokio::time::sleep(Duration::from_secs(3)).await;
@ -563,32 +573,37 @@ impl WebclawMcp {
match status {
"completed" => {
// Return structured result: report + sources + findings
let mut result = json!({
"id": job_id,
// Save full result to file
let (report_path, json_path) =
save_research(&research_dir, &slug, &status_resp);
// Build compact response: file paths + findings (no full report)
let sources_count = status_resp
.get("sources_count")
.and_then(|v| v.as_i64())
.unwrap_or(0);
let findings_count = status_resp
.get("findings_count")
.and_then(|v| v.as_i64())
.unwrap_or(0);
let mut response = json!({
"status": "completed",
"query": params.query,
"report_file": report_path,
"json_file": json_path,
"sources_count": sources_count,
"findings_count": findings_count,
});
if let Some(report) = status_resp.get("report") {
result["report"] = report.clone();
if let Some(findings) = status_resp.get("findings") {
response["findings"] = findings.clone();
}
if let Some(sources) = status_resp.get("sources") {
result["sources"] = sources.clone();
}
if let Some(findings) = status_resp.get("findings") {
result["findings"] = findings.clone();
}
if let Some(elapsed) = status_resp.get("elapsed_ms") {
result["elapsed_ms"] = elapsed.clone();
}
if let Some(sc) = status_resp.get("sources_count") {
result["sources_count"] = sc.clone();
}
if let Some(fc) = status_resp.get("findings_count") {
result["findings_count"] = fc.clone();
response["sources"] = sources.clone();
}
return Ok(serde_json::to_string_pretty(&result).unwrap_or_default());
return Ok(serde_json::to_string_pretty(&response).unwrap_or_default());
}
"failed" => {
let error = status_resp
@ -665,3 +680,88 @@ impl ServerHandler for WebclawMcp {
))
}
}
// ---------------------------------------------------------------------------
// Research file helpers
// ---------------------------------------------------------------------------
/// Resolve (and best-effort create) the research cache directory,
/// `~/.webclaw/research/`. Falls back to the current directory when no
/// home directory can be determined; creation errors are ignored so the
/// caller still gets a usable path to attempt writes against.
fn research_dir() -> std::path::PathBuf {
    let base = match dirs::home_dir() {
        Some(home) => home,
        None => std::path::PathBuf::from("."),
    };
    let dir = base.join(".webclaw").join("research");
    // Best-effort: a failure here just means later reads/writes miss.
    let _ = std::fs::create_dir_all(&dir);
    dir
}
/// Turn a free-form query into a filesystem-safe slug: runs of
/// non-alphanumeric characters collapse into single hyphens, the result
/// is lowercased, and the slug is capped at 60 bytes.
fn slugify(query: &str) -> String {
    let s: String = query
        .chars()
        // Keep letters/digits; everything else becomes a word separator.
        .map(|c| if c.is_alphanumeric() { c } else { ' ' })
        .collect::<String>()
        .split_whitespace()
        .collect::<Vec<_>>()
        .join("-")
        .to_lowercase();
    if s.len() <= 60 {
        return s;
    }
    // Truncate on a char boundary: `is_alphanumeric` admits multi-byte
    // Unicode characters (e.g. 'é', CJK), so a blind `s[..60]` byte slice
    // could panic mid-character.
    let mut end = 60;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    s[..end].to_string()
}
/// Look up a previously saved research result for `slug`. Returns the
/// compact pretty-printed JSON response when both cache files exist and
/// the JSON parses cleanly; `None` on any miss or read/parse failure.
fn load_cached_research(dir: &std::path::Path, slug: &str) -> Option<String> {
    let json_path = dir.join(format!("{slug}.json"));
    let report_path = dir.join(format!("{slug}.md"));
    // Both artifacts must be present for a cache hit.
    if !(json_path.exists() && report_path.exists()) {
        return None;
    }
    let raw = std::fs::read_to_string(&json_path).ok()?;
    let data: serde_json::Value = serde_json::from_str(&raw).ok()?;
    // Compact response mirrors the live-completion shape, plus "cached".
    let mut response = json!({
        "status": "completed",
        "cached": true,
        "query": data.get("query").cloned().unwrap_or(json!("")),
        "report_file": report_path.to_string_lossy(),
        "json_file": json_path.to_string_lossy(),
        "sources_count": data.get("sources_count").cloned().unwrap_or(json!(0)),
        "findings_count": data.get("findings_count").cloned().unwrap_or(json!(0)),
    });
    // Carry over optional detail arrays when the cached payload has them.
    for key in ["findings", "sources"] {
        if let Some(value) = data.get(key) {
            response[key] = value.clone();
        }
    }
    Some(serde_json::to_string_pretty(&response).unwrap_or_default())
}
/// Persist a research result to disk: the full payload as pretty JSON,
/// and — when a string "report" field is present — the markdown report.
/// Writes are best-effort (errors ignored). Returns the pair
/// `(report_path, json_path)` rendered as strings.
fn save_research(dir: &std::path::Path, slug: &str, data: &serde_json::Value) -> (String, String) {
    let report_path = dir.join(format!("{slug}.md"));
    let json_path = dir.join(format!("{slug}.json"));
    // Full JSON payload.
    if let Ok(serialized) = serde_json::to_string_pretty(data) {
        let _ = std::fs::write(&json_path, serialized);
    }
    // Markdown report, only when present as a string field.
    if let Some(serde_json::Value::String(report)) = data.get("report") {
        let _ = std::fs::write(&report_path, report);
    }
    (
        report_path.to_string_lossy().into_owned(),
        json_path.to_string_lossy().into_owned(),
    )
}