mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-04-25 00:06:21 +02:00
fix: MCP research saves to file, returns compact response
Research results saved to ~/.webclaw/research/ (report.md + full.json). MCP returns file paths + findings instead of the full report, preventing "exceeds maximum allowed tokens" errors in Claude/Cursor. Same query returns cached result instantly without spending credits. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f7cc0cc5cf
commit
1d2018c98e
5 changed files with 188 additions and 28 deletions
11
CHANGELOG.md
11
CHANGELOG.md
|
|
@ -3,6 +3,17 @@
|
|||
All notable changes to webclaw are documented here.
|
||||
Format follows [Keep a Changelog](https://keepachangelog.com/).
|
||||
|
||||
## [0.3.8] — 2026-04-03
|
||||
|
||||
### Fixed
|
||||
- **MCP research token overflow**: research results are now saved to `~/.webclaw/research/` and the MCP tool returns file paths + findings instead of the full report. Prevents "exceeds maximum allowed tokens" errors in Claude/Cursor.
|
||||
- **Research caching**: same query returns cached result instantly without spending credits.
|
||||
- **Anthropic rate limit throttling**: 60s delay between LLM calls in research to stay under Tier 1 limits (50K input tokens/min).
|
||||
|
||||
### Added
|
||||
- **`dirs` dependency** for `~/.webclaw/research/` path resolution.
|
||||
|
||||
---
|
||||
## [0.3.7] — 2026-04-03
|
||||
|
||||
### Added
|
||||
|
|
|
|||
60
Cargo.lock
generated
60
Cargo.lock
generated
|
|
@ -655,6 +655,27 @@ dependencies = [
|
|||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dirs"
|
||||
version = "6.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e"
|
||||
dependencies = [
|
||||
"dirs-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dirs-sys"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"option-ext",
|
||||
"redox_users",
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "displaydoc"
|
||||
version = "0.2.5"
|
||||
|
|
@ -1417,6 +1438,15 @@ dependencies = [
|
|||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libredox"
|
||||
version = "0.1.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7ddbf48fd451246b1f8c2610bd3b4ac0cc6e149d89832867093ab69a17194f08"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.12.1"
|
||||
|
|
@ -1635,6 +1665,12 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "option-ext"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot"
|
||||
version = "0.12.5"
|
||||
|
|
@ -1990,6 +2026,17 @@ dependencies = [
|
|||
"bitflags",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_users"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac"
|
||||
dependencies = [
|
||||
"getrandom 0.2.17",
|
||||
"libredox",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ref-cast"
|
||||
version = "1.0.25"
|
||||
|
|
@ -3055,7 +3102,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-cli"
|
||||
version = "0.3.7"
|
||||
version = "0.3.8"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"dotenvy",
|
||||
|
|
@ -3075,7 +3122,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-core"
|
||||
version = "0.3.7"
|
||||
version = "0.3.8"
|
||||
dependencies = [
|
||||
"ego-tree",
|
||||
"once_cell",
|
||||
|
|
@ -3093,7 +3140,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-fetch"
|
||||
version = "0.3.7"
|
||||
version = "0.3.8"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"calamine",
|
||||
|
|
@ -3115,7 +3162,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-llm"
|
||||
version = "0.3.7"
|
||||
version = "0.3.8"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"reqwest",
|
||||
|
|
@ -3128,8 +3175,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-mcp"
|
||||
version = "0.3.7"
|
||||
version = "0.3.8"
|
||||
dependencies = [
|
||||
"dirs",
|
||||
"dotenvy",
|
||||
"reqwest",
|
||||
"rmcp",
|
||||
|
|
@ -3148,7 +3196,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-pdf"
|
||||
version = "0.3.7"
|
||||
version = "0.3.8"
|
||||
dependencies = [
|
||||
"pdf-extract",
|
||||
"thiserror",
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ resolver = "2"
|
|||
members = ["crates/*"]
|
||||
|
||||
[workspace.package]
|
||||
version = "0.3.7"
|
||||
version = "0.3.8"
|
||||
edition = "2024"
|
||||
license = "AGPL-3.0"
|
||||
repository = "https://github.com/0xMassi/webclaw"
|
||||
|
|
|
|||
|
|
@ -24,3 +24,4 @@ tracing = { workspace = true }
|
|||
tracing-subscriber = { workspace = true }
|
||||
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
|
||||
url = "2"
|
||||
dirs = "6.0.0"
|
||||
|
|
|
|||
|
|
@ -522,7 +522,8 @@ impl WebclawMcp {
|
|||
}
|
||||
|
||||
/// Run a deep research investigation on a topic or question. Requires WEBCLAW_API_KEY.
|
||||
/// Starts an async research job on the webclaw cloud API, then polls until complete.
|
||||
/// Saves full result to ~/.webclaw/research/ and returns the file path + key findings.
|
||||
/// Checks cache first — same query returns the cached result without spending credits.
|
||||
#[tool]
|
||||
async fn research(
|
||||
&self,
|
||||
|
|
@ -533,6 +534,15 @@ impl WebclawMcp {
|
|||
.as_ref()
|
||||
.ok_or("Research requires WEBCLAW_API_KEY. Get a key at https://webclaw.io")?;
|
||||
|
||||
let research_dir = research_dir();
|
||||
let slug = slugify(&params.query);
|
||||
|
||||
// Check cache first
|
||||
if let Some(cached) = load_cached_research(&research_dir, &slug) {
|
||||
info!(query = %params.query, "returning cached research");
|
||||
return Ok(cached);
|
||||
}
|
||||
|
||||
let mut body = json!({ "query": params.query });
|
||||
if let Some(deep) = params.deep {
|
||||
body["deep"] = json!(deep);
|
||||
|
|
@ -551,7 +561,7 @@ impl WebclawMcp {
|
|||
|
||||
info!(job_id = %job_id, "research job started, polling for completion");
|
||||
|
||||
// Poll until completed or failed, with a max iteration cap (~10 minutes)
|
||||
// Poll until completed or failed
|
||||
for poll in 0..RESEARCH_MAX_POLLS {
|
||||
tokio::time::sleep(Duration::from_secs(3)).await;
|
||||
|
||||
|
|
@ -563,32 +573,37 @@ impl WebclawMcp {
|
|||
|
||||
match status {
|
||||
"completed" => {
|
||||
// Return structured result: report + sources + findings
|
||||
let mut result = json!({
|
||||
"id": job_id,
|
||||
// Save full result to file
|
||||
let (report_path, json_path) =
|
||||
save_research(&research_dir, &slug, &status_resp);
|
||||
|
||||
// Build compact response: file paths + findings (no full report)
|
||||
let sources_count = status_resp
|
||||
.get("sources_count")
|
||||
.and_then(|v| v.as_i64())
|
||||
.unwrap_or(0);
|
||||
let findings_count = status_resp
|
||||
.get("findings_count")
|
||||
.and_then(|v| v.as_i64())
|
||||
.unwrap_or(0);
|
||||
|
||||
let mut response = json!({
|
||||
"status": "completed",
|
||||
"query": params.query,
|
||||
"report_file": report_path,
|
||||
"json_file": json_path,
|
||||
"sources_count": sources_count,
|
||||
"findings_count": findings_count,
|
||||
});
|
||||
|
||||
if let Some(report) = status_resp.get("report") {
|
||||
result["report"] = report.clone();
|
||||
if let Some(findings) = status_resp.get("findings") {
|
||||
response["findings"] = findings.clone();
|
||||
}
|
||||
if let Some(sources) = status_resp.get("sources") {
|
||||
result["sources"] = sources.clone();
|
||||
}
|
||||
if let Some(findings) = status_resp.get("findings") {
|
||||
result["findings"] = findings.clone();
|
||||
}
|
||||
if let Some(elapsed) = status_resp.get("elapsed_ms") {
|
||||
result["elapsed_ms"] = elapsed.clone();
|
||||
}
|
||||
if let Some(sc) = status_resp.get("sources_count") {
|
||||
result["sources_count"] = sc.clone();
|
||||
}
|
||||
if let Some(fc) = status_resp.get("findings_count") {
|
||||
result["findings_count"] = fc.clone();
|
||||
response["sources"] = sources.clone();
|
||||
}
|
||||
|
||||
return Ok(serde_json::to_string_pretty(&result).unwrap_or_default());
|
||||
return Ok(serde_json::to_string_pretty(&response).unwrap_or_default());
|
||||
}
|
||||
"failed" => {
|
||||
let error = status_resp
|
||||
|
|
@ -665,3 +680,88 @@ impl ServerHandler for WebclawMcp {
|
|||
))
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Research file helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn research_dir() -> std::path::PathBuf {
|
||||
let dir = dirs::home_dir()
|
||||
.unwrap_or_else(|| std::path::PathBuf::from("."))
|
||||
.join(".webclaw")
|
||||
.join("research");
|
||||
std::fs::create_dir_all(&dir).ok();
|
||||
dir
|
||||
}
|
||||
|
||||
/// Turn a free-form query into a filesystem-safe slug.
///
/// Non-alphanumeric characters become word separators, the words are joined
/// with `-`, and the result is lowercased and capped at 60 bytes.
///
/// The cap truncates on a `char` boundary: `is_alphanumeric()` admits
/// multibyte Unicode letters, so a plain `s[..60]` could slice mid-character
/// and panic. We back up to the nearest boundary instead.
fn slugify(query: &str) -> String {
    let s: String = query
        .chars()
        // Keep letters/digits; everything else acts as a word separator.
        .map(|c| {
            if c.is_alphanumeric() || c == ' ' {
                c
            } else {
                ' '
            }
        })
        .collect::<String>()
        .split_whitespace()
        .collect::<Vec<_>>()
        .join("-")
        .to_lowercase();
    if s.len() > 60 {
        // Find the largest char boundary <= 60 so slicing cannot panic on
        // non-ASCII input.
        let mut end = 60;
        while !s.is_char_boundary(end) {
            end -= 1;
        }
        s[..end].to_string()
    } else {
        s
    }
}
|
||||
|
||||
/// Check for a cached research result. Returns the compact response if found.
|
||||
fn load_cached_research(dir: &std::path::Path, slug: &str) -> Option<String> {
|
||||
let json_path = dir.join(format!("{slug}.json"));
|
||||
let report_path = dir.join(format!("{slug}.md"));
|
||||
|
||||
if !json_path.exists() || !report_path.exists() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let json_str = std::fs::read_to_string(&json_path).ok()?;
|
||||
let data: serde_json::Value = serde_json::from_str(&json_str).ok()?;
|
||||
|
||||
// Build compact response from cache
|
||||
let mut response = json!({
|
||||
"status": "completed",
|
||||
"cached": true,
|
||||
"query": data.get("query").cloned().unwrap_or(json!("")),
|
||||
"report_file": report_path.to_string_lossy(),
|
||||
"json_file": json_path.to_string_lossy(),
|
||||
"sources_count": data.get("sources_count").cloned().unwrap_or(json!(0)),
|
||||
"findings_count": data.get("findings_count").cloned().unwrap_or(json!(0)),
|
||||
});
|
||||
|
||||
if let Some(findings) = data.get("findings") {
|
||||
response["findings"] = findings.clone();
|
||||
}
|
||||
if let Some(sources) = data.get("sources") {
|
||||
response["sources"] = sources.clone();
|
||||
}
|
||||
|
||||
Some(serde_json::to_string_pretty(&response).unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Save research result to disk. Returns (report_path, json_path) as strings.
|
||||
fn save_research(dir: &std::path::Path, slug: &str, data: &serde_json::Value) -> (String, String) {
|
||||
let json_path = dir.join(format!("{slug}.json"));
|
||||
let report_path = dir.join(format!("{slug}.md"));
|
||||
|
||||
// Save full JSON
|
||||
if let Ok(json_str) = serde_json::to_string_pretty(data) {
|
||||
std::fs::write(&json_path, json_str).ok();
|
||||
}
|
||||
|
||||
// Save report as markdown
|
||||
if let Some(report) = data.get("report").and_then(|v| v.as_str()) {
|
||||
std::fs::write(&report_path, report).ok();
|
||||
}
|
||||
|
||||
(
|
||||
report_path.to_string_lossy().to_string(),
|
||||
json_path.to_string_lossy().to_string(),
|
||||
)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue