fix: v0.1.1 — MCP identity, timeouts, exit codes, URL validation

Critical:
- MCP server identifies as "webclaw-mcp" instead of "rmcp"
- Research tool poll loop capped at 200 iterations (~10 min)

CLI:
- Non-zero exit codes on errors
- Text format strips markdown table syntax

MCP server:
- URL validation on all tools
- 60s cloud API timeout, 30s local fetch timeout
- Diff cloud fallback computes actual diff
- Batch capped at 100 URLs, crawl at 500 pages
- Graceful startup failure instead of panic

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Valerio 2026-03-24 17:25:05 +01:00
parent 09fa3f5fc9
commit ea9c783bc5
8 changed files with 194 additions and 34 deletions

View file

@@ -3,6 +3,7 @@
/// When local fetch returns a challenge page, this module retries
/// via api.webclaw.io. Requires WEBCLAW_API_KEY to be set.
use std::collections::HashMap;
use std::time::Duration;
use serde_json::{Value, json};
use tracing::info;
@@ -23,10 +24,11 @@ impl CloudClient {
if key.is_empty() {
return None;
}
-Some(Self {
-api_key: key,
-http: reqwest::Client::new(),
-})
+let http = reqwest::Client::builder()
+.timeout(Duration::from_secs(60))
+.build()
+.unwrap_or_default();
+Some(Self { api_key: key, http })
}
/// Scrape a URL via the cloud API. Returns the response JSON.
@@ -208,10 +210,10 @@ pub async fn smart_fetch(
only_main_content: bool,
formats: &[&str],
) -> Result<SmartFetchResult, String> {
-// Step 1: Try local fetch
-let fetch_result = client
-.fetch(url)
+// Step 1: Try local fetch (with timeout to avoid hanging on slow servers)
+let fetch_result = tokio::time::timeout(Duration::from_secs(30), client.fetch(url))
 .await
+.map_err(|_| format!("Fetch timed out after 30s for {url}"))?
.map_err(|e| format!("Fetch failed: {e}"))?;
// Step 2: Check for bot protection