feat: v0.1.3 — crawl streaming, resume/cancel, MCP proxy support

Crawl:
- Real-time progress on stderr as pages complete
- --crawl-state saves progress on Ctrl+C, resumes from saved state
- Visited set + remaining frontier persisted for accurate resume

MCP server:
- Reads WEBCLAW_PROXY and WEBCLAW_PROXY_FILE env vars
- Falls back to proxies.txt in CWD (existing behavior)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Valerio 2026-03-25 21:38:28 +01:00
parent afe4d3077d
commit 0c91c6d5a9
7 changed files with 249 additions and 42 deletions

View file

@@ -62,12 +62,20 @@ impl WebclawMcp {
pub async fn new() -> Self {
let mut config = webclaw_fetch::FetchConfig::default();
// Auto-load proxies.txt if present
if std::path::Path::new("proxies.txt").exists()
&& let Ok(pool) = webclaw_fetch::parse_proxy_file("proxies.txt")
// Load proxy config from env vars or local file
if let Ok(proxy) = std::env::var("WEBCLAW_PROXY") {
info!("using single proxy from WEBCLAW_PROXY");
config.proxy = Some(proxy);
}
let proxy_file = std::env::var("WEBCLAW_PROXY_FILE")
.ok()
.unwrap_or_else(|| "proxies.txt".to_string());
if std::path::Path::new(&proxy_file).exists()
&& let Ok(pool) = webclaw_fetch::parse_proxy_file(&proxy_file)
&& !pool.is_empty()
{
info!(count = pool.len(), "loaded proxy pool from proxies.txt");
info!(count = pool.len(), file = %proxy_file, "loaded proxy pool");
config.proxy_pool = pool;
}
@@ -210,7 +218,7 @@ impl WebclawMcp {
let crawler = webclaw_fetch::Crawler::new(&params.url, config)
.map_err(|e| format!("Crawler init failed: {e}"))?;
let result = crawler.crawl(&params.url).await;
let result = crawler.crawl(&params.url, None).await;
let mut output = format!(
"Crawled {} pages ({} ok, {} errors) in {:.1}s\n\n",