feat: add --cookie-file support for JSON cookie files

- --cookie-file reads Chrome extension format ([{name, value, domain, ...}])
- Works with EditThisCookie, Cookie-Editor, and similar browser extensions
- Merges with --cookie when both are provided
- MCP scrape tool now accepts a `cookies` parameter
- Closes #7

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Valerio 2026-03-31 10:54:53 +02:00
parent 44f23332cc
commit da1d76c97a
4 changed files with 71 additions and 10 deletions

12
Cargo.lock generated
View file

@ -3072,7 +3072,7 @@ dependencies = [
[[package]]
name = "webclaw-cli"
version = "0.3.0"
version = "0.3.1"
dependencies = [
"clap",
"dotenvy",
@ -3092,7 +3092,7 @@ dependencies = [
[[package]]
name = "webclaw-core"
version = "0.3.0"
version = "0.3.1"
dependencies = [
"ego-tree",
"once_cell",
@ -3110,7 +3110,7 @@ dependencies = [
[[package]]
name = "webclaw-fetch"
version = "0.3.0"
version = "0.3.1"
dependencies = [
"calamine",
"http",
@ -3148,7 +3148,7 @@ dependencies = [
[[package]]
name = "webclaw-llm"
version = "0.3.0"
version = "0.3.1"
dependencies = [
"async-trait",
"reqwest 0.12.28",
@ -3161,7 +3161,7 @@ dependencies = [
[[package]]
name = "webclaw-mcp"
version = "0.3.0"
version = "0.3.1"
dependencies = [
"dotenvy",
"reqwest 0.12.28",
@ -3181,7 +3181,7 @@ dependencies = [
[[package]]
name = "webclaw-pdf"
version = "0.3.0"
version = "0.3.1"
dependencies = [
"pdf-extract",
"thiserror",

View file

@ -151,6 +151,10 @@ struct Cli {
#[arg(long)]
cookie: Option<String>,
/// JSON cookie file (Chrome extension format: [{name, value, domain, ...}])
#[arg(long)]
cookie_file: Option<String>,
/// Enable verbose logging
#[arg(short, long)]
verbose: bool,
@ -371,6 +375,24 @@ fn build_fetch_config(cli: &Cli) -> FetchConfig {
headers.insert("Cookie".to_string(), cookie.clone());
}
// --cookie-file: parse JSON array of {name, value, domain, ...}
if let Some(ref path) = cli.cookie_file {
match parse_cookie_file(path) {
Ok(cookie_str) => {
// Merge with existing cookies if --cookie was also provided
if let Some(existing) = headers.get("Cookie") {
headers.insert("Cookie".to_string(), format!("{existing}; {cookie_str}"));
} else {
headers.insert("Cookie".to_string(), cookie_str);
}
}
Err(e) => {
eprintln!("error: failed to parse cookie file: {e}");
process::exit(1);
}
}
}
FetchConfig {
browser: cli.browser.clone().into(),
proxy,
@ -382,6 +404,29 @@ fn build_fetch_config(cli: &Cli) -> FetchConfig {
}
}
/// Reads a browser-extension cookie export and flattens it into a `Cookie` header value.
///
/// The file at `path` must contain a JSON array. Each element whose `name` and `value` fields
/// are both JSON strings contributes one `name=value` pair; elements missing either field, or
/// holding a non-string there, are skipped without an error. All other keys (`domain`, `path`,
/// `secure`, `httpOnly`, `expirationDate`, ...) are ignored. Pairs are emitted in file order and
/// joined with `"; "`.
///
/// # Errors
///
/// Returns a human-readable message when the file cannot be read, when its content does not
/// deserialize as a JSON array, or when no element yields a usable `name`/`value` pair.
fn parse_cookie_file(path: &str) -> Result<String, String> {
    let raw = match std::fs::read_to_string(path) {
        Ok(text) => text,
        Err(err) => return Err(format!("cannot read {path}: {err}")),
    };
    let entries: Vec<serde_json::Value> = match serde_json::from_str(&raw) {
        Ok(list) => list,
        Err(err) => return Err(format!("invalid JSON: {err}")),
    };

    // Build the header in place. Every accepted pair contains at least the '=' separator, so an
    // empty buffer after the loop means that no entry was usable.
    let mut header = String::new();
    for entry in &entries {
        let name = entry.get("name").and_then(serde_json::Value::as_str);
        let value = entry.get("value").and_then(serde_json::Value::as_str);
        if let (Some(name), Some(value)) = (name, value) {
            if !header.is_empty() {
                header.push_str("; ");
            }
            header.push_str(name);
            header.push('=');
            header.push_str(value);
        }
    }

    if header.is_empty() {
        Err("no cookies found in file".to_string())
    } else {
        Ok(header)
    }
}
fn build_extraction_options(cli: &Cli) -> ExtractionOptions {
ExtractionOptions {
include_selectors: cli

View file

@ -139,19 +139,33 @@ impl WebclawMcp {
let exclude = params.exclude_selectors.unwrap_or_default();
let main_only = params.only_main_content.unwrap_or(false);
// Use a custom client if a non-default browser is requested
// Build cookie header from params
let cookie_header = params
.cookies
.as_ref()
.filter(|c| !c.is_empty())
.map(|c| c.join("; "));
// Use a custom client if non-default browser or cookies are provided
let is_default_browser = matches!(browser, webclaw_fetch::BrowserProfile::Chrome);
let needs_custom = !is_default_browser || cookie_header.is_some();
let custom_client;
let client: &webclaw_fetch::FetchClient = if is_default_browser {
&self.fetch_client
} else {
let client: &webclaw_fetch::FetchClient = if needs_custom {
let mut headers = std::collections::HashMap::new();
headers.insert("Accept-Language".to_string(), "en-US,en;q=0.9".to_string());
if let Some(ref cookies) = cookie_header {
headers.insert("Cookie".to_string(), cookies.clone());
}
let config = webclaw_fetch::FetchConfig {
browser,
headers,
..Default::default()
};
custom_client = webclaw_fetch::FetchClient::new(config)
.map_err(|e| format!("Failed to build client: {e}"))?;
&custom_client
} else {
&self.fetch_client
};
let formats = [format];

View file

@ -18,6 +18,8 @@ pub struct ScrapeParams {
pub only_main_content: Option<bool>,
/// Browser profile: "chrome" (default), "firefox", or "random"
pub browser: Option<String>,
/// Cookies to send with the request (e.g. ["name=value", "session=abc123"])
pub cookies: Option<Vec<String>>,
}
#[derive(Debug, Deserialize, JsonSchema)]