mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-06-08 22:25:12 +02:00
feat: add --cookie-file support for JSON cookie files
- --cookie-file reads Chrome extension format ([{name, value, domain, ...}])
- Works with EditThisCookie, Cookie-Editor, and similar browser extensions
- Merges with --cookie when both provided
- MCP scrape tool now accepts a `cookies` parameter
- Closes #7
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
44f23332cc
commit
da1d76c97a
4 changed files with 71 additions and 10 deletions
12
Cargo.lock
generated
12
Cargo.lock
generated
|
|
@ -3072,7 +3072,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-cli"
|
||||
version = "0.3.0"
|
||||
version = "0.3.1"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"dotenvy",
|
||||
|
|
@ -3092,7 +3092,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-core"
|
||||
version = "0.3.0"
|
||||
version = "0.3.1"
|
||||
dependencies = [
|
||||
"ego-tree",
|
||||
"once_cell",
|
||||
|
|
@ -3110,7 +3110,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-fetch"
|
||||
version = "0.3.0"
|
||||
version = "0.3.1"
|
||||
dependencies = [
|
||||
"calamine",
|
||||
"http",
|
||||
|
|
@ -3148,7 +3148,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-llm"
|
||||
version = "0.3.0"
|
||||
version = "0.3.1"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"reqwest 0.12.28",
|
||||
|
|
@ -3161,7 +3161,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-mcp"
|
||||
version = "0.3.0"
|
||||
version = "0.3.1"
|
||||
dependencies = [
|
||||
"dotenvy",
|
||||
"reqwest 0.12.28",
|
||||
|
|
@ -3181,7 +3181,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-pdf"
|
||||
version = "0.3.0"
|
||||
version = "0.3.1"
|
||||
dependencies = [
|
||||
"pdf-extract",
|
||||
"thiserror",
|
||||
|
|
|
|||
|
|
@ -151,6 +151,10 @@ struct Cli {
|
|||
#[arg(long)]
|
||||
cookie: Option<String>,
|
||||
|
||||
/// JSON cookie file (Chrome extension format: [{name, value, domain, ...}])
|
||||
#[arg(long)]
|
||||
cookie_file: Option<String>,
|
||||
|
||||
/// Enable verbose logging
|
||||
#[arg(short, long)]
|
||||
verbose: bool,
|
||||
|
|
@ -371,6 +375,24 @@ fn build_fetch_config(cli: &Cli) -> FetchConfig {
|
|||
headers.insert("Cookie".to_string(), cookie.clone());
|
||||
}
|
||||
|
||||
// --cookie-file: parse JSON array of {name, value, domain, ...}
|
||||
if let Some(ref path) = cli.cookie_file {
|
||||
match parse_cookie_file(path) {
|
||||
Ok(cookie_str) => {
|
||||
// Merge with existing cookies if --cookie was also provided
|
||||
if let Some(existing) = headers.get("Cookie") {
|
||||
headers.insert("Cookie".to_string(), format!("{existing}; {cookie_str}"));
|
||||
} else {
|
||||
headers.insert("Cookie".to_string(), cookie_str);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("error: failed to parse cookie file: {e}");
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FetchConfig {
|
||||
browser: cli.browser.clone().into(),
|
||||
proxy,
|
||||
|
|
@ -382,6 +404,29 @@ fn build_fetch_config(cli: &Cli) -> FetchConfig {
|
|||
}
|
||||
}
|
||||
|
||||
/// Parse a JSON cookie file (Chrome extension format) into a Cookie header string.
|
||||
/// Supports: [{name, value, domain, path, secure, httpOnly, expirationDate, ...}]
|
||||
fn parse_cookie_file(path: &str) -> Result<String, String> {
|
||||
let content = std::fs::read_to_string(path).map_err(|e| format!("cannot read {path}: {e}"))?;
|
||||
let cookies: Vec<serde_json::Value> =
|
||||
serde_json::from_str(&content).map_err(|e| format!("invalid JSON: {e}"))?;
|
||||
|
||||
let pairs: Vec<String> = cookies
|
||||
.iter()
|
||||
.filter_map(|c| {
|
||||
let name = c.get("name")?.as_str()?;
|
||||
let value = c.get("value")?.as_str()?;
|
||||
Some(format!("{name}={value}"))
|
||||
})
|
||||
.collect();
|
||||
|
||||
if pairs.is_empty() {
|
||||
return Err("no cookies found in file".to_string());
|
||||
}
|
||||
|
||||
Ok(pairs.join("; "))
|
||||
}
|
||||
|
||||
fn build_extraction_options(cli: &Cli) -> ExtractionOptions {
|
||||
ExtractionOptions {
|
||||
include_selectors: cli
|
||||
|
|
|
|||
|
|
@ -139,19 +139,33 @@ impl WebclawMcp {
|
|||
let exclude = params.exclude_selectors.unwrap_or_default();
|
||||
let main_only = params.only_main_content.unwrap_or(false);
|
||||
|
||||
// Use a custom client if a non-default browser is requested
|
||||
// Build cookie header from params
|
||||
let cookie_header = params
|
||||
.cookies
|
||||
.as_ref()
|
||||
.filter(|c| !c.is_empty())
|
||||
.map(|c| c.join("; "));
|
||||
|
||||
// Use a custom client if non-default browser or cookies are provided
|
||||
let is_default_browser = matches!(browser, webclaw_fetch::BrowserProfile::Chrome);
|
||||
let needs_custom = !is_default_browser || cookie_header.is_some();
|
||||
let custom_client;
|
||||
let client: &webclaw_fetch::FetchClient = if is_default_browser {
|
||||
&self.fetch_client
|
||||
} else {
|
||||
let client: &webclaw_fetch::FetchClient = if needs_custom {
|
||||
let mut headers = std::collections::HashMap::new();
|
||||
headers.insert("Accept-Language".to_string(), "en-US,en;q=0.9".to_string());
|
||||
if let Some(ref cookies) = cookie_header {
|
||||
headers.insert("Cookie".to_string(), cookies.clone());
|
||||
}
|
||||
let config = webclaw_fetch::FetchConfig {
|
||||
browser,
|
||||
headers,
|
||||
..Default::default()
|
||||
};
|
||||
custom_client = webclaw_fetch::FetchClient::new(config)
|
||||
.map_err(|e| format!("Failed to build client: {e}"))?;
|
||||
&custom_client
|
||||
} else {
|
||||
&self.fetch_client
|
||||
};
|
||||
|
||||
let formats = [format];
|
||||
|
|
|
|||
|
|
@ -18,6 +18,8 @@ pub struct ScrapeParams {
|
|||
pub only_main_content: Option<bool>,
|
||||
/// Browser profile: "chrome" (default), "firefox", or "random"
|
||||
pub browser: Option<String>,
|
||||
/// Cookies to send with the request (e.g. ["name=value", "session=abc123"])
|
||||
pub cookies: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, JsonSchema)]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue