feat(search): standalone web search with your own Serper.dev key

OSS surfaces can now search without the hosted webclaw API. New
webclaw-fetch::search() calls Serper.dev directly with a user-supplied key
and optionally fetches + extracts the result pages. Wired into the CLI
(webclaw search, --serper-key / SERPER_API_KEY), the MCP search tool
(local-first when SERPER_API_KEY is set, cloud fallback otherwise), and the
OSS reference server (POST /v1/search). Adds futures for concurrent result
page scraping.
This commit is contained in:
webclaw 2026-06-06 14:20:03 +02:00
parent b7bd1155c6
commit de899ab3ba
14 changed files with 671 additions and 7 deletions

View file

@ -271,6 +271,43 @@ pub enum Commands {
#[arg(long)]
raw: bool,
},
/// Web search via Serper.dev using YOUR OWN API key.
///
/// Returns Google organic results (title, link, snippet). With
/// `--scrape`, each result page is fetched and extracted to markdown.
/// Get a free key at serper.dev, then pass `--serper-key` or set
/// `SERPER_API_KEY`.
///
/// Example: `webclaw search "rust async runtime" --num 5 --scrape`.
Search {
// Positional argument: the field carries no `#[arg]` attribute.
/// Search query.
query: String,
// Optional at the parser level; the `Commands::Search` arm in `main`
// exits with an error when the key is missing or blank.
/// Serper.dev API key. Falls back to the `SERPER_API_KEY` env var.
#[arg(long, env = "SERPER_API_KEY")]
serper_key: Option<String>,
// NOTE(review): the 1-10 range is stated in the help text only; nothing in
// this attribute enforces it — confirm it is validated or clamped downstream.
/// Number of results to return (1-10).
#[arg(long, default_value = "5")]
num: usize,
// `None` when the flag is omitted; handed to `run_search` via `as_deref()`.
/// Country code for localization (e.g. "us", "gb", "it").
#[arg(long)]
country: Option<String>,
// `None` when the flag is omitted; handed to `run_search` via `as_deref()`.
/// Language code for localization (e.g. "en", "it").
#[arg(long)]
lang: Option<String>,
// Plain boolean switch: false unless `--scrape` is given.
/// Fetch + extract each result page and include its markdown.
#[arg(long)]
scrape: bool,
// `short` also exposes this option as `-f`.
/// Output format: `markdown` (human-readable, default) or `json`.
#[arg(short, long, default_value = "markdown")]
format: OutputFormat,
},
}
#[derive(Clone, ValueEnum)]

View file

@ -21,7 +21,7 @@ use fetch::{
use output::{format_output, print_cloud_output, print_output};
use run::{
has_llm_flags, run_batch, run_batch_llm, run_brand, run_crawl, run_diff, run_llm, run_map,
run_research, run_watch,
run_research, run_search, run_watch,
};
fn init_logging(verbose: bool) {
@ -145,6 +145,40 @@ async fn main() {
}
return;
}
// Standalone web search: resolve the Serper.dev key, run the search, and
// exit non-zero on any failure.
Commands::Search {
    query,
    serper_key,
    num,
    country,
    lang,
    scrape,
    format,
} => {
    // clap has already merged `--serper-key` with the `SERPER_API_KEY` env
    // var. Trim once and use the trimmed value: a key that only *passes* the
    // blank check after trimming (stray spaces, trailing newline from a
    // shell export) must not be forwarded with that whitespace attached.
    // Borrowing the trimmed slice also avoids cloning the key.
    let key = match serper_key.as_deref().map(str::trim) {
        Some(k) if !k.is_empty() => k,
        _ => {
            eprintln!(
                "error: search requires a Serper.dev API key: pass --serper-key or set SERPER_API_KEY (get one free at serper.dev)"
            );
            process::exit(1);
        }
    };
    if let Err(e) = run_search(
        key,
        query,
        *num,
        country.as_deref(),
        lang.as_deref(),
        *scrape,
        format,
    )
    .await
    {
        eprintln!("error: {e}");
        process::exit(1);
    }
    // Subcommand fully handled; skip the rest of `main`.
    return;
}
}
}

View file

@ -229,6 +229,73 @@ pub async fn run_map(cli: &Cli) -> Result<(), String> {
Ok(())
}
/// Run a Serper.dev web search with a caller-supplied API key and print the results.
///
/// `serper_key` arrives already resolved (from the flag or the
/// `SERPER_API_KEY` env var, via clap's `env`), so no key lookup happens
/// here. With `scrape` enabled, every result page is additionally fetched
/// and extracted through the `FetchClient` (which carries the browser TLS
/// profile), and its markdown is included with the result.
///
/// Output goes to stdout: pretty-printed JSON of the form
/// `{ "query": ..., "results": [...] }` for `OutputFormat::Json`, otherwise
/// a numbered text listing. An empty result set in the text format only
/// emits a notice on stderr and still counts as success.
///
/// # Errors
///
/// Returns a message string when the fetch client cannot be built, when the
/// search call fails, or when the JSON payload cannot be encoded.
// The flat parameter list mirrors the `Commands::Search` fields.
#[allow(clippy::too_many_arguments)]
pub async fn run_search(
    serper_key: &str,
    query: &str,
    num: usize,
    country: Option<&str>,
    lang: Option<&str>,
    scrape: bool,
    format: &OutputFormat,
) -> Result<(), String> {
    // A default fetch config suffices: Serper's gl/hl take care of search
    // localization, and scraping result pages only needs a standard
    // browser profile.
    let base = FetchClient::new(webclaw_fetch::FetchConfig::default())
        .map_err(|e| format!("client error: {e}"))?;

    // With WEBCLAW_API_KEY set, attach the cloud fallback so scraped pages
    // behind bot protection can still escalate.
    let fetcher = match webclaw_fetch::cloud::CloudClient::from_env() {
        Some(cloud) => base.with_cloud(cloud),
        None => base,
    };

    let options = webclaw_fetch::SearchOptions {
        num_results: num,
        country: country.map(str::to_owned),
        lang: lang.map(str::to_owned),
        scrape,
    };

    let hits = webclaw_fetch::search(&fetcher, serper_key, query, &options)
        .await
        .map_err(|e| format!("search error: {e}"))?;

    // JSON mode always prints a document, even for an empty result list.
    if let OutputFormat::Json = format {
        let payload = serde_json::json!({ "query": query, "results": hits });
        let rendered = serde_json::to_string_pretty(&payload)
            .map_err(|e| format!("JSON encode failed: {e}"))?;
        println!("{rendered}");
        return Ok(());
    }

    // Text mode: report "nothing found" on stderr so stdout stays empty.
    if hits.is_empty() {
        eprintln!("no results for \"{query}\"");
        return Ok(());
    }

    for hit in &hits {
        println!("{}. {}", hit.position, hit.title);
        println!(" {}", hit.link);
        if !hit.snippet.is_empty() {
            println!(" {}", hit.snippet);
        }
        // Scraped page body, present only when a page was fetched; set off
        // from the header lines by one blank line.
        if let Some(body) = &hit.content {
            println!("\n{body}");
        }
        // Blank separator between consecutive results.
        println!();
    }
    Ok(())
}
pub async fn run_batch(cli: &Cli, entries: &[(String, Option<String>)]) -> Result<(), String> {
let client = Arc::new(
FetchClient::new(build_fetch_config(cli)).map_err(|e| format!("client error: {e}"))?,