mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-06-17 23:55:13 +02:00
feat(search): standalone web search via Serper.dev (bring-your-own-key)
Rescued from the stale perf/audit-fixes branch and ported cleanly onto current main. OSS surfaces can now search without the hosted webclaw API when the caller supplies their own Serper.dev key (free at serper.dev). - webclaw-fetch::search() — calls Serper.dev directly (plain wreq client; a JSON API needs no fingerprinting) and, with scrape=true, fetches + extracts the top result pages concurrently (bounded) via the caller's FetchClient. parse_serper_organic() is pure and unit-tested. - MCP `search` tool: local-first — uses SERPER_API_KEY when set, else falls back to the hosted webclaw API. Adds country/lang/scrape params. - OSS REST server: POST /v1/search, gated on SERPER_API_KEY (501 when unset, with a setup hint). Adds ApiError::NotImplemented. - CLI: `webclaw search <query> [--serper-key|SERPER_API_KEY] [--num] [--country] [--lang] [--scrape] [--format]`. No new dependencies (reuses futures-util already in the tree). Original work by the prior author on perf/audit-fixes; this re-applies only the search slice onto main. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
0c6f323f51
commit
06f151c560
10 changed files with 622 additions and 7 deletions
|
|
@ -410,6 +410,43 @@ enum Commands {
|
|||
#[arg(long)]
|
||||
raw: bool,
|
||||
},
|
||||
|
||||
/// Web search via Serper.dev using YOUR OWN API key.
|
||||
///
|
||||
/// Returns Google organic results (title, link, snippet). With
|
||||
/// `--scrape`, each result page is fetched and extracted to markdown.
|
||||
/// Get a free key at serper.dev, then pass `--serper-key` or set
|
||||
/// `SERPER_API_KEY`.
|
||||
///
|
||||
/// Example: `webclaw search "rust async runtime" --num 5 --scrape`.
|
||||
Search {
|
||||
/// Search query.
|
||||
query: String,
|
||||
|
||||
/// Serper.dev API key. Falls back to the `SERPER_API_KEY` env var.
|
||||
#[arg(long, env = "SERPER_API_KEY")]
|
||||
serper_key: Option<String>,
|
||||
|
||||
/// Number of results to return (1-10).
|
||||
#[arg(long, default_value = "5")]
|
||||
num: usize,
|
||||
|
||||
/// Country code for localization (e.g. "us", "gb", "it").
|
||||
#[arg(long)]
|
||||
country: Option<String>,
|
||||
|
||||
/// Language code for localization (e.g. "en", "it").
|
||||
#[arg(long)]
|
||||
lang: Option<String>,
|
||||
|
||||
/// Fetch + extract each result page and include its markdown.
|
||||
#[arg(long)]
|
||||
scrape: bool,
|
||||
|
||||
/// Output format: `markdown` (human-readable, default) or `json`.
|
||||
#[arg(short, long, default_value = "markdown")]
|
||||
format: OutputFormat,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Clone, ValueEnum)]
|
||||
|
|
@ -1573,6 +1610,73 @@ async fn run_crawl(cli: &Cli) -> Result<(), String> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Web search via Serper.dev with the caller's own API key.
|
||||
///
|
||||
/// The Serper key is resolved by the caller (flag or `SERPER_API_KEY`
|
||||
/// env, via clap's `env`) and passed in already-unwrapped. When `scrape`
|
||||
/// is set, each result page is fetched + extracted through a FetchClient
|
||||
/// (which carries the browser TLS profile) and its markdown is included.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn run_search(
|
||||
serper_key: &str,
|
||||
query: &str,
|
||||
num: usize,
|
||||
country: Option<&str>,
|
||||
lang: Option<&str>,
|
||||
scrape: bool,
|
||||
format: &OutputFormat,
|
||||
) -> Result<(), String> {
|
||||
// Default fetch config is enough: search localization is handled by
|
||||
// Serper's gl/hl, and the result-page scrape just needs a standard
|
||||
// browser profile. Attach cloud fallback when WEBCLAW_API_KEY is set
|
||||
// so scraped pages behind bot protection can still escalate.
|
||||
let mut client = webclaw_fetch::FetchClient::new(webclaw_fetch::FetchConfig::default())
|
||||
.map_err(|e| format!("client error: {e}"))?;
|
||||
if let Some(cloud) = webclaw_fetch::cloud::CloudClient::from_env() {
|
||||
client = client.with_cloud(cloud);
|
||||
}
|
||||
|
||||
let opts = webclaw_fetch::SearchOptions {
|
||||
num_results: num,
|
||||
country: country.map(str::to_string),
|
||||
lang: lang.map(str::to_string),
|
||||
scrape,
|
||||
};
|
||||
|
||||
let results = webclaw_fetch::search(&client, serper_key, query, &opts)
|
||||
.await
|
||||
.map_err(|e| format!("search error: {e}"))?;
|
||||
|
||||
if matches!(format, OutputFormat::Json) {
|
||||
let json = serde_json::json!({ "query": query, "results": results });
|
||||
match serde_json::to_string_pretty(&json) {
|
||||
Ok(s) => println!("{s}"),
|
||||
Err(e) => return Err(format!("JSON encode failed: {e}")),
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if results.is_empty() {
|
||||
eprintln!("no results for \"{query}\"");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
for r in &results {
|
||||
println!("{}. {}", r.position, r.title);
|
||||
println!(" {}", r.link);
|
||||
if !r.snippet.is_empty() {
|
||||
println!(" {}", r.snippet);
|
||||
}
|
||||
if let Some(ref content) = r.content {
|
||||
println!();
|
||||
println!("{content}");
|
||||
}
|
||||
println!();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn run_map(cli: &Cli) -> Result<(), String> {
|
||||
let url = cli
|
||||
.urls
|
||||
|
|
@ -2589,6 +2693,40 @@ async fn main() {
|
|||
}
|
||||
return;
|
||||
}
|
||||
Commands::Search {
|
||||
query,
|
||||
serper_key,
|
||||
num,
|
||||
country,
|
||||
lang,
|
||||
scrape,
|
||||
format,
|
||||
} => {
|
||||
let key = match serper_key {
|
||||
Some(k) if !k.trim().is_empty() => k.clone(),
|
||||
_ => {
|
||||
eprintln!(
|
||||
"error: search requires a Serper.dev API key: pass --serper-key or set SERPER_API_KEY (get one free at serper.dev)"
|
||||
);
|
||||
process::exit(1);
|
||||
}
|
||||
};
|
||||
if let Err(e) = run_search(
|
||||
&key,
|
||||
query,
|
||||
*num,
|
||||
country.as_deref(),
|
||||
lang.as_deref(),
|
||||
*scrape,
|
||||
format,
|
||||
)
|
||||
.await
|
||||
{
|
||||
eprintln!("error: {e}");
|
||||
process::exit(1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue