mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-07-01 03:59:37 +02:00
feat(search): standalone web search via Serper.dev (bring-your-own-key)
Rescued from the stale perf/audit-fixes branch and ported cleanly onto current main. OSS surfaces can now search without the hosted webclaw API when the caller supplies their own Serper.dev key (free at serper.dev). - webclaw-fetch::search() — calls Serper.dev directly (plain wreq client; a JSON API needs no fingerprinting) and, with scrape=true, fetches + extracts the top result pages concurrently (bounded) via the caller's FetchClient. parse_serper_organic() is pure and unit-tested. - MCP `search` tool: local-first — uses SERPER_API_KEY when set, else falls back to the hosted webclaw API. Adds country/lang/scrape params. - OSS REST server: POST /v1/search, gated on SERPER_API_KEY (501 when unset, with a setup hint). Adds ApiError::NotImplemented. - CLI: `webclaw search <query> [--serper-key|SERPER_API_KEY] [--num] [--country] [--lang] [--scrape] [--format]`. No new dependencies (reuses futures-util already in the tree). Original work by the prior author on perf/audit-fixes; this re-applies only the search slice onto main. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
0c6f323f51
commit
06f151c560
10 changed files with 622 additions and 7 deletions
|
|
@@ -668,13 +668,55 @@ impl WebclawMcp {
|
|||
))
|
||||
}
|
||||
|
||||
/// Search the web for a query and return structured results. Requires WEBCLAW_API_KEY.
|
||||
/// Search the web for a query and return structured results.
|
||||
///
|
||||
/// Resolves the backend in priority order:
|
||||
/// 1. `SERPER_API_KEY` set → local Serper.dev search with the user's
|
||||
/// own key (no hosted API needed). Supports `country`, `lang`, and
|
||||
/// `scrape` (fetch + extract each result page).
|
||||
/// 2. else `WEBCLAW_API_KEY` set → the hosted webclaw search API.
|
||||
/// 3. else → an error explaining both options.
|
||||
#[tool]
|
||||
async fn search(&self, Parameters(params): Parameters<SearchParams>) -> Result<String, String> {
|
||||
let cloud = self
|
||||
.cloud
|
||||
.as_ref()
|
||||
.ok_or("Search requires WEBCLAW_API_KEY. Get a key at https://webclaw.io")?;
|
||||
// Local path: user's own Serper key. Preferred when present so the
|
||||
// tool works without the hosted API and without spending credits.
|
||||
if let Ok(serper_key) = std::env::var("SERPER_API_KEY")
|
||||
&& !serper_key.trim().is_empty()
|
||||
{
|
||||
let opts = webclaw_fetch::SearchOptions {
|
||||
num_results: params.num_results.unwrap_or(5) as usize,
|
||||
country: params.country.clone(),
|
||||
lang: params.lang.clone(),
|
||||
scrape: params.scrape.unwrap_or(false),
|
||||
};
|
||||
let results = webclaw_fetch::search(
|
||||
self.fetch_client.as_ref(),
|
||||
&serper_key,
|
||||
&params.query,
|
||||
&opts,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| format!("search error: {e}"))?;
|
||||
|
||||
let mut output = format!("Found {} results:\n\n", results.len());
|
||||
for r in &results {
|
||||
output.push_str(&format!("{}. {}\n {}\n", r.position, r.title, r.link));
|
||||
if !r.snippet.is_empty() {
|
||||
output.push_str(&format!(" {}\n", r.snippet));
|
||||
}
|
||||
if let Some(ref content) = r.content {
|
||||
output.push_str(&format!("\n{content}\n"));
|
||||
}
|
||||
output.push('\n');
|
||||
}
|
||||
return Ok(output);
|
||||
}
|
||||
|
||||
// Hosted path: the webclaw cloud API.
|
||||
let cloud = self.cloud.as_ref().ok_or(
|
||||
"Search requires a search backend: set SERPER_API_KEY for local search \
|
||||
(get one free at serper.dev), or WEBCLAW_API_KEY for the hosted API.",
|
||||
)?;
|
||||
|
||||
let mut body = json!({ "query": params.query });
|
||||
if let Some(num) = params.num_results {
|
||||
|
|
|
|||
|
|
@@ -160,9 +160,18 @@ pub struct ResearchParams {
|
|||
pub struct SearchParams {
|
||||
/// Search query
|
||||
pub query: String,
|
||||
/// Number of results to return (default: 10)
|
||||
/// Number of results to return (default: 5, max: 10)
|
||||
#[serde(default, deserialize_with = "deser_opt_u32_or_str")]
|
||||
pub num_results: Option<u32>,
|
||||
/// Country code for localization (e.g. "us", "gb", "it").
|
||||
/// Only used by the local Serper path (SERPER_API_KEY).
|
||||
pub country: Option<String>,
|
||||
/// Language code for localization (e.g. "en", "it").
|
||||
/// Only used by the local Serper path (SERPER_API_KEY).
|
||||
pub lang: Option<String>,
|
||||
/// When true, fetch + extract each result page and include its
|
||||
/// markdown. Only used by the local Serper path (SERPER_API_KEY).
|
||||
pub scrape: Option<bool>,
|
||||
}
|
||||
|
||||
/// Parameters for `vertical_scrape`: run a site-specific extractor by name.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue