From de899ab3ba1cf11ca126572590e1d279c9bd3287 Mon Sep 17 00:00:00 2001
From: webclaw <dev@webclaw.local>
Date: Sat, 6 Jun 2026 14:20:03 +0200
Subject: [PATCH] feat(search): standalone web search with your own Serper.dev
 key

OSS surfaces can now search without the hosted webclaw API. The new
webclaw_fetch::search() calls Serper.dev directly with a user-supplied key
and optionally fetches + extracts the result pages. Wired into the CLI
(webclaw search, --serper-key / SERPER_API_KEY), the MCP search tool
(local-first when SERPER_API_KEY is set, cloud fallback otherwise), and the
OSS reference server (POST /v1/search). Adds `futures` as a direct
webclaw-fetch dependency for concurrent result-page scraping.
---
 Cargo.lock                                 |   1 +
 crates/webclaw-cli/src/cli.rs              |  37 +++
 crates/webclaw-cli/src/main.rs             |  36 ++-
 crates/webclaw-cli/src/run.rs              |  67 +++++
 crates/webclaw-fetch/Cargo.toml            |   3 +
 crates/webclaw-fetch/src/lib.rs            |   2 +
 crates/webclaw-fetch/src/search.rs         | 322 +++++++++++++++++++++
 crates/webclaw-mcp/src/server.rs           |  52 +++-
 crates/webclaw-mcp/src/tools.rs            |  11 +-
 crates/webclaw-server/src/error.rs         |  10 +
 crates/webclaw-server/src/main.rs          |  45 +++
 crates/webclaw-server/src/routes/mod.rs    |   6 +
 crates/webclaw-server/src/routes/search.rs |  68 +++++
 crates/webclaw-server/src/state.rs         |  18 ++
 14 files changed, 671 insertions(+), 7 deletions(-)
 create mode 100644 crates/webclaw-fetch/src/search.rs
 create mode 100644 crates/webclaw-server/src/routes/search.rs
diff --git a/Cargo.lock b/Cargo.lock
index 4acefe2..9ad7389 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3264,6 +3264,7 @@ dependencies = [
  "bytes",
  "calamine",
  "flate2",
+ "futures",
  "http",
  "quick-xml 0.37.5",
  "rand 0.8.5",
diff --git a/crates/webclaw-cli/src/cli.rs b/crates/webclaw-cli/src/cli.rs
index 1221cdf..0ce5204 100644
--- a/crates/webclaw-cli/src/cli.rs
+++ b/crates/webclaw-cli/src/cli.rs
@@ -271,6 +271,43 @@ pub enum Commands {
         #[arg(long)]
         raw: bool,
     },
+
+    /// Web search via Serper.dev using YOUR OWN API key.
+    ///
+    /// Returns Google organic results (title, link, snippet). With
+    /// `--scrape`, each result page is fetched and extracted to markdown.
+    /// Get a free key at serper.dev, then pass `--serper-key` or set
+    /// `SERPER_API_KEY`.
+    ///
+    /// Example: `webclaw search "rust async runtime" --num 5 --scrape`.
+    Search {
+        /// Search query.
+        query: String,
+
+        /// Serper.dev API key. Falls back to the `SERPER_API_KEY` env var.
+        #[arg(long, env = "SERPER_API_KEY", hide_env_values = true)]
+        serper_key: Option<String>,
+
+        /// Number of results to return (clamped to 1-10).
+        #[arg(long, default_value = "5")]
+        num: usize,
+
+        /// Country code for localization (e.g. "us", "gb", "it").
+        #[arg(long)]
+        country: Option<String>,
+
+        /// Language code for localization (e.g. "en", "it").
+        #[arg(long)]
+        lang: Option<String>,
+
+        /// Fetch + extract each result page and include its markdown.
+        #[arg(long)]
+        scrape: bool,
+
+        /// Output format: `markdown` (human-readable, default) or `json`.
+        #[arg(short, long, default_value = "markdown")]
+        format: OutputFormat,
+    },
 }
 
 #[derive(Clone, ValueEnum)]
diff --git a/crates/webclaw-cli/src/main.rs b/crates/webclaw-cli/src/main.rs
index 1a834e4..c93764d 100644
--- a/crates/webclaw-cli/src/main.rs
+++ b/crates/webclaw-cli/src/main.rs
@@ -21,7 +21,7 @@ use fetch::{
 use output::{format_output, print_cloud_output, print_output};
 use run::{
     has_llm_flags, run_batch, run_batch_llm, run_brand, run_crawl, run_diff, run_llm, run_map,
-    run_research, run_watch,
+    run_research, run_search, run_watch,
 };
 
 fn init_logging(verbose: bool) {
@@ -145,6 +145,40 @@ async fn main() {
                 }
                 return;
             }
+            Commands::Search {
+                query,
+                serper_key,
+                num,
+                country,
+                lang,
+                scrape,
+                format,
+            } => {
+                let key = match serper_key {
+                    Some(k) if !k.trim().is_empty() => k.clone(),
+                    _ => {
+                        eprintln!(
+                            "error: search requires a Serper.dev API key: pass --serper-key or set SERPER_API_KEY (get one free at serper.dev)"
+                        );
+                        process::exit(1);
+                    }
+                };
+                if let Err(e) = run_search(
+                    &key,
+                    query,
+                    *num,
+                    country.as_deref(),
+                    lang.as_deref(),
+                    *scrape,
+                    format,
+                )
+                .await
+                {
+                    eprintln!("error: {e}");
+                    process::exit(1);
+                }
+                return;
+            }
         }
     }
 
diff --git a/crates/webclaw-cli/src/run.rs b/crates/webclaw-cli/src/run.rs
index e5a0bf3..7657e5a 100644
--- a/crates/webclaw-cli/src/run.rs
+++ b/crates/webclaw-cli/src/run.rs
@@ -229,6 +229,73 @@ pub async fn run_map(cli: &Cli) -> Result<(), String> {
     Ok(())
 }
 
+/// Web search via Serper.dev with the caller's own API key.
+///
+/// The Serper key is resolved by the caller (flag or `SERPER_API_KEY`
+/// env, via clap's `env`) and passed in already-unwrapped. When `scrape`
+/// is set, each result page is fetched + extracted through a FetchClient
+/// (which carries the browser TLS profile) and its markdown is included.
+#[allow(clippy::too_many_arguments)]
+pub async fn run_search(
+    serper_key: &str,
+    query: &str,
+    num: usize,
+    country: Option<&str>,
+    lang: Option<&str>,
+    scrape: bool,
+    format: &OutputFormat,
+) -> Result<(), String> {
+    // Default fetch config is enough: search localization is handled by
+    // Serper's gl/hl, and the result-page scrape just needs a standard
+    // browser profile. Attach cloud fallback when WEBCLAW_API_KEY is set
+    // so scraped pages behind bot protection can still escalate.
+    let mut client = FetchClient::new(webclaw_fetch::FetchConfig::default())
+        .map_err(|e| format!("client error: {e}"))?;
+    if let Some(cloud) = webclaw_fetch::cloud::CloudClient::from_env() {
+        client = client.with_cloud(cloud);
+    }
+
+    let opts = webclaw_fetch::SearchOptions {
+        num_results: num,
+        country: country.map(str::to_string),
+        lang: lang.map(str::to_string),
+        scrape,
+    };
+
+    let results = webclaw_fetch::search(&client, serper_key, query, &opts)
+        .await
+        .map_err(|e| format!("search error: {e}"))?;
+
+    if matches!(format, OutputFormat::Json) {
+        let json = serde_json::json!({ "query": query, "results": results });
+        match serde_json::to_string_pretty(&json) {
+            Ok(s) => println!("{s}"),
+            Err(e) => return Err(format!("JSON encode failed: {e}")),
+        }
+        return Ok(());
+    }
+
+    if results.is_empty() {
+        eprintln!("no results for \"{query}\"");
+        return Ok(());
+    }
+
+    for r in &results {
+        println!("{}. {}", r.position, r.title);
+        println!("   {}", r.link);
+        if !r.snippet.is_empty() {
+            println!("   {}", r.snippet);
+        }
+        if let Some(ref content) = r.content {
+            println!();
+            println!("{content}");
+        }
+        println!();
+    }
+
+    Ok(())
+}
+
 pub async fn run_batch(cli: &Cli, entries: &[(String, Option<String>)]) -> Result<(), String> {
     let client = Arc::new(
         FetchClient::new(build_fetch_config(cli)).map_err(|e| format!("client error: {e}"))?,
diff --git a/crates/webclaw-fetch/Cargo.toml b/crates/webclaw-fetch/Cargo.toml
index dc2011f..abbf6f7 100644
--- a/crates/webclaw-fetch/Cargo.toml
+++ b/crates/webclaw-fetch/Cargo.toml
@@ -30,6 +30,9 @@ serde_json.workspace = true
 calamine = "0.34"
 zip = "2"
 flate2 = "1"
+# Already in the dependency tree (transitive); used directly here for
+# `join_all` (gated behind `alloc`) to bound concurrent scrapes in search.
+futures = { version = "0.3", default-features = false, features = ["alloc"] }
 
 [dev-dependencies]
 tempfile = "3"
diff --git a/crates/webclaw-fetch/src/lib.rs b/crates/webclaw-fetch/src/lib.rs
index 9fb702a..3f86ae3 100644
--- a/crates/webclaw-fetch/src/lib.rs
+++ b/crates/webclaw-fetch/src/lib.rs
@@ -14,6 +14,7 @@ pub mod locale;
 pub mod map;
 pub mod proxy;
 pub mod reddit;
+pub mod search;
 pub mod sitemap;
 pub mod tls;
 pub mod url_security;
@@ -27,5 +28,6 @@ pub use http::HeaderMap;
 pub use locale::{accept_language_for_tld, accept_language_for_url};
 pub use map::{MapOptions, discover_urls};
 pub use proxy::{parse_proxy_file, parse_proxy_line};
+pub use search::{SearchOptions, SearchResult, parse_serper_organic, search};
 pub use sitemap::SitemapEntry;
 pub use webclaw_pdf::PdfMode;
diff --git a/crates/webclaw-fetch/src/search.rs b/crates/webclaw-fetch/src/search.rs
new file mode 100644
index 0000000..1a03592
--- /dev/null
+++ b/crates/webclaw-fetch/src/search.rs
@@ -0,0 +1,322 @@
+//! Web search via Serper.dev (Google results) with optional content scraping.
+//!
+//! This is the self-hosted search path: the caller supplies their own
+//! Serper.dev API key (free tier at serper.dev). The CLI, MCP server, and
+//! OSS REST server all route through [`search`] so search works without the
+//! hosted webclaw API.
+//!
+//! Serper returns a plain JSON API, so we hit it with a vanilla wreq client
+//! (10s timeout) — no browser TLS fingerprinting needed. When `scrape` is
+//! set, the top results are fetched through the caller's [`FetchClient`]
+//! (which *does* carry the fingerprinting) and extracted to markdown.
+use std::sync::Arc;
+use std::time::Duration;
+
+use serde::{Deserialize, Serialize};
+use serde_json::{Value, json};
+use tokio::sync::Semaphore;
+use tracing::warn;
+
+use crate::client::FetchClient;
+use crate::error::FetchError;
+
+/// Serper.dev search endpoint.
+const SERPER_URL: &str = "https://google.serper.dev/search";
+
+/// Bound on the number of result pages scraped concurrently when
+/// `scrape` is enabled. Keeps the fan-out from overwhelming the proxy
+/// pool / remote hosts on a large result set.
+const SCRAPE_CONCURRENCY: usize = 5;
+
+/// Options controlling a search request.
+#[derive(Debug, Clone)]
+pub struct SearchOptions {
+    /// Number of organic results to request (clamped to `1..=10`).
+    pub num_results: usize,
+    /// Country code for localization (Serper `gl`, e.g. `"us"`, `"gb"`).
+    pub country: Option<String>,
+    /// Language code for localization (Serper `hl`, e.g. `"en"`, `"it"`).
+    pub lang: Option<String>,
+    /// When true, fetch + extract the result pages and fill in `content`.
+    pub scrape: bool,
+}
+
+impl Default for SearchOptions {
+    fn default() -> Self {
+        Self {
+            num_results: 5,
+            country: None,
+            lang: None,
+            scrape: false,
+        }
+    }
+}
+
+/// A single organic search result. When `scrape` was requested and the
+/// fetch succeeded, `content` holds the extracted markdown; otherwise it
+/// is `None` (a per-result fetch failure never fails the whole search).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SearchResult {
+    pub title: String,
+    pub link: String,
+    pub snippet: String,
+    pub position: usize,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub content: Option<String>,
+}
+
+/// Run a web search through Serper.dev.
+///
+/// `client`     — the caller's [`FetchClient`], used only when `opts.scrape`
+///                is set (to fetch + extract the result pages).
+/// `serper_key` — the caller's Serper.dev API key.
+/// `query`      — the search query.
+/// `opts`       — result count, localization, and whether to scrape.
+///
+/// Returns at most the clamped `num_results` organic results in Serper's
+/// order. With `scrape` enabled, those results are fetched concurrently
+/// (bounded) and their extracted markdown is attached to `content`.
+pub async fn search(
+    client: &FetchClient,
+    serper_key: &str,
+    query: &str,
+    opts: &SearchOptions,
+) -> Result<Vec<SearchResult>, FetchError> {
+    let num = opts.num_results.clamp(1, 10);
+
+    let response = call_serper(
+        serper_key,
+        query,
+        num,
+        opts.country.as_deref(),
+        opts.lang.as_deref(),
+    )
+    .await?;
+
+    let mut results = parse_serper_organic(&response);
+    results.truncate(num); // enforce the cap even if Serper over-returns
+    if opts.scrape && !results.is_empty() {
+        scrape_results(client, &mut results).await;
+    }
+
+    Ok(results)
+}
+
+/// POST the query to Serper.dev and return the raw JSON response.
+///
+/// Builds a plain wreq client (no browser emulation — Serper is a JSON
+/// API). `api_key` is trimmed before it is sent. Non-2xx responses are
+/// surfaced as [`FetchError::Build`] carrying the status and body so the
+/// caller can show Serper's own error (bad key, quota exceeded, etc.).
+async fn call_serper(
+    api_key: &str,
+    query: &str,
+    num: usize,
+    country: Option<&str>,
+    lang: Option<&str>,
+) -> Result<Value, FetchError> {
+    let http = wreq::Client::builder()
+        .timeout(Duration::from_secs(10))
+        .build()
+        .map_err(|e| FetchError::Build(format!("failed to build serper client: {e}")))?;
+
+    let mut body = json!({ "q": query, "num": num });
+    if let Some(gl) = country {
+        body["gl"] = json!(gl);
+    }
+    if let Some(hl) = lang {
+        body["hl"] = json!(hl);
+    }
+    // Serialize ourselves rather than `.json()` — the wreq `json` feature
+    // is not enabled in this crate and isn't worth pulling in for one call.
+    let payload = serde_json::to_vec(&body)
+        .map_err(|e| FetchError::Build(format!("serper request encode error: {e}")))?;
+
+    let resp = http
+        .post(SERPER_URL)
+        .header("X-API-KEY", api_key.trim())
+        .header("Content-Type", "application/json")
+        .body(payload)
+        .send()
+        .await?;
+
+    let status = resp.status();
+    if !status.is_success() {
+        let code = status.as_u16();
+        let text = resp.text().await.unwrap_or_default();
+        return Err(FetchError::Build(format!("serper returned {code}: {text}")));
+    }
+
+    let text = resp
+        .text()
+        .await
+        .map_err(|e| FetchError::BodyDecode(format!("serper response read error: {e}")))?;
+    serde_json::from_str::<Value>(&text)
+        .map_err(|e| FetchError::BodyDecode(format!("serper response parse error: {e}")))
+}
+
+/// Parse the `organic` array of a Serper response into [`SearchResult`]s.
+///
+/// Pure (no network), so it is unit-tested against a fixture. Entries
+/// missing `title` or `link` are skipped; `snippet` defaults to empty.
+/// `position` is 1-based over the kept entries.
+pub fn parse_serper_organic(response: &Value) -> Vec<SearchResult> {
+    let Some(organic) = response.get("organic").and_then(|v| v.as_array()) else {
+        return Vec::new();
+    };
+
+    organic
+        .iter()
+        .filter_map(|item| {
+            let title = item.get("title")?.as_str()?.to_string();
+            let link = item.get("link")?.as_str()?.to_string();
+            let snippet = item
+                .get("snippet")
+                .and_then(|v| v.as_str())
+                .unwrap_or("")
+                .to_string();
+            Some(SearchResult {
+                title,
+                link,
+                snippet,
+                // Filled in after collection so it tracks kept entries,
+                // not the raw array index (which may include skips).
+                position: 0,
+                content: None,
+            })
+        })
+        .enumerate()
+        .map(|(i, mut r)| {
+            r.position = i + 1;
+            r
+        })
+        .collect()
+}
+
+/// Fetch + extract the result pages and attach markdown to `content`.
+///
+/// Bounded by [`SCRAPE_CONCURRENCY`]. A per-result fetch or extraction
+/// failure leaves that result's `content` as `None` rather than failing
+/// the whole search.
+async fn scrape_results(client: &FetchClient, results: &mut [SearchResult]) {
+    let sem = Arc::new(Semaphore::new(SCRAPE_CONCURRENCY));
+
+    // Collect owned links first so the per-result futures don't borrow
+    // `results`. That keeps the future captures free of the slice's
+    // lifetime, which is what lets this compile inside the MCP `#[tool]`
+    // macro's stricter `Send`/lifetime bounds.
+    let links: Vec<String> = results.iter().map(|r| r.link.clone()).collect();
+
+    let scrapes = links.into_iter().map(|link| {
+        let sem = sem.clone();
+        async move {
+            // If the semaphore is closed (shutdown race), skip rather than panic.
+            let _permit = match sem.acquire().await {
+                Ok(p) => p,
+                Err(_) => return None,
+            };
+            match client.fetch(&link).await {
+                Ok(fetched) => match webclaw_core::extract(&fetched.html, Some(&fetched.url)) {
+                    Ok(extraction) => Some(extraction.content.markdown),
+                    Err(e) => {
+                        warn!(url = %link, error = %e, "search: extraction failed");
+                        None
+                    }
+                },
+                Err(e) => {
+                    warn!(url = %link, error = %e, "search: fetch failed");
+                    None
+                }
+            }
+        }
+    });
+
+    // `join_all` drives every scrape future concurrently and returns
+    // results in input order; the semaphore caps how many fetches run at
+    // once. Result set is tiny (≤10), so the all-at-once poll is fine.
+    let contents = futures::future::join_all(scrapes).await;
+    for (r, content) in results.iter_mut().zip(contents) {
+        r.content = content;
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn fixture() -> Value {
+        json!({
+            "searchParameters": { "q": "rust async", "type": "search" },
+            "organic": [
+                {
+                    "title": "Async Rust",
+                    "link": "https://example.com/async",
+                    "snippet": "Learn async in Rust.",
+                    "position": 1
+                },
+                {
+                    // snippet missing on purpose -> defaults to ""
+                    "title": "Tokio",
+                    "link": "https://tokio.rs"
+                },
+                {
+                    // no link -> skipped, must not shift positions of the rest
+                    "title": "No Link Here"
+                }
+            ]
+        })
+    }
+
+    #[test]
+    fn parses_organic_results() {
+        let results = parse_serper_organic(&fixture());
+        assert_eq!(results.len(), 2);
+
+        assert_eq!(results[0].title, "Async Rust");
+        assert_eq!(results[0].link, "https://example.com/async");
+        assert_eq!(results[0].snippet, "Learn async in Rust.");
+        assert_eq!(results[0].position, 1);
+        assert!(results[0].content.is_none());
+
+        // Missing snippet -> empty string, and position is 1-based over
+        // kept entries (the link-less entry is dropped, not counted).
+        assert_eq!(results[1].title, "Tokio");
+        assert_eq!(results[1].snippet, "");
+        assert_eq!(results[1].position, 2);
+    }
+
+    #[test]
+    fn missing_organic_key_yields_empty() {
+        assert!(parse_serper_organic(&json!({})).is_empty());
+        assert!(parse_serper_organic(&json!({ "organic": "not-an-array" })).is_empty());
+    }
+
+    #[test]
+    fn search_result_serializes_without_null_content() {
+        let r = SearchResult {
+            title: "T".into(),
+            link: "https://e.com".into(),
+            snippet: "s".into(),
+            position: 1,
+            content: None,
+        };
+        let v = serde_json::to_value(&r).unwrap();
+        assert!(v.get("content").is_none(), "None content should be skipped");
+
+        let r2 = SearchResult {
+            content: Some("# md".into()),
+            ..r
+        };
+        let v2 = serde_json::to_value(&r2).unwrap();
+        assert_eq!(v2.get("content").and_then(|c| c.as_str()), Some("# md"));
+    }
+
+    #[test]
+    fn default_options() {
+        let o = SearchOptions::default();
+        assert_eq!(o.num_results, 5);
+        assert!(!o.scrape);
+        assert!(o.country.is_none());
+        assert!(o.lang.is_none());
+    }
+}
diff --git a/crates/webclaw-mcp/src/server.rs b/crates/webclaw-mcp/src/server.rs
index 9a469aa..ed76920 100644
--- a/crates/webclaw-mcp/src/server.rs
+++ b/crates/webclaw-mcp/src/server.rs
@@ -650,13 +650,55 @@ impl WebclawMcp {
         ))
     }
 
-    /// Search the web for a query and return structured results. Requires WEBCLAW_API_KEY.
+    /// Search the web for a query and return structured results.
+    ///
+    /// Resolves the backend in priority order:
+    /// 1. `SERPER_API_KEY` set → local Serper.dev search with the user's
+    ///    own key (no hosted API needed). Supports `country`, `lang`, and
+    ///    `scrape` (fetch + extract each result page).
+    /// 2. else `WEBCLAW_API_KEY` set → the hosted webclaw search API.
+    /// 3. else → an error explaining both options.
     #[tool]
     async fn search(&self, Parameters(params): Parameters<SearchParams>) -> Result<String, String> {
-        let cloud = self
-            .cloud
-            .as_ref()
-            .ok_or("Search requires WEBCLAW_API_KEY. Get a key at https://webclaw.io")?;
+        // Local path: user's own Serper key. Preferred when present so the
+        // tool works without the hosted API and without spending credits.
+        if let Ok(serper_key) = std::env::var("SERPER_API_KEY") {
+            if !serper_key.trim().is_empty() {
+                let opts = webclaw_fetch::SearchOptions {
+                    num_results: params.num_results.unwrap_or(5) as usize,
+                    country: params.country.clone(),
+                    lang: params.lang.clone(),
+                    scrape: params.scrape.unwrap_or(false),
+                };
+                let results = webclaw_fetch::search(
+                    self.fetch_client.as_ref(),
+                    &serper_key,
+                    &params.query,
+                    &opts,
+                )
+                .await
+                .map_err(|e| format!("search error: {e}"))?;
+
+                let mut output = format!("Found {} results:\n\n", results.len());
+                for r in &results {
+                    output.push_str(&format!("{}. {}\n   {}\n", r.position, r.title, r.link));
+                    if !r.snippet.is_empty() {
+                        output.push_str(&format!("   {}\n", r.snippet));
+                    }
+                    if let Some(ref content) = r.content {
+                        output.push_str(&format!("\n{content}\n"));
+                    }
+                    output.push('\n');
+                }
+                return Ok(output);
+            }
+        }
+
+        // Hosted path: the webclaw cloud API.
+        let cloud = self.cloud.as_ref().ok_or(
+            "Search requires a search backend: set SERPER_API_KEY for local search \
+             (get one free at serper.dev), or WEBCLAW_API_KEY for the hosted API.",
+        )?;
 
         let mut body = json!({ "query": params.query });
         if let Some(num) = params.num_results {
diff --git a/crates/webclaw-mcp/src/tools.rs b/crates/webclaw-mcp/src/tools.rs
index 02bf534..a1d9446 100644
--- a/crates/webclaw-mcp/src/tools.rs
+++ b/crates/webclaw-mcp/src/tools.rs
@@ -100,8 +100,17 @@ pub struct ResearchParams {
 pub struct SearchParams {
     /// Search query
     pub query: String,
-    /// Number of results to return (default: 10)
+    /// Number of results to return (default: 5, max: 10)
     pub num_results: Option<u32>,
+    /// Country code for localization (e.g. "us", "gb", "it").
+    /// Only used by the local Serper path (SERPER_API_KEY).
+    pub country: Option<String>,
+    /// Language code for localization (e.g. "en", "it").
+    /// Only used by the local Serper path (SERPER_API_KEY).
+    pub lang: Option<String>,
+    /// When true, fetch + extract each result page and include its
+    /// markdown. Only used by the local Serper path (SERPER_API_KEY).
+    pub scrape: Option<bool>,
 }
 
 /// Parameters for `vertical_scrape`: run a site-specific extractor by name.
diff --git a/crates/webclaw-server/src/error.rs b/crates/webclaw-server/src/error.rs
index a63848f..95c858e 100644
--- a/crates/webclaw-server/src/error.rs
+++ b/crates/webclaw-server/src/error.rs
@@ -38,6 +38,9 @@ pub enum ApiError {
 
     #[error("internal: {0}")]
     Internal(String),
+
+    #[error("{0}")]
+    NotImplemented(String),
 }
 
 impl ApiError {
@@ -48,6 +51,12 @@ impl ApiError {
     pub fn internal(msg: impl Into<String>) -> Self {
         Self::Internal(msg.into())
     }
+    /// 501 — a capability the operator hasn't configured (e.g. search
+    /// without `SERPER_API_KEY`). Distinct from `BadRequest` (client's
+    /// fault) and `Internal` (our fault): it's a deployment-config gap.
+    pub fn not_implemented(msg: impl Into<String>) -> Self {
+        Self::NotImplemented(msg.into())
+    }
 
     fn status(&self) -> StatusCode {
         match self {
@@ -57,6 +66,7 @@ impl ApiError {
             Self::Fetch(_) => StatusCode::BAD_GATEWAY,
             Self::Extract(_) | Self::Llm(_) => StatusCode::UNPROCESSABLE_ENTITY,
             Self::Internal(_) => StatusCode::INTERNAL_SERVER_ERROR,
+            Self::NotImplemented(_) => StatusCode::NOT_IMPLEMENTED,
         }
     }
 }
diff --git a/crates/webclaw-server/src/main.rs b/crates/webclaw-server/src/main.rs
index 0053db5..8da3764 100644
--- a/crates/webclaw-server/src/main.rs
+++ b/crates/webclaw-server/src/main.rs
@@ -123,6 +123,7 @@ fn build_app(state: AppState) -> Router {
         )
         .route("/crawl", post(routes::crawl::crawl))
         .route("/map", post(routes::map::map))
+        .route("/search", post(routes::search::search))
         .route("/batch", post(routes::batch::batch))
         .route("/extract", post(routes::extract::extract))
         .route("/extractors", get(routes::structured::list_extractors))
@@ -289,4 +290,48 @@ mod tests {
             "expected unknown-format error, got {body:?}"
         );
     }
+
+    fn post_json(uri: &str, body: &str) -> Request<Body> {
+        Request::builder()
+            .method("POST")
+            .uri(uri)
+            .header("content-type", "application/json")
+            .body(Body::from(body.to_owned()))
+            .expect("request")
+    }
+
+    #[tokio::test]
+    async fn search_empty_query_is_bad_request() {
+        // The empty-query guard runs before the key check, so this is
+        // hermetic regardless of whether SERPER_API_KEY is set.
+        let app = app_with_key(None).await;
+        let resp = app
+            .oneshot(post_json("/v1/search", r#"{"query":"   "}"#))
+            .await
+            .expect("response");
+        assert_eq!(resp.status(), StatusCode::BAD_REQUEST);
+    }
+
+    #[tokio::test]
+    async fn search_without_serper_key_is_not_implemented() {
+        // Only meaningful when the operator hasn't configured a key.
+        // Skip if the test environment happens to set SERPER_API_KEY so
+        // we don't make a live Serper call from the test suite.
+        if std::env::var("SERPER_API_KEY").is_ok_and(|k| !k.trim().is_empty()) {
+            return;
+        }
+        let app = app_with_key(None).await;
+        let resp = app
+            .oneshot(post_json("/v1/search", r#"{"query":"rust"}"#))
+            .await
+            .expect("response");
+        assert_eq!(resp.status(), StatusCode::NOT_IMPLEMENTED);
+        let body = json_body(resp).await;
+        assert!(
+            body["error"]
+                .as_str()
+                .is_some_and(|e| e.contains("SERPER_API_KEY")),
+            "expected serper setup hint, got {body:?}"
+        );
+    }
 }
diff --git a/crates/webclaw-server/src/routes/mod.rs b/crates/webclaw-server/src/routes/mod.rs
index 01f1052..3ed2273 100644
--- a/crates/webclaw-server/src/routes/mod.rs
+++ b/crates/webclaw-server/src/routes/mod.rs
@@ -6,6 +6,11 @@
 //! (anti-bot bypass with stealth Chrome, JS rendering at scale,
 //! per-user auth, billing, async job queues, agent loops) are
 //! intentionally not implemented here. Use api.webclaw.io for those.
+//!
+//! `POST /v1/search` is supported when the operator supplies their own
+//! Serper.dev API key via the `SERPER_API_KEY` env var (free key at
+//! serper.dev). Without it, the route returns 501. This is the
+//! bring-your-own-key path — no hosted webclaw account required.
 
 pub mod batch;
 pub mod brand;
@@ -15,5 +20,6 @@ pub mod extract;
 pub mod health;
 pub mod map;
 pub mod scrape;
+pub mod search;
 pub mod structured;
 pub mod summarize;
diff --git a/crates/webclaw-server/src/routes/search.rs b/crates/webclaw-server/src/routes/search.rs
new file mode 100644
index 0000000..5bc480e
--- /dev/null
+++ b/crates/webclaw-server/src/routes/search.rs
@@ -0,0 +1,68 @@
+//! POST /v1/search — web search via Serper.dev using the operator's own key.
+//!
+//! Enabled only when the server is started with `SERPER_API_KEY` set
+//! (get a free key at serper.dev). Without it, this route returns 501 so
+//! self-hosters know the capability exists but isn't configured.
+//!
+//! With `scrape: true`, each result page is fetched + extracted to
+//! markdown via the shared [`webclaw_fetch::FetchClient`]. A per-result
+//! fetch failure omits that result's `content` field; it never fails the
+//! whole search.
+
+use axum::{Json, extract::State};
+use serde::Deserialize;
+use serde_json::{Value, json};
+
+use crate::{error::ApiError, state::AppState};
+
+#[derive(Debug, Deserialize)]
+pub struct SearchRequest {
+    pub query: String,
+    /// Max results to return (default 5, clamped to 1..=10).
+    #[serde(default = "default_num_results")]
+    pub num_results: usize,
+    /// Country code for localization (e.g. "us", "gb", "it").
+    pub country: Option<String>,
+    /// Language code for localization (e.g. "en", "it").
+    pub lang: Option<String>,
+    /// When true, fetch + extract each result page and include its markdown.
+    #[serde(default)]
+    pub scrape: bool,
+}
+
+fn default_num_results() -> usize {
+    5
+}
+
+pub async fn search(
+    State(state): State<AppState>,
+    Json(req): Json<SearchRequest>,
+) -> Result<Json<Value>, ApiError> {
+    if req.query.trim().is_empty() {
+        return Err(ApiError::bad_request("`query` is required"));
+    }
+
+    let serper_key = state.serper_api_key().ok_or_else(|| {
+        ApiError::not_implemented(
+            "search is not configured: start the server with SERPER_API_KEY set \
+             (get a free key at serper.dev)",
+        )
+    })?;
+
+    let opts = webclaw_fetch::SearchOptions {
+        num_results: req.num_results,
+        country: req.country.clone(),
+        lang: req.lang.clone(),
+        scrape: req.scrape,
+    };
+
+    let results = webclaw_fetch::search(state.fetch(), serper_key, &req.query, &opts)
+        .await
+        .map_err(|e| ApiError::internal(format!("search failed: {e}")))?;
+
+    Ok(Json(json!({
+        "query": req.query,
+        "count": results.len(),
+        "results": results,
+    })))
+}
diff --git a/crates/webclaw-server/src/state.rs b/crates/webclaw-server/src/state.rs
index 9807a04..afa304c 100644
--- a/crates/webclaw-server/src/state.rs
+++ b/crates/webclaw-server/src/state.rs
@@ -47,6 +47,9 @@ struct Inner {
     pub llm_chain: Arc<ProviderChain>,
     /// Inbound bearer-auth token for this server's own `/v1/*` surface.
     pub api_key: Option<String>,
+    /// Operator's own Serper.dev API key, read from `SERPER_API_KEY`.
+    /// Enables `/v1/search`. Unset = `/v1/search` returns 501.
+    pub serper_api_key: Option<String>,
 }
 
 impl AppState {
@@ -82,12 +85,22 @@ impl AppState {
 
         let llm_chain = Arc::new(ProviderChain::default().await);
 
+        // Operator's own Serper.dev key enables /v1/search. Empty/unset
+        // leaves search returning 501 with a setup hint.
+        let serper_api_key = std::env::var("SERPER_API_KEY")
+            .ok()
+            .filter(|k| !k.trim().is_empty());
+        if serper_api_key.is_some() {
+            info!("search enabled — using SERPER_API_KEY for /v1/search");
+        }
+
         Ok(Self {
             inner: Arc::new(Inner {
                 fetch: Arc::new(fetch),
                 fetch_config: config,
                 llm_chain,
                 api_key: inbound_api_key,
+                serper_api_key,
             }),
         })
     }
@@ -112,6 +125,11 @@ impl AppState {
     pub fn api_key(&self) -> Option<&str> {
         self.inner.api_key.as_deref()
     }
+
+    /// Operator's Serper.dev key for `/v1/search`, if configured.
+    pub fn serper_api_key(&self) -> Option<&str> {
+        self.inner.serper_api_key.as_deref()
+    }
 }
 
 /// Resolve the outbound cloud key. Prefers `WEBCLAW_CLOUD_API_KEY`;