diff --git a/.gitignore b/.gitignore
index f97d040..7a5a785 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,15 @@
 target/
 .DS_Store
 .env
+.env.*
 proxies.txt
 .claude/skills/
-*.json
+# Scratch / local artifacts (previously covered by overbroad `*.json`,
+# which would have also swallowed package.json, components.json,
+# .smithery/*.json if they were ever modified).
+*.local.json
+local-test-results.json
+# The CLI research command dumps JSON output keyed on the query; those
+# dumps aren't code and shouldn't live in git. Track deliberately-saved
+# research output under a different name.
+research-*.json
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 96ed417..5079bbc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,19 @@
 All notable changes to webclaw are documented here.
 Format follows [Keep a Changelog](https://keepachangelog.com/).
 
+## [0.3.16] — 2026-04-16
+
+### Hardened
+- **Response body caps across fetch + LLM providers (P2).** Every HTTP response buffered from the network is now rejected if it exceeds a hard size cap. `webclaw-fetch::Response::from_wreq` caps HTML/doc responses at 50 MB (rejecting on Content-Length before we pay for the allocation, and again after `bytes().await` as a belt-and-braces check against servers that lie about their size); `webclaw-llm` providers (anthropic / openai / ollama) cap JSON responses at 5 MB via a shared `response_json_capped` helper. Previously an adversarial or runaway upstream could push unbounded memory into the process. Closes the DoS-via-giant-body class of bugs noted in the audit.
+- **Crawler frontier cap (P2).** If the frontier grows past `max(max_pages × 10, 100)` entries after a depth level, it is truncated to `max(max_pages × 5, 50)` entries, dropping the excess links. Dense pages (tag clouds, search results) used to push the frontier into the tens of thousands even after `max_pages` halted new fetches, keeping string allocations alive long after the crawl was effectively done.
+- **Glob pattern validation (P2).** User-supplied `include_patterns` / `exclude_patterns` passed to the crawler are now rejected if they contain more than 4 `**` wildcards or exceed 1024 chars. The backtracking matcher degrades exponentially on deeply nested `**` against long paths; this keeps adversarial config files from weaponising it.
+
+### Cleanup
+- **Removed blanket `#![allow(dead_code)]` in `webclaw-cli/src/main.rs`.** No dead code surfaced; the suppression was obsolete.
+- **`.gitignore`: replaced overbroad `*.json` with specific local-artifact patterns.** The previous rule would have swallowed `package.json` / `components.json` / `.smithery/*.json` if they were ever modified.
+
+---
+
 ## [0.3.15] — 2026-04-16
 
 ### Fixed
diff --git a/Cargo.lock b/Cargo.lock
index 4bf0ec4..09bec62 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3102,7 +3102,7 @@ dependencies = [
 
 [[package]]
 name = "webclaw-cli"
-version = "0.3.15"
+version = "0.3.16"
 dependencies = [
  "clap",
  "dotenvy",
@@ -3123,7 +3123,7 @@ dependencies = [
 
 [[package]]
 name = "webclaw-core"
-version = "0.3.15"
+version = "0.3.16"
 dependencies = [
  "ego-tree",
  "once_cell",
@@ -3141,7 +3141,7 @@ dependencies = [
 
 [[package]]
 name = "webclaw-fetch"
-version = "0.3.15"
+version = "0.3.16"
 dependencies = [
  "bytes",
  "calamine",
@@ -3163,7 +3163,7 @@ dependencies = [
 
 [[package]]
 name = "webclaw-llm"
-version = "0.3.15"
+version = "0.3.16"
 dependencies = [
  "async-trait",
  "reqwest",
@@ -3176,7 +3176,7 @@ dependencies = [
 
 [[package]]
 name = "webclaw-mcp"
-version = "0.3.15"
+version = "0.3.16"
 dependencies = [
  "dirs",
  "dotenvy",
@@ -3197,7 +3197,7 @@ dependencies = [
 
 [[package]]
 name = "webclaw-pdf"
-version = "0.3.15"
+version = "0.3.16"
 dependencies = [
  "pdf-extract",
  "thiserror",
diff --git a/Cargo.toml b/Cargo.toml
index 97ead31..f8587ca 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,7 +3,7 @@ resolver = "2"
 members = ["crates/*"]
 
 [workspace.package]
-version = "0.3.15"
+version = "0.3.16"
 edition = "2024"
 license = "AGPL-3.0"
 repository = "https://github.com/0xMassi/webclaw"
diff --git a/crates/webclaw-cli/src/main.rs b/crates/webclaw-cli/src/main.rs
index e520d4f..8070d63 100644
--- a/crates/webclaw-cli/src/main.rs
+++ b/crates/webclaw-cli/src/main.rs
@@ -1,4 +1,3 @@
-#![allow(dead_code)]
 /// CLI entry point -- wires webclaw-core and webclaw-fetch into a single command.
 /// All extraction and fetching logic lives in sibling crates; this is pure plumbing.
 mod cloud;
diff --git a/crates/webclaw-fetch/src/client.rs b/crates/webclaw-fetch/src/client.rs
index 2bee533..cc6378a 100644
--- a/crates/webclaw-fetch/src/client.rs
+++ b/crates/webclaw-fetch/src/client.rs
@@ -87,9 +87,27 @@ struct Response {
     body: bytes::Bytes,
 }
 
+/// Maximum fetched body size. A single 50 MB HTML document is already
+/// several orders of magnitude past any realistic page; larger responses
+/// are either malicious (log bomb, zip-bomb decompressed) or streaming
+/// bugs. Caps the blast radius of the HTML → markdown conversion
+/// downstream (which could otherwise allocate multiple full-size Strings
+/// per page in collapse_whitespace + strip_markdown).
+const MAX_BODY_BYTES: u64 = 50 * 1024 * 1024;
+
 impl Response {
-    /// Buffer a wreq response into an owned Response.
+    /// Buffer a wreq response into an owned Response. Rejects bodies that
+    /// advertise a Content-Length beyond [`MAX_BODY_BYTES`] before we pay
+    /// the allocation, and rejects again after the fact as a belt-and-braces
+    /// check against a lying server.
     async fn from_wreq(resp: wreq::Response) -> Result<Self, FetchError> {
+        if let Some(len) = resp.content_length()
+            && len > MAX_BODY_BYTES
+        {
+            return Err(FetchError::BodyDecode(format!(
+                "response body {len} bytes exceeds cap {MAX_BODY_BYTES}"
+            )));
+        }
         let status = resp.status().as_u16();
         let url = resp.uri().to_string();
         let headers = resp.headers().clone();
@@ -97,6 +115,12 @@ impl Response {
             .bytes()
             .await
             .map_err(|e| FetchError::BodyDecode(e.to_string()))?;
+        if body.len() as u64 > MAX_BODY_BYTES {
+            return Err(FetchError::BodyDecode(format!(
+                "response body {} bytes exceeds cap {MAX_BODY_BYTES}",
+                body.len()
+            )));
+        }
         Ok(Self {
             status,
             url,
diff --git a/crates/webclaw-fetch/src/crawler.rs b/crates/webclaw-fetch/src/crawler.rs
index bfb86a6..740c479 100644
--- a/crates/webclaw-fetch/src/crawler.rs
+++ b/crates/webclaw-fetch/src/crawler.rs
@@ -137,6 +137,19 @@ impl Crawler {
         let seed_origin = origin_key(&seed);
         let seed_root_domain = root_domain(&seed);
 
+        // Reject pathological user-supplied glob patterns before they can
+        // exercise the recursive `**` handler in glob_match_inner. The
+        // matcher is a straight backtracking implementation; a deeply
+        // nested `**/**/**/...` pattern against a long path can degrade
+        // to exponential time per link checked, per page crawled.
+        for pat in config
+            .include_patterns
+            .iter()
+            .chain(config.exclude_patterns.iter())
+        {
+            validate_glob(pat)?;
+        }
+
         let client = FetchClient::new(config.fetch.clone())?;
 
         Ok(Self {
@@ -387,6 +400,26 @@ impl Crawler {
                 }
             }
 
+            // Cap frontier size independently of max_pages. Pages like
+            // search-result listings or tag clouds can emit thousands of
+            // links per page; without this a single dense page could push
+            // the frontier into the tens of thousands of entries and keep
+            // String allocations alive even after max_pages halts crawling.
+            // Trim aggressively once we exceed 10× max_pages; truncate()
+            // keeps the head of the frontier, i.e. the links discovered
+            // from the first pages processed at this depth.
+            let frontier_cap = self.config.max_pages.saturating_mul(10).max(100);
+            if next_frontier.len() > frontier_cap {
+                let keep = self.config.max_pages.saturating_mul(5).max(50);
+                warn!(
+                    frontier = next_frontier.len(),
+                    cap = frontier_cap,
+                    trimmed_to = keep,
+                    "frontier exceeded cap, truncating"
+                );
+                next_frontier.truncate(keep);
+            }
+
             frontier = next_frontier;
         }
 
@@ -546,6 +579,49 @@ fn normalize(url: &Url) -> String {
     format!("{scheme}://{host}{port_suffix}{path}{query}")
 }
 
+/// Maximum number of `**` wildcards allowed in a single user glob. Each
+/// additional `**` multiplies the backtracking fan-out of `glob_match_inner`
+/// against adversarial paths; 4 is a practical ceiling for legitimate
+/// nested include/exclude patterns and still keeps the matcher linear-ish.
+const MAX_GLOB_DOUBLESTAR: usize = 4;
+
+/// Maximum glob pattern length. Keeps a single pattern from taking
+/// megabytes of RAM if someone copy-pastes garbage into --include.
+const MAX_GLOB_LEN: usize = 1024;
+
+/// Validate a user-supplied glob pattern before it hits the matcher.
+/// Rejects patterns that would drive `glob_match_inner` into pathological
+/// backtracking (too many `**`, excessive length).
+fn validate_glob(pat: &str) -> Result<(), FetchError> {
+    if pat.len() > MAX_GLOB_LEN {
+        return Err(FetchError::Build(format!(
+            "glob pattern exceeds {MAX_GLOB_LEN} chars ({} given)",
+            pat.len()
+        )));
+    }
+    // Count non-overlapping occurrences of `**`.
+    let bytes = pat.as_bytes();
+    let mut count = 0usize;
+    let mut i = 0;
+    while i + 1 < bytes.len() {
+        if bytes[i] == b'*' && bytes[i + 1] == b'*' {
+            count += 1;
+            // Skip run of consecutive `*` so `***` counts as one.
+            while i < bytes.len() && bytes[i] == b'*' {
+                i += 1;
+            }
+        } else {
+            i += 1;
+        }
+    }
+    if count > MAX_GLOB_DOUBLESTAR {
+        return Err(FetchError::Build(format!(
+            "glob pattern has {count} `**` wildcards (max {MAX_GLOB_DOUBLESTAR})"
+        )));
+    }
+    Ok(())
+}
+
 /// Simple glob matching for URL paths. Supports:
 /// - `*` matches any characters within a single path segment (no `/`)
 /// - `**` matches any characters including `/` (any number of segments)
@@ -700,6 +776,37 @@ mod tests {
         assert_eq!(root_domain(&url), "example.com");
     }
 
+    // -- validate_glob tests --
+
+    #[test]
+    fn validate_glob_accepts_reasonable_patterns() {
+        assert!(validate_glob("/api/*").is_ok());
+        assert!(validate_glob("/api/**").is_ok());
+        assert!(validate_glob("/docs/**/page-*.html").is_ok());
+        assert!(validate_glob("/a/**/b/**/c/**/d/**").is_ok());
+    }
+
+    #[test]
+    fn validate_glob_rejects_too_many_doublestars() {
+        // 5 `**` exceeds MAX_GLOB_DOUBLESTAR = 4.
+        let pat = "/a/**/b/**/c/**/d/**/e/**";
+        let err = validate_glob(pat).unwrap_err();
+        assert!(matches!(err, FetchError::Build(ref m) if m.contains("`**` wildcards")));
+    }
+
+    #[test]
+    fn validate_glob_treats_triple_star_as_one() {
+        // `***` is still one run, should not count as 2.
+        assert!(validate_glob("/a/***/b/***/c/***/d/***").is_ok());
+    }
+
+    #[test]
+    fn validate_glob_rejects_oversized_pattern() {
+        let giant = "x".repeat(2048);
+        let err = validate_glob(&giant).unwrap_err();
+        assert!(matches!(err, FetchError::Build(ref m) if m.contains("exceeds")));
+    }
+
     // -- glob_match tests --
 
     #[test]
diff --git a/crates/webclaw-llm/src/providers/anthropic.rs b/crates/webclaw-llm/src/providers/anthropic.rs
index 9852e27..71ca1f9 100644
--- a/crates/webclaw-llm/src/providers/anthropic.rs
+++ b/crates/webclaw-llm/src/providers/anthropic.rs
@@ -95,7 +95,9 @@ impl LlmProvider for AnthropicProvider {
             )));
         }
 
-        let json: serde_json::Value = resp.json().await?;
+        // Read body with a size cap so a malicious or misbehaving
+        // endpoint can't allocate unbounded memory via resp.json().
+        let json = super::response_json_capped(resp).await?;
 
         // Anthropic response: {"content": [{"type": "text", "text": "..."}]}
         let raw = json["content"][0]["text"]
diff --git a/crates/webclaw-llm/src/providers/mod.rs b/crates/webclaw-llm/src/providers/mod.rs
index 907b88e..1e6412b 100644
--- a/crates/webclaw-llm/src/providers/mod.rs
+++ b/crates/webclaw-llm/src/providers/mod.rs
@@ -2,6 +2,8 @@ pub mod anthropic;
 pub mod ollama;
 pub mod openai;
 
+use crate::error::LlmError;
+
 /// Load an API key from an explicit override or an environment variable.
 /// Returns `None` if neither is set or the value is empty.
 pub(crate) fn load_api_key(override_key: Option<String>, env_var: &str) -> Option<String> {
@@ -9,6 +11,36 @@ pub(crate) fn load_api_key(override_key: Option<String>, env_var: &str) -> Opti
     if key.is_empty() { None } else { Some(key) }
 }
 
+/// Maximum bytes we'll pull from an LLM provider response. 5 MB is already
+/// ~5× the largest real payload any of these providers emits for normal
+/// completions; anything bigger is either a streaming bug on their end or
+/// an adversarial response aimed at exhausting our memory.
+pub(crate) const MAX_RESPONSE_BYTES: u64 = 5 * 1024 * 1024;
+
+/// Read a provider response as JSON, capping total bytes at
+/// [`MAX_RESPONSE_BYTES`]. Rejects via Content-Length if the server is
+/// honest about size; otherwise reads to completion and checks the actual
+/// byte length so an unbounded body still can't swallow unbounded memory.
+pub(crate) async fn response_json_capped(
+    resp: reqwest::Response,
+) -> Result<serde_json::Value, LlmError> {
+    if let Some(len) = resp.content_length()
+        && len > MAX_RESPONSE_BYTES
+    {
+        return Err(LlmError::ProviderError(format!(
+            "response body {len} bytes exceeds cap {MAX_RESPONSE_BYTES}"
+        )));
+    }
+    let bytes = resp.bytes().await?;
+    if bytes.len() as u64 > MAX_RESPONSE_BYTES {
+        return Err(LlmError::ProviderError(format!(
+            "response body {} bytes exceeds cap {MAX_RESPONSE_BYTES}",
+            bytes.len()
+        )));
+    }
+    serde_json::from_slice(&bytes).map_err(|e| LlmError::InvalidJson(format!("response body: {e}")))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/crates/webclaw-llm/src/providers/ollama.rs b/crates/webclaw-llm/src/providers/ollama.rs
index 4971525..9ee66c9 100644
--- a/crates/webclaw-llm/src/providers/ollama.rs
+++ b/crates/webclaw-llm/src/providers/ollama.rs
@@ -80,7 +80,9 @@ impl LlmProvider for OllamaProvider {
             )));
         }
 
-        let json: serde_json::Value = resp.json().await?;
+        // Cap response body size to defend against adversarial payloads
+        // or a runaway local model streaming gigabytes.
+        let json = super::response_json_capped(resp).await?;
 
         let raw = json["message"]["content"]
             .as_str()
diff --git a/crates/webclaw-llm/src/providers/openai.rs b/crates/webclaw-llm/src/providers/openai.rs
index 49825cd..6422cc4 100644
--- a/crates/webclaw-llm/src/providers/openai.rs
+++ b/crates/webclaw-llm/src/providers/openai.rs
@@ -91,7 +91,8 @@ impl LlmProvider for OpenAiProvider {
             )));
         }
 
-        let json: serde_json::Value = resp.json().await?;
+        // Cap response body size to defend against adversarial payloads.
+        let json = super::response_json_capped(resp).await?;
 
         let raw = json["choices"][0]["message"]["content"]
             .as_str()
diff --git a/packages/create-webclaw/server.json b/packages/create-webclaw/server.json
new file mode 100644
index 0000000..0cfc140
--- /dev/null
+++ b/packages/create-webclaw/server.json
@@ -0,0 +1,17 @@
+{
+  "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json",
+  "name": "io.github.0xMassi/webclaw",
+  "title": "webclaw",
+  "description": "Web extraction MCP server. Scrape, crawl, extract, summarize any URL to clean markdown.",
+  "version": "0.1.4",
+  "packages": [
+    {
+      "registryType": "npm",
+      "identifier": "create-webclaw",
+      "version": "0.1.4",
+      "transport": {
+        "type": "stdio"
+      }
+    }
+  ]
+}