diff --git a/.gitignore b/.gitignore
index 63934d6..f97d040 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ target/
 .env
 proxies.txt
 .claude/skills/
+*.json
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cbe6897..c26a382 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,13 @@
 All notable changes to webclaw are documented here.
 Format follows [Keep a Changelog](https://keepachangelog.com/).
 
+## [0.3.12] — 2026-04-10
+
+### Added
+- **Crawl scope control**: new `allow_subdomains` and `allow_external_links` fields on `CrawlConfig`. By default crawls stay same-origin. Enable `allow_subdomains` to follow sibling/child subdomains (e.g. blog.example.com from example.com), or `allow_external_links` for full cross-origin crawling. Root domain extraction uses a heuristic that handles two-part TLDs (co.uk, com.au).
+
+---
+
 ## [0.3.11] — 2026-04-10
 
 ### Added
diff --git a/Cargo.lock b/Cargo.lock
index 0ac74d4..0233c8b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3102,7 +3102,7 @@ dependencies = [
 
 [[package]]
 name = "webclaw-cli"
-version = "0.3.11"
+version = "0.3.12"
 dependencies = [
  "clap",
  "dotenvy",
@@ -3122,7 +3122,7 @@ dependencies = [
 
 [[package]]
 name = "webclaw-core"
-version = "0.3.11"
+version = "0.3.12"
 dependencies = [
  "ego-tree",
  "once_cell",
@@ -3140,7 +3140,7 @@ dependencies = [
 
 [[package]]
 name = "webclaw-fetch"
-version = "0.3.11"
+version = "0.3.12"
 dependencies = [
  "bytes",
  "calamine",
@@ -3162,7 +3162,7 @@ dependencies = [
 
 [[package]]
 name = "webclaw-llm"
-version = "0.3.11"
+version = "0.3.12"
 dependencies = [
  "async-trait",
  "reqwest",
@@ -3175,7 +3175,7 @@ dependencies = [
 
 [[package]]
 name = "webclaw-mcp"
-version = "0.3.11"
+version = "0.3.12"
 dependencies = [
  "dirs",
  "dotenvy",
@@ -3196,7 +3196,7 @@ dependencies = [
 
 [[package]]
 name = "webclaw-pdf"
-version = "0.3.11"
+version = "0.3.12"
 dependencies = [
  "pdf-extract",
  "thiserror",
diff --git a/Cargo.toml b/Cargo.toml
index bc29fd7..49b0a03 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,7 +3,7 @@ resolver = "2"
 members = ["crates/*"]
 
 [workspace.package]
-version = "0.3.11"
+version = "0.3.12"
 edition = "2024"
 license = "AGPL-3.0"
 repository = "https://github.com/0xMassi/webclaw"
diff --git a/crates/webclaw-cli/src/main.rs b/crates/webclaw-cli/src/main.rs
index cebdb65..d5af713 100644
--- a/crates/webclaw-cli/src/main.rs
+++ b/crates/webclaw-cli/src/main.rs
@@ -1218,6 +1218,8 @@ async fn run_crawl(cli: &Cli) -> Result<(), String> {
         exclude_patterns,
         progress_tx: Some(progress_tx),
         cancel_flag: Some(Arc::clone(&cancel_flag)),
+        allow_subdomains: false,
+        allow_external_links: false,
     };
 
     // Load resume state if --crawl-state file exists
diff --git a/crates/webclaw-fetch/src/crawler.rs b/crates/webclaw-fetch/src/crawler.rs
index 3ef3f86..74eb30f 100644
--- a/crates/webclaw-fetch/src/crawler.rs
+++ b/crates/webclaw-fetch/src/crawler.rs
@@ -1,9 +1,13 @@
-/// Recursive same-origin web crawler built on top of [`FetchClient`].
+/// Recursive web crawler built on top of [`FetchClient`].
 ///
 /// Starts from a seed URL, extracts content, discovers links, and follows
 /// them breadth-first up to a configurable depth/page limit. Uses a semaphore
 /// for bounded concurrency and per-request delays for politeness.
 ///
+/// Scope control: by default only same-origin links are followed. Enable
+/// `allow_subdomains` to include sibling/child subdomains of the seed host,
+/// or `allow_external_links` to follow links to any domain.
+///
 /// When `use_sitemap` is enabled, the crawler first discovers URLs from the
 /// site's sitemaps and seeds the BFS frontier before crawling.
 use std::collections::HashSet;
@@ -39,11 +43,17 @@ pub struct CrawlConfig {
     /// Seed BFS frontier from sitemap discovery before crawling.
     pub use_sitemap: bool,
     /// Glob patterns for paths to include. If non-empty, only matching URLs are crawled.
-    /// E.g. `["/api/*", "/guides/*"]` — matched against the URL path.
+    /// E.g. `["/api/*", "/guides/*"]` -- matched against the URL path.
     pub include_patterns: Vec<String>,
     /// Glob patterns for paths to exclude. Checked after include_patterns.
-    /// E.g. `["/changelog/*", "/blog/*"]` — matching URLs are skipped.
+    /// E.g. `["/changelog/*", "/blog/*"]` -- matching URLs are skipped.
     pub exclude_patterns: Vec<String>,
+    /// Follow links on subdomains of the seed domain (e.g. blog.example.com
+    /// when crawling example.com). Default: false (same-origin only).
+    pub allow_subdomains: bool,
+    /// Follow links to entirely different domains. Default: false.
+    /// When true, the crawler becomes cross-origin. Use with caution.
+    pub allow_external_links: bool,
     /// Optional channel sender for streaming per-page results as they complete.
     /// When set, each `PageResult` is sent on this channel immediately after extraction.
     pub progress_tx: Option<tokio::sync::broadcast::Sender<PageResult>>,
@@ -64,6 +74,8 @@ impl Default for CrawlConfig {
             use_sitemap: false,
             include_patterns: Vec::new(),
             exclude_patterns: Vec::new(),
+            allow_subdomains: false,
+            allow_external_links: false,
             progress_tx: None,
             cancel_flag: None,
         }
@@ -113,6 +125,8 @@ pub struct Crawler {
     client: Arc<FetchClient>,
     config: CrawlConfig,
     seed_origin: String,
+    /// Root domain of the seed URL for subdomain matching (e.g. "example.com").
+    seed_root_domain: String,
 }
 
 impl Crawler {
@@ -121,6 +135,7 @@ impl Crawler {
     pub fn new(seed_url: &str, config: CrawlConfig) -> Result<Self, FetchError> {
         let seed = Url::parse(seed_url).map_err(|_| FetchError::InvalidUrl(seed_url.into()))?;
         let seed_origin = origin_key(&seed);
+        let seed_root_domain = root_domain(&seed);
 
         let client = FetchClient::new(config.fetch.clone())?;
 
@@ -128,6 +143,7 @@ impl Crawler {
             client: Arc::new(client),
             config,
             seed_origin,
+            seed_root_domain,
         })
     }
 
@@ -278,7 +294,7 @@ impl Crawler {
                 let delay = self.config.delay;
 
                 handles.push(tokio::spawn(async move {
-                    // Acquire permit — blocks if concurrency limit reached
+                    // Acquire permit -- blocks if concurrency limit reached
                     let _permit = permit.acquire().await.expect("semaphore closed");
                     tokio::time::sleep(delay).await;
 
@@ -392,9 +408,20 @@ impl Crawler {
             _ => return None,
         }
 
-        // Same-origin check (scheme + host + port)
-        if origin_key(&parsed) != self.seed_origin {
-            return None;
+        // Scope check: same-origin, subdomain, or external
+        if !self.config.allow_external_links {
+            let link_origin = origin_key(&parsed);
+            if link_origin != self.seed_origin {
+                // Not same-origin. Check if subdomain crawling is allowed.
+                if self.config.allow_subdomains {
+                    let link_root = root_domain(&parsed);
+                    if link_root != self.seed_root_domain {
+                        return None;
+                    }
+                } else {
+                    return None;
+                }
+            }
         }
 
         // Path prefix filter
@@ -457,6 +484,29 @@ fn origin_key(url: &Url) -> String {
     format!("{}://{}{}", url.scheme(), host, port_suffix)
 }
 
+/// Extract the root (registrable) domain of a URL for subdomain comparison.
+/// "blog.docs.example.com" -> "example.com", "blog.example.co.uk" -> "example.co.uk".
+///
+/// Best-effort heuristic, not a Public Suffix List lookup: keep the last two labels, or
+/// three when a short (<=3 chars) SLD sits under a two-letter ccTLD ("co.uk", "com.au").
+/// IP hosts are returned whole; a trailing root dot and a leading "www." are ignored.
+fn root_domain(url: &Url) -> String {
+    // `domain()` is `None` for IP hosts: dot-splitting "10.0.0.1" would yield "0.0.1".
+    let Some(host) = url.domain() else {
+        return url.host_str().unwrap_or("").to_ascii_lowercase();
+    };
+    let host = host.strip_prefix("www.").unwrap_or(host).trim_end_matches('.');
+    let labels: Vec<&str> = host.split('.').collect();
+    let n = labels.len();
+    // Requiring a ccTLD keeps short gTLD names ("docs.ibm.com" -> "ibm.com") at two labels.
+    let keep = if n >= 3 && labels[n - 1].len() == 2 && labels[n - 2].len() <= 3 {
+        3
+    } else {
+        n.min(2)
+    };
+    labels[n - keep..].join(".").to_ascii_lowercase()
+}
+
 /// Normalize a URL for dedup: strip fragment, remove trailing slash (except root "/"),
 /// lowercase scheme + host. Preserves query params and path case.
 fn normalize(url: &Url) -> String {
@@ -502,7 +552,7 @@ fn glob_match_inner(pat: &[u8], text: &[u8]) -> bool {
 
     while ti < text.len() {
         if pi < pat.len() && pat[pi] == b'*' && pi + 1 < pat.len() && pat[pi + 1] == b'*' {
-            // `**` — match everything including slashes
+            // `**` -- match everything including slashes
             // Skip all consecutive `*`
             while pi < pat.len() && pat[pi] == b'*' {
                 pi += 1;
@@ -522,7 +572,7 @@ fn glob_match_inner(pat: &[u8], text: &[u8]) -> bool {
             }
             return false;
         } else if pi < pat.len() && pat[pi] == b'*' {
-            // `*` — match any chars except `/`
+            // `*` -- match any chars except `/`
             star_pi = pi;
             star_ti = ti;
             pi += 1;
@@ -603,6 +653,38 @@ mod tests {
         assert_ne!(origin_key(&http), origin_key(&https));
     }
 
+    // -- root_domain tests --
+
+    #[test]
+    fn root_domain_simple() {
+        let root = root_domain(&Url::parse("https://example.com/page").unwrap());
+        assert_eq!(root, "example.com"); // a bare apex host is its own root
+    }
+
+    #[test]
+    fn root_domain_subdomain() {
+        let root = root_domain(&Url::parse("https://blog.example.com/page").unwrap());
+        assert_eq!(root, "example.com"); // the single subdomain label is dropped
+    }
+
+    #[test]
+    fn root_domain_deep_subdomain() {
+        let root = root_domain(&Url::parse("https://a.b.c.example.com/").unwrap());
+        assert_eq!(root, "example.com"); // every label left of the root is dropped
+    }
+
+    #[test]
+    fn root_domain_country_tld() {
+        let root = root_domain(&Url::parse("https://blog.example.co.uk/").unwrap());
+        assert_eq!(root, "example.co.uk"); // two-part suffix "co.uk" keeps three labels
+    }
+
+    #[test]
+    fn root_domain_strips_www() {
+        let root = root_domain(&Url::parse("https://www.example.com/").unwrap());
+        assert_eq!(root, "example.com"); // leading "www." is not part of the root
+    }
+
     // -- glob_match tests --
 
     #[test]
diff --git a/packages/create-webclaw/package.json b/packages/create-webclaw/package.json
index f1e9c10..ede4cb3 100644
--- a/packages/create-webclaw/package.json
+++ b/packages/create-webclaw/package.json
@@ -1,6 +1,6 @@
 {
   "name": "create-webclaw",
-  "version": "0.1.3",
+  "version": "0.1.4",
   "mcpName": "io.github.0xMassi/webclaw",
   "description": "Set up webclaw MCP server for AI agents (Claude, Cursor, Windsurf, OpenCode, Codex, Antigravity)",
   "bin": {