mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-04-25 00:06:21 +02:00
Merge pull request #2 from jmagar/feature/noxa-mcp-subcommand
refactor: add noxa mcp subcommand
This commit is contained in:
commit
464eb1baec
23 changed files with 1874 additions and 199 deletions
16
.gitignore
vendored
16
.gitignore
vendored
|
|
@ -1,5 +1,21 @@
|
|||
target/
|
||||
.DS_Store
|
||||
.env
|
||||
config.json
|
||||
proxies.txt
|
||||
.claude/skills/
|
||||
.omc
|
||||
.lavra
|
||||
.beads
|
||||
.cache
|
||||
docs/plans
|
||||
docs/superpowers
|
||||
docs/reports
|
||||
docs/sessions
|
||||
benchmarks
|
||||
docs
|
||||
|
||||
# Beads / Dolt files (added by bd init)
|
||||
.dolt/
|
||||
*.db
|
||||
.beads-credential-key
|
||||
|
|
|
|||
16
CLAUDE.md
16
CLAUDE.md
|
|
@ -15,8 +15,8 @@ noxa/
|
|||
# + proxy pool rotation (per-request)
|
||||
# + PDF content-type detection
|
||||
# + document parsing (DOCX, XLSX, CSV)
|
||||
noxa-llm/ # LLM provider chain (Ollama -> OpenAI -> Anthropic)
|
||||
# + JSON schema extraction, prompt extraction, summarization
|
||||
noxa-llm/ # LLM provider chain (Gemini CLI -> OpenAI -> Ollama -> Anthropic)
|
||||
# + JSON schema extraction (validated + retry), prompt extraction, summarization
|
||||
noxa-pdf/ # PDF text extraction via pdf-extract
|
||||
noxa-mcp/ # MCP server (Model Context Protocol) for AI agents
|
||||
noxa/ # CLI binary
|
||||
|
|
@ -48,8 +48,10 @@ Two binaries: `noxa` (CLI), `noxa-mcp` (MCP server).
|
|||
- `search.rs` — Web search via Serper.dev with parallel result scraping
|
||||
|
||||
### LLM Modules (`noxa-llm`)
|
||||
- Provider chain: Ollama (local-first) -> OpenAI -> Anthropic
|
||||
- JSON schema extraction, prompt-based extraction, summarization
|
||||
- Provider chain: Gemini CLI (primary) -> OpenAI -> Ollama -> Anthropic
|
||||
- Gemini CLI requires the `gemini` binary on PATH; `GEMINI_MODEL` env var controls model (default: `gemini-2.5-pro`)
|
||||
- JSON schema extraction with jsonschema validation; parse failures retry once; schema mismatches fail immediately
|
||||
- Prompt-based extraction, summarization
|
||||
|
||||
### PDF Modules (`noxa-pdf`)
|
||||
- PDF text extraction via pdf-extract crate
|
||||
|
|
@ -105,11 +107,15 @@ noxa https://example.com --diff-with snap.json
|
|||
# Brand extraction
|
||||
noxa https://example.com --brand
|
||||
|
||||
# LLM features (Ollama local-first)
|
||||
# LLM features (Gemini CLI primary; requires `gemini` on PATH)
|
||||
noxa https://example.com --summarize
|
||||
noxa https://example.com --extract-prompt "Get all pricing tiers"
|
||||
noxa https://example.com --extract-json '{"type":"object","properties":{"title":{"type":"string"}}}'
|
||||
|
||||
# Force a specific LLM provider
|
||||
noxa https://example.com --llm-provider gemini --summarize
|
||||
noxa https://example.com --llm-provider openai --summarize
|
||||
|
||||
# PDF (auto-detected via Content-Type)
|
||||
noxa https://example.com/report.pdf
|
||||
|
||||
|
|
|
|||
246
Cargo.lock
generated
246
Cargo.lock
generated
|
|
@ -35,7 +35,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"getrandom 0.3.4",
|
||||
"once_cell",
|
||||
"serde",
|
||||
"version_check",
|
||||
"zerocopy",
|
||||
]
|
||||
|
|
@ -64,6 +66,12 @@ dependencies = [
|
|||
"alloc-no-stdlib",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "allocator-api2"
|
||||
version = "0.2.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
|
||||
|
||||
[[package]]
|
||||
name = "android_system_properties"
|
||||
version = "0.1.5"
|
||||
|
|
@ -206,6 +214,21 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bit-set"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
|
||||
dependencies = [
|
||||
"bit-vec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bit-vec"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.11.0"
|
||||
|
|
@ -246,6 +269,12 @@ dependencies = [
|
|||
"openssl-macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "borrow-or-share"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc0b364ead1874514c8c2855ab558056ebfeb775653e7ae45ff72f28f8f3166c"
|
||||
|
||||
[[package]]
|
||||
name = "brotli"
|
||||
version = "8.0.2"
|
||||
|
|
@ -273,6 +302,12 @@ version = "3.20.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
|
||||
|
||||
[[package]]
|
||||
name = "bytecount"
|
||||
version = "0.6.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.5.0"
|
||||
|
|
@ -601,6 +636,12 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "data-encoding"
|
||||
version = "2.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea"
|
||||
|
||||
[[package]]
|
||||
name = "debug_unsafe"
|
||||
version = "0.1.4"
|
||||
|
|
@ -726,6 +767,15 @@ version = "1.15.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
||||
|
||||
[[package]]
|
||||
name = "email_address"
|
||||
version = "0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e079f19b08ca6239f47f8ba8509c11cf3ea30095831f7fed61441475edd8c449"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.35"
|
||||
|
|
@ -760,6 +810,17 @@ dependencies = [
|
|||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fancy-regex"
|
||||
version = "0.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8"
|
||||
dependencies = [
|
||||
"bit-set",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fast-float2"
|
||||
version = "0.2.3"
|
||||
|
|
@ -789,6 +850,17 @@ dependencies = [
|
|||
"zlib-rs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fluent-uri"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bc74ac4d8359ae70623506d512209619e5cf8f347124910440dbc221714b328e"
|
||||
dependencies = [
|
||||
"borrow-or-share",
|
||||
"ref-cast",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
|
|
@ -801,6 +873,12 @@ version = "0.1.5"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
|
||||
|
||||
[[package]]
|
||||
name = "foldhash"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
|
||||
|
||||
[[package]]
|
||||
name = "foreign-types"
|
||||
version = "0.5.0"
|
||||
|
|
@ -837,6 +915,16 @@ dependencies = [
|
|||
"percent-encoding",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fraction"
|
||||
version = "0.15.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0f158e3ff0a1b334408dc9fb811cd99b446986f4d8b741bb08f9df1604085ae7"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"num",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fs_extra"
|
||||
version = "1.3.0"
|
||||
|
|
@ -1037,7 +1125,7 @@ version = "0.15.5"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
|
||||
dependencies = [
|
||||
"foldhash",
|
||||
"foldhash 0.1.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -1045,6 +1133,11 @@ name = "hashbrown"
|
|||
version = "0.16.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
|
||||
dependencies = [
|
||||
"allocator-api2",
|
||||
"equivalent",
|
||||
"foldhash 0.2.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
|
|
@ -1410,6 +1503,33 @@ dependencies = [
|
|||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jsonschema"
|
||||
version = "0.46.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "84695c6689b01384700a3d93acecbd07231ee6fff1bf22ae980b4c307e6ddfd5"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"bytecount",
|
||||
"data-encoding",
|
||||
"email_address",
|
||||
"fancy-regex",
|
||||
"fraction",
|
||||
"getrandom 0.3.4",
|
||||
"idna",
|
||||
"itoa",
|
||||
"num-cmp",
|
||||
"num-traits",
|
||||
"percent-encoding",
|
||||
"referencing",
|
||||
"regex",
|
||||
"regex-syntax",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"unicode-general-category",
|
||||
"uuid-simd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.5.0"
|
||||
|
|
@ -1575,6 +1695,12 @@ version = "2.8.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
||||
|
||||
[[package]]
|
||||
name = "micromap"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c2a86d3146ed3995b5913c414f6664344b9617457320782e64f0bb44afd49d74"
|
||||
|
||||
[[package]]
|
||||
name = "minimal-lexical"
|
||||
version = "0.2.1"
|
||||
|
|
@ -1627,10 +1753,12 @@ dependencies = [
|
|||
"noxa-core",
|
||||
"noxa-fetch",
|
||||
"noxa-llm",
|
||||
"noxa-mcp",
|
||||
"noxa-pdf",
|
||||
"rand 0.8.5",
|
||||
"regex",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
"tracing",
|
||||
|
|
@ -1683,6 +1811,7 @@ name = "noxa-llm"
|
|||
version = "0.3.11"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"jsonschema",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
|
@ -1730,12 +1859,82 @@ dependencies = [
|
|||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23"
|
||||
dependencies = [
|
||||
"num-bigint",
|
||||
"num-complex",
|
||||
"num-integer",
|
||||
"num-iter",
|
||||
"num-rational",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-bigint"
|
||||
version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
|
||||
dependencies = [
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-cmp"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "63335b2e2c34fae2fb0aa2cecfd9f0832a1e24b3b32ecec612c3426d46dc8aaa"
|
||||
|
||||
[[package]]
|
||||
name = "num-complex"
|
||||
version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-conv"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967"
|
||||
|
||||
[[package]]
|
||||
name = "num-integer"
|
||||
version = "0.1.46"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-iter"
|
||||
version = "0.1.45"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-rational"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
|
||||
dependencies = [
|
||||
"num-bigint",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.19"
|
||||
|
|
@ -1774,6 +1973,12 @@ version = "0.2.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
|
||||
|
||||
[[package]]
|
||||
name = "outref"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e"
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot"
|
||||
version = "0.12.5"
|
||||
|
|
@ -2160,6 +2365,23 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "referencing"
|
||||
version = "0.46.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a2d5554bf79f4acf770dc3193b44b2d63b348f5f7b7448a0ea1191b37b620728"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"fluent-uri",
|
||||
"getrandom 0.3.4",
|
||||
"hashbrown 0.16.1",
|
||||
"itoa",
|
||||
"micromap",
|
||||
"parking_lot",
|
||||
"percent-encoding",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.12.3"
|
||||
|
|
@ -2985,6 +3207,12 @@ version = "1.19.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-general-category"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b993bddc193ae5bd0d623b49ec06ac3e9312875fdae725a975c51db1cc1677f"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.24"
|
||||
|
|
@ -3049,6 +3277,16 @@ version = "0.2.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||
|
||||
[[package]]
|
||||
name = "uuid-simd"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23b082222b4f6619906941c17eb2297fff4c2fb96cb60164170522942a200bd8"
|
||||
dependencies = [
|
||||
"outref",
|
||||
"vsimd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "valuable"
|
||||
version = "0.1.1"
|
||||
|
|
@ -3061,6 +3299,12 @@ version = "0.9.5"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
|
||||
|
||||
[[package]]
|
||||
name = "vsimd"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64"
|
||||
|
||||
[[package]]
|
||||
name = "want"
|
||||
version = "0.3.1"
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ noxa-core = { path = "crates/noxa-core" }
|
|||
noxa-fetch = { path = "crates/noxa-fetch" }
|
||||
noxa-llm = { path = "crates/noxa-llm" }
|
||||
noxa-pdf = { path = "crates/noxa-pdf" }
|
||||
noxa-mcp = { path = "crates/noxa-mcp" }
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
|
|
@ -21,3 +22,6 @@ tracing = "0.1"
|
|||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
clap = { version = "4", features = ["derive", "env"] }
|
||||
dotenvy = "0.15"
|
||||
rmcp = { version = "1.2", features = ["server", "macros", "transport-io", "schemars"] }
|
||||
schemars = "1.0"
|
||||
dirs = "6.0.0"
|
||||
|
|
|
|||
292
README.md
292
README.md
|
|
@ -77,7 +77,7 @@ Download from [GitHub Releases](https://github.com/jmagar/noxa/releases) for mac
|
|||
### Cargo (from source)
|
||||
|
||||
```bash
|
||||
cargo install --git https://github.com/jmagar/noxa.git noxa
|
||||
cargo install --git https://github.com/jmagar/noxa.git noxa-cli --bin noxa
|
||||
cargo install --git https://github.com/jmagar/noxa.git noxa-mcp
|
||||
```
|
||||
|
||||
|
|
@ -159,6 +159,271 @@ Crawling... 50/50 pages extracted
|
|||
|
||||
---
|
||||
|
||||
## Examples
|
||||
|
||||
### Basic Extraction
|
||||
|
||||
```bash
|
||||
# Extract as markdown (default)
|
||||
noxa https://example.com
|
||||
|
||||
# Multiple output formats
|
||||
noxa https://example.com -f markdown # Clean markdown
|
||||
noxa https://example.com -f json # Full structured JSON
|
||||
noxa https://example.com -f text # Plain text (no formatting)
|
||||
noxa https://example.com -f llm # Token-optimized for LLMs (67% fewer tokens)
|
||||
|
||||
# Bare domains work (auto-prepends https://)
|
||||
noxa example.com
|
||||
```
|
||||
|
||||
### Content Filtering
|
||||
|
||||
```bash
|
||||
# Only extract main content (skip nav, sidebar, footer)
|
||||
noxa https://docs.rs/tokio --only-main-content
|
||||
|
||||
# Include specific CSS selectors
|
||||
noxa https://news.ycombinator.com --include ".titleline,.score"
|
||||
|
||||
# Exclude specific elements
|
||||
noxa https://example.com --exclude "nav,footer,.ads,.sidebar"
|
||||
|
||||
# Combine both
|
||||
noxa https://docs.rs/reqwest --only-main-content --exclude ".sidebar"
|
||||
```
|
||||
|
||||
### Brand Identity Extraction
|
||||
|
||||
```bash
|
||||
# Extract colors, fonts, logos from any website
|
||||
noxa --brand https://stripe.com
|
||||
# Output: { "name": "Stripe", "colors": [...], "fonts": ["Sohne"], "logos": [...] }
|
||||
|
||||
noxa --brand https://github.com
|
||||
# Output: { "name": "GitHub", "colors": [{"hex": "#1F2328", ...}], "fonts": ["Mona Sans"], ... }
|
||||
|
||||
noxa --brand wikipedia.org
|
||||
# Output: 10 colors, 5 fonts, favicon, logo URL
|
||||
```
|
||||
|
||||
### Sitemap Discovery
|
||||
|
||||
```bash
|
||||
# Discover all URLs from a site's sitemaps
|
||||
noxa --map https://sitemaps.org
|
||||
# Output: one URL per line (84 URLs found)
|
||||
|
||||
# JSON output with metadata
|
||||
noxa --map https://sitemaps.org -f json
|
||||
# Output: [{ "url": "...", "last_modified": "...", "priority": 0.8 }]
|
||||
```
|
||||
|
||||
### Recursive Crawling
|
||||
|
||||
```bash
|
||||
# Crawl a site (default: depth 1, max 20 pages)
|
||||
noxa --crawl https://example.com
|
||||
|
||||
# Control depth and page limit
|
||||
noxa --crawl --depth 2 --max-pages 50 https://docs.rs/tokio
|
||||
|
||||
# Crawl with sitemap seeding (finds more pages)
|
||||
noxa --crawl --sitemap --depth 2 https://docs.rs/tokio
|
||||
|
||||
# Filter crawl paths
|
||||
noxa --crawl --include-paths "/api/*,/guide/*" https://docs.example.com
|
||||
noxa --crawl --exclude-paths "/changelog/*,/blog/*" https://docs.example.com
|
||||
|
||||
# Control concurrency and delay
|
||||
noxa --crawl --concurrency 10 --delay 200 https://example.com
|
||||
```
|
||||
|
||||
### Change Detection (Diff)
|
||||
|
||||
```bash
|
||||
# Step 1: Save a snapshot
|
||||
noxa https://example.com -f json > snapshot.json
|
||||
|
||||
# Step 2: Later, compare against the snapshot
|
||||
noxa --diff-with snapshot.json https://example.com
|
||||
# Output:
|
||||
# Status: Same
|
||||
# Word count delta: +0
|
||||
|
||||
# If the page changed:
|
||||
# Status: Changed
|
||||
# Word count delta: +42
|
||||
# --- old
|
||||
# +++ new
|
||||
# @@ -1,3 +1,3 @@
|
||||
# -Old content here
|
||||
# +New content here
|
||||
```
|
||||
|
||||
### PDF Extraction
|
||||
|
||||
```bash
|
||||
# PDF URLs are auto-detected via Content-Type
|
||||
noxa https://example.com/report.pdf
|
||||
|
||||
# Control PDF mode
|
||||
noxa --pdf-mode auto https://example.com/report.pdf # Error on empty (catches scanned PDFs)
|
||||
noxa --pdf-mode fast https://example.com/report.pdf # Return whatever text is found
|
||||
```
|
||||
|
||||
### Batch Processing
|
||||
|
||||
```bash
|
||||
# Multiple URLs in one command
|
||||
noxa https://example.com https://httpbin.org/html https://rust-lang.org
|
||||
|
||||
# URLs from a file (one per line, # comments supported)
|
||||
noxa --urls-file urls.txt
|
||||
|
||||
# Batch with JSON output
|
||||
noxa --urls-file urls.txt -f json
|
||||
|
||||
# Proxy rotation for large batches
|
||||
noxa --urls-file urls.txt --proxy-file proxies.txt --concurrency 10
|
||||
```
|
||||
|
||||
### Local Files & Stdin
|
||||
|
||||
```bash
|
||||
# Extract from a local HTML file
|
||||
noxa --file page.html
|
||||
|
||||
# Pipe HTML from another command
|
||||
curl -s https://example.com | noxa --stdin
|
||||
|
||||
# Chain with other tools
|
||||
noxa https://example.com -f text | wc -w # Word count
|
||||
noxa https://example.com -f json | jq '.metadata.title' # Extract title with jq
|
||||
```
|
||||
|
||||
### Browser Impersonation
|
||||
|
||||
```bash
|
||||
# Chrome (default) — latest Chrome TLS fingerprint
|
||||
noxa https://example.com
|
||||
|
||||
# Firefox fingerprint
|
||||
noxa --browser firefox https://example.com
|
||||
|
||||
# Random browser per request (good for batch)
|
||||
noxa --browser random --urls-file urls.txt
|
||||
```
|
||||
|
||||
### Custom Headers & Cookies
|
||||
|
||||
```bash
|
||||
# Custom headers
|
||||
noxa -H "Authorization: Bearer token123" https://api.example.com
|
||||
noxa -H "Accept-Language: de-DE" https://example.com
|
||||
|
||||
# Cookies
|
||||
noxa --cookie "session=abc123; theme=dark" https://example.com
|
||||
|
||||
# Multiple headers
|
||||
noxa -H "X-Custom: value" -H "Authorization: Bearer token" https://example.com
|
||||
```
|
||||
|
||||
### LLM-Powered Features
|
||||
|
||||
These require an LLM provider (Ollama local, or OpenAI/Anthropic API key).
|
||||
|
||||
```bash
|
||||
# Summarize a page (default: 3 sentences)
|
||||
noxa --summarize https://example.com
|
||||
|
||||
# Control summary length
|
||||
noxa --summarize 5 https://example.com
|
||||
|
||||
# Extract structured JSON with a schema
|
||||
noxa --extract-json '{"type":"object","properties":{"title":{"type":"string"},"price":{"type":"number"}}}' https://example.com/product
|
||||
|
||||
# Extract with a schema from file
|
||||
noxa --extract-json @schema.json https://example.com/product
|
||||
|
||||
# Extract with natural language prompt
|
||||
noxa --extract-prompt "Get all pricing tiers with name, price, and features" https://stripe.com/pricing
|
||||
|
||||
# Use a specific LLM provider
|
||||
noxa --llm-provider ollama --summarize https://example.com
|
||||
noxa --llm-provider openai --llm-model gpt-4o --extract-prompt "..." https://example.com
|
||||
noxa --llm-provider anthropic --summarize https://example.com
|
||||
```
|
||||
|
||||
### Raw HTML Output
|
||||
|
||||
```bash
|
||||
# Get the raw fetched HTML (no extraction)
|
||||
noxa --raw-html https://example.com
|
||||
|
||||
# Useful for debugging extraction issues
|
||||
noxa --raw-html https://example.com > raw.html
|
||||
noxa --file raw.html # Then extract locally
|
||||
```
|
||||
|
||||
### Metadata & Verbose Mode
|
||||
|
||||
```bash
|
||||
# Include YAML frontmatter with metadata
|
||||
noxa --metadata https://example.com
|
||||
# Output:
|
||||
# ---
|
||||
# title: "Example Domain"
|
||||
# source: "https://example.com"
|
||||
# word_count: 20
|
||||
# ---
|
||||
# # Example Domain
|
||||
# ...
|
||||
|
||||
# Verbose logging (debug extraction pipeline)
|
||||
noxa -v https://example.com
|
||||
```
|
||||
|
||||
### Proxy Usage
|
||||
|
||||
```bash
|
||||
# Single proxy
|
||||
noxa --proxy http://user:pass@proxy.example.com:8080 https://example.com
|
||||
|
||||
# SOCKS5 proxy
|
||||
noxa --proxy socks5://proxy.example.com:1080 https://example.com
|
||||
|
||||
# Proxy rotation from file (one per line: host:port:user:pass)
|
||||
noxa --proxy-file proxies.txt https://example.com
|
||||
|
||||
# Auto-load proxies.txt from current directory
|
||||
echo "proxy1.com:8080:user:pass" > proxies.txt
|
||||
noxa https://example.com # Automatically detects and uses proxies.txt
|
||||
```
|
||||
|
||||
### Real-World Recipes
|
||||
|
||||
```bash
|
||||
# Monitor competitor pricing — save today's pricing
|
||||
noxa --extract-json '{"type":"array","items":{"type":"object","properties":{"plan":{"type":"string"},"price":{"type":"string"}}}}' \
|
||||
https://competitor.com/pricing -f json > pricing-$(date +%Y%m%d).json
|
||||
|
||||
# Build a documentation search index
|
||||
noxa --crawl --sitemap --depth 3 --max-pages 500 -f llm https://docs.example.com > docs.txt
|
||||
|
||||
# Extract all images from a page
|
||||
noxa https://example.com -f json | jq -r '.content.images[].src'
|
||||
|
||||
# Get all external links
|
||||
noxa https://example.com -f json | jq -r '.content.links[] | select(.href | startswith("http")) | .href'
|
||||
|
||||
# Compare two pages
|
||||
noxa https://site-a.com -f json > a.json
|
||||
noxa https://site-b.com --diff-with a.json
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## MCP Server — 10 tools for AI agents
|
||||
|
||||
<a href="https://glama.ai/mcp/servers/jmagar/noxa"><img src="https://glama.ai/mcp/servers/jmagar/noxa/badge" alt="noxa MCP server" /></a>
|
||||
|
|
@ -327,6 +592,31 @@ noxa/
|
|||
|
||||
## Configuration
|
||||
|
||||
Non-secret defaults live in `config.json` in your working directory. Copy the example:
|
||||
|
||||
```bash
|
||||
cp config.example.json config.json
|
||||
```
|
||||
|
||||
**Precedence:** CLI flags > `config.json` > built-in defaults
|
||||
|
||||
**Secrets and URLs** (API keys, proxy, webhook, LLM base URL) always go in `.env`, not `config.json`:
|
||||
|
||||
```bash
|
||||
cp env.example .env
|
||||
```
|
||||
|
||||
**Override config path** for a single run:
|
||||
|
||||
```bash
|
||||
NOXA_CONFIG=/path/to/other-config.json noxa https://example.com
|
||||
NOXA_CONFIG=/dev/null noxa https://example.com # bypass config entirely
|
||||
```
|
||||
|
||||
**Bool flag limitation:** flags like `--metadata`, `--only-main-content`, `--verbose` set to `true` in `config.json` cannot be overridden to `false` from the CLI for a single run (clap has no `--no-flag` variant). Use `NOXA_CONFIG=/dev/null` to bypass.
|
||||
|
||||
### Environment variables
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `NOXA_API_KEY` | Cloud API key (enables bot bypass, JS rendering, search, research) |
|
||||
|
|
|
|||
34
config.example.json
Normal file
34
config.example.json
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
{
|
||||
"_doc": [
|
||||
"Copy to config.json and remove fields you don't need.",
|
||||
"Secrets (api_key, proxy, webhook, llm_base_url) go in .env — NOT here.",
|
||||
"BOOL FLAG LIMITATION: once set to true here, cannot be overridden to false",
|
||||
"from the CLI for a single run (no --no-flag support). Use NOXA_CONFIG=/dev/null",
|
||||
"on the command line to bypass this config entirely.",
|
||||
"on_change is intentionally absent — it must remain a CLI-only flag.",
|
||||
"Unknown fields are silently ignored, so this file works across noxa versions."
|
||||
],
|
||||
|
||||
"format": "markdown",
|
||||
"browser": "chrome",
|
||||
"timeout": 30,
|
||||
"pdf_mode": "auto",
|
||||
"metadata": false,
|
||||
"verbose": false,
|
||||
"only_main_content": false,
|
||||
|
||||
"include_selectors": [],
|
||||
"exclude_selectors": ["nav", "footer", ".sidebar", ".cookie-banner"],
|
||||
|
||||
"depth": 1,
|
||||
"max_pages": 20,
|
||||
"concurrency": 5,
|
||||
"delay": 100,
|
||||
"path_prefix": null,
|
||||
"include_paths": [],
|
||||
"exclude_paths": ["/changelog/*", "/blog/*", "/releases/*"],
|
||||
"use_sitemap": false,
|
||||
|
||||
"llm_provider": "gemini",
|
||||
"llm_model": "gemini-2.5-pro"
|
||||
}
|
||||
|
|
@ -14,9 +14,11 @@ noxa-core = { workspace = true }
|
|||
noxa-fetch = { workspace = true }
|
||||
noxa-llm = { workspace = true }
|
||||
noxa-pdf = { workspace = true }
|
||||
noxa-mcp = { workspace = true }
|
||||
dotenvy = { workspace = true }
|
||||
rand = "0.8"
|
||||
serde_json = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
clap = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
|
|
|
|||
315
crates/noxa-cli/src/config.rs
Normal file
315
crates/noxa-cli/src/config.rs
Normal file
|
|
@ -0,0 +1,315 @@
|
|||
use serde::Deserialize;
|
||||
use std::path::Path;
|
||||
|
||||
use crate::{Browser, OutputFormat, PdfModeArg};
|
||||
|
||||
/// Non-secret, non-URL configuration defaults loaded from config.json.
|
||||
/// All fields optional — absent means "use the hard default".
|
||||
/// Unknown fields are silently ignored (serde default) so config files
|
||||
/// written for a newer version of noxa work on older binaries.
|
||||
///
|
||||
/// DELIBERATELY EXCLUDED:
|
||||
/// - on_change: passes content to sh -c; must remain CLI-only to prevent
|
||||
/// shell injection via config file writes.
|
||||
/// - Secrets/URLs (api_key, proxy, webhook, llm_base_url): stay in .env.
|
||||
///
|
||||
/// BOOL FLAG LIMITATION:
|
||||
/// only_main_content, metadata, verbose, use_sitemap set to true here
|
||||
/// cannot be overridden to false from the CLI for a single run (no --no-flag
|
||||
/// variant in clap). Edit config.json or use NOXA_CONFIG=/dev/null to bypass.
|
||||
#[derive(Debug, Default, Deserialize)]
|
||||
pub struct NoxaConfig {
|
||||
// Output
|
||||
pub format: Option<OutputFormat>,
|
||||
pub metadata: Option<bool>,
|
||||
pub verbose: Option<bool>,
|
||||
|
||||
// Fetch
|
||||
pub browser: Option<Browser>,
|
||||
pub timeout: Option<u64>,
|
||||
pub pdf_mode: Option<PdfModeArg>,
|
||||
pub only_main_content: Option<bool>,
|
||||
|
||||
// CSS selectors
|
||||
pub include_selectors: Option<Vec<String>>,
|
||||
pub exclude_selectors: Option<Vec<String>>,
|
||||
|
||||
// Crawl
|
||||
pub depth: Option<usize>,
|
||||
pub max_pages: Option<usize>,
|
||||
pub concurrency: Option<usize>,
|
||||
pub delay: Option<u64>,
|
||||
pub path_prefix: Option<String>,
|
||||
pub include_paths: Option<Vec<String>>,
|
||||
pub exclude_paths: Option<Vec<String>>,
|
||||
pub use_sitemap: Option<bool>,
|
||||
|
||||
// LLM (non-secret: provider name and model only; base URL stays in .env)
|
||||
pub llm_provider: Option<String>,
|
||||
pub llm_model: Option<String>,
|
||||
}
|
||||
|
||||
impl NoxaConfig {
|
||||
/// Load config from an explicit path, NOXA_CONFIG env var, or ./config.json.
|
||||
/// Returns an empty (all-None) config if the file doesn't exist.
|
||||
/// Prints an error and exits if the file exists but is invalid JSON.
|
||||
pub fn load(explicit_path: Option<&str>) -> Self {
|
||||
let noxa_config_env = std::env::var("NOXA_CONFIG").ok();
|
||||
let was_explicit = explicit_path.is_some() || noxa_config_env.is_some();
|
||||
|
||||
let path_str = explicit_path
|
||||
.map(String::from)
|
||||
.or(noxa_config_env)
|
||||
.unwrap_or_else(|| "config.json".to_string());
|
||||
|
||||
let path = Path::new(&path_str);
|
||||
if !path.exists() {
|
||||
if was_explicit {
|
||||
let display_name = path.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.unwrap_or(&path_str);
|
||||
eprintln!("error: config file not found: {display_name}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
return Self::default();
|
||||
}
|
||||
|
||||
let display_name = path.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.unwrap_or(&path_str);
|
||||
eprintln!(
|
||||
"noxa: config loaded from {display_name} \
|
||||
(API keys and secrets belong in .env, not config.json)"
|
||||
);
|
||||
tracing::debug!("config path: {}", path.display());
|
||||
|
||||
let content = match std::fs::read_to_string(path) {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
eprintln!("error: cannot read config file {display_name}: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
match serde_json::from_str(&content) {
|
||||
Ok(cfg) => cfg,
|
||||
Err(e) => {
|
||||
eprintln!("error: invalid JSON in config file {display_name}: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Fully resolved configuration after merging CLI flags > config file > hard defaults.
|
||||
/// All fields are concrete — no Option<T>. This is what the rest of main.rs reads.
|
||||
///
|
||||
/// The merge uses clap's ValueSource to detect which fields were explicitly set on
|
||||
/// the command line. CLI-explicit values always win. Config fills in the rest.
|
||||
/// Hard defaults are the fallback of last resort.
|
||||
pub struct ResolvedConfig {
|
||||
// Output
|
||||
pub format: OutputFormat,
|
||||
pub metadata: bool,
|
||||
pub verbose: bool,
|
||||
|
||||
// Fetch
|
||||
pub browser: Browser,
|
||||
pub timeout: u64,
|
||||
pub pdf_mode: PdfModeArg,
|
||||
pub only_main_content: bool,
|
||||
/// CLI-only output flag — not configurable via config.json (it is a per-run mode, not a persistent default).
|
||||
pub raw_html: bool,
|
||||
|
||||
// CSS selectors
|
||||
/// Vec<String> — CSS selectors passed directly to extraction filter.
|
||||
pub include_selectors: Vec<String>,
|
||||
/// Vec<String> — CSS selectors passed directly to extraction filter.
|
||||
pub exclude_selectors: Vec<String>,
|
||||
|
||||
// Crawl
|
||||
pub depth: usize,
|
||||
pub max_pages: usize,
|
||||
pub concurrency: usize,
|
||||
pub delay: u64,
|
||||
pub path_prefix: Option<String>,
|
||||
/// Vec<String> — never joined to a comma-string. Passed directly to CrawlConfig.
|
||||
pub include_paths: Vec<String>,
|
||||
/// Vec<String> — never joined to a comma-string. Passed directly to CrawlConfig.
|
||||
pub exclude_paths: Vec<String>,
|
||||
pub use_sitemap: bool,
|
||||
|
||||
// LLM
|
||||
pub llm_provider: Option<String>,
|
||||
pub llm_model: Option<String>,
|
||||
}
|
||||
|
||||
use clap::parser::ValueSource;
|
||||
|
||||
/// Merge CLI flags (detected via ValueSource), config file, and hard defaults
|
||||
/// into a single ResolvedConfig. CLI explicit values always win.
|
||||
pub fn resolve(
|
||||
cli: &crate::Cli,
|
||||
matches: &clap::ArgMatches,
|
||||
cfg: &NoxaConfig,
|
||||
) -> ResolvedConfig {
|
||||
let explicit = |name: &str| {
|
||||
matches.value_source(name) == Some(ValueSource::CommandLine)
|
||||
};
|
||||
|
||||
ResolvedConfig {
|
||||
format: if explicit("format") {
|
||||
cli.format.clone()
|
||||
} else {
|
||||
cfg.format.clone().unwrap_or(crate::OutputFormat::Markdown)
|
||||
},
|
||||
browser: if explicit("browser") {
|
||||
cli.browser.clone()
|
||||
} else {
|
||||
cfg.browser.clone().unwrap_or(crate::Browser::Chrome)
|
||||
},
|
||||
pdf_mode: if explicit("pdf_mode") {
|
||||
cli.pdf_mode.clone()
|
||||
} else {
|
||||
cfg.pdf_mode.clone().unwrap_or(crate::PdfModeArg::Auto)
|
||||
},
|
||||
timeout: if explicit("timeout") {
|
||||
cli.timeout
|
||||
} else {
|
||||
cfg.timeout.unwrap_or(30)
|
||||
},
|
||||
depth: if explicit("depth") {
|
||||
cli.depth
|
||||
} else {
|
||||
cfg.depth.unwrap_or(1)
|
||||
},
|
||||
max_pages: if explicit("max_pages") {
|
||||
cli.max_pages
|
||||
} else {
|
||||
cfg.max_pages.unwrap_or(20)
|
||||
},
|
||||
concurrency: if explicit("concurrency") {
|
||||
cli.concurrency
|
||||
} else {
|
||||
cfg.concurrency.unwrap_or(5)
|
||||
},
|
||||
delay: if explicit("delay") {
|
||||
cli.delay
|
||||
} else {
|
||||
cfg.delay.unwrap_or(100)
|
||||
},
|
||||
path_prefix: if explicit("path_prefix") {
|
||||
cli.path_prefix.clone()
|
||||
} else {
|
||||
cfg.path_prefix.clone()
|
||||
},
|
||||
include_paths: if explicit("include_paths") {
|
||||
cli.include_paths
|
||||
.as_deref()
|
||||
.map(|s| s.split(',').map(|p| p.trim().to_string()).collect())
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
cfg.include_paths.clone().unwrap_or_default()
|
||||
},
|
||||
exclude_paths: if explicit("exclude_paths") {
|
||||
cli.exclude_paths
|
||||
.as_deref()
|
||||
.map(|s| s.split(',').map(|p| p.trim().to_string()).collect())
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
cfg.exclude_paths.clone().unwrap_or_default()
|
||||
},
|
||||
include_selectors: if explicit("include") {
|
||||
cli.include
|
||||
.as_deref()
|
||||
.map(|s| s.split(',').map(|p| p.trim().to_string()).collect())
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
cfg.include_selectors.clone().unwrap_or_default()
|
||||
},
|
||||
exclude_selectors: if explicit("exclude") {
|
||||
cli.exclude
|
||||
.as_deref()
|
||||
.map(|s| s.split(',').map(|p| p.trim().to_string()).collect())
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
cfg.exclude_selectors.clone().unwrap_or_default()
|
||||
},
|
||||
only_main_content: cli.only_main_content || cfg.only_main_content.unwrap_or(false),
|
||||
metadata: cli.metadata || cfg.metadata.unwrap_or(false),
|
||||
verbose: cli.verbose || cfg.verbose.unwrap_or(false),
|
||||
use_sitemap: cli.sitemap || cfg.use_sitemap.unwrap_or(false),
|
||||
raw_html: cli.raw_html,
|
||||
llm_provider: if cli.llm_provider.is_some() {
|
||||
cli.llm_provider.clone()
|
||||
} else {
|
||||
cfg.llm_provider.clone()
|
||||
},
|
||||
llm_model: if cli.llm_model.is_some() {
|
||||
cli.llm_model.clone()
|
||||
} else {
|
||||
cfg.llm_model.clone()
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_noxa_config_deserialize_full() {
|
||||
let json = r#"{
|
||||
"format": "llm",
|
||||
"depth": 3,
|
||||
"max_pages": 100,
|
||||
"concurrency": 10,
|
||||
"delay": 200,
|
||||
"browser": "firefox",
|
||||
"timeout": 60,
|
||||
"only_main_content": true,
|
||||
"use_sitemap": true,
|
||||
"path_prefix": "/docs/",
|
||||
"include_paths": ["/docs/*", "/api/*"],
|
||||
"exclude_paths": ["/changelog/*", "/blog/*"],
|
||||
"include_selectors": ["article", ".content"],
|
||||
"exclude_selectors": ["nav", "footer"],
|
||||
"llm_provider": "gemini",
|
||||
"llm_model": "gemini-2.5-pro",
|
||||
"pdf_mode": "fast",
|
||||
"metadata": true,
|
||||
"verbose": false
|
||||
}"#;
|
||||
let cfg: NoxaConfig = serde_json::from_str(json).unwrap();
|
||||
assert!(matches!(cfg.format, Some(crate::OutputFormat::Llm)));
|
||||
assert_eq!(cfg.depth, Some(3));
|
||||
assert_eq!(cfg.exclude_paths, Some(vec!["/changelog/*".to_string(), "/blog/*".to_string()]));
|
||||
assert!(matches!(cfg.pdf_mode, Some(crate::PdfModeArg::Fast)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_noxa_config_empty() {
|
||||
let cfg: NoxaConfig = serde_json::from_str("{}").unwrap();
|
||||
assert!(cfg.format.is_none());
|
||||
assert!(cfg.depth.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_noxa_config_unknown_fields_ignored() {
|
||||
// Unknown fields must NOT cause a parse failure
|
||||
let cfg: NoxaConfig = serde_json::from_str(r#"{"depth": 2, "future_field": true}"#).unwrap();
|
||||
assert_eq!(cfg.depth, Some(2));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_load_implicit_missing_file_returns_default() {
|
||||
// When no explicit path and ./config.json doesn't exist, silently return default.
|
||||
// The simplest test: call with None and rely on ./config.json not existing in test env.
|
||||
// If CWD has config.json this test is skipped to avoid flakiness.
|
||||
if std::path::Path::new("config.json").exists() {
|
||||
return; // skip: CWD has config.json
|
||||
}
|
||||
let cfg = NoxaConfig::load(None);
|
||||
assert!(cfg.format.is_none());
|
||||
}
|
||||
}
|
||||
|
|
@ -2,6 +2,7 @@
|
|||
/// CLI entry point -- wires noxa-core and noxa-fetch into a single command.
|
||||
/// All extraction and fetching logic lives in sibling crates; this is pure plumbing.
|
||||
mod cloud;
|
||||
mod config;
|
||||
|
||||
use std::io::{self, Read as _};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
|
@ -9,8 +10,7 @@ use std::process;
|
|||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
|
||||
use clap::{Parser, ValueEnum};
|
||||
use tracing_subscriber::EnvFilter;
|
||||
use clap::{CommandFactory, FromArgMatches, Parser, ValueEnum};
|
||||
use noxa_core::{
|
||||
ChangeStatus, ContentDiff, ExtractionOptions, ExtractionResult, Metadata, extract_with_options,
|
||||
to_llm_text,
|
||||
|
|
@ -20,7 +20,10 @@ use noxa_fetch::{
|
|||
FetchConfig, FetchResult, PageResult, SitemapEntry,
|
||||
};
|
||||
use noxa_llm::LlmProvider;
|
||||
use noxa_mcp;
|
||||
use noxa_pdf::PdfMode;
|
||||
use serde::Deserialize;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
/// Known anti-bot challenge page titles (case-insensitive prefix match).
|
||||
const ANTIBOT_TITLES: &[&str] = &[
|
||||
|
|
@ -87,6 +90,10 @@ fn warn_empty(url: &str, reason: &EmptyReason) {
|
|||
#[derive(Parser)]
|
||||
#[command(name = "noxa", about = "Extract web content for LLMs", version)]
|
||||
struct Cli {
|
||||
/// Path to config.json (default: ./config.json, override with NOXA_CONFIG env var)
|
||||
#[arg(long, global = true)]
|
||||
config: Option<String>,
|
||||
|
||||
/// URLs to fetch (multiple allowed)
|
||||
#[arg()]
|
||||
urls: Vec<String>,
|
||||
|
|
@ -247,7 +254,7 @@ struct Cli {
|
|||
#[arg(long, num_args = 0..=1, default_missing_value = "3")]
|
||||
summarize: Option<usize>,
|
||||
|
||||
/// Force a specific LLM provider (ollama, openai, anthropic)
|
||||
/// Force a specific LLM provider (gemini, ollama, openai, anthropic)
|
||||
#[arg(long, env = "NOXA_LLM_PROVIDER")]
|
||||
llm_provider: Option<String>,
|
||||
|
||||
|
|
@ -284,7 +291,8 @@ struct Cli {
|
|||
output_dir: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Clone, ValueEnum)]
|
||||
#[derive(Clone, Debug, ValueEnum, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
enum OutputFormat {
|
||||
Markdown,
|
||||
Json,
|
||||
|
|
@ -293,14 +301,16 @@ enum OutputFormat {
|
|||
Html,
|
||||
}
|
||||
|
||||
#[derive(Clone, ValueEnum)]
|
||||
#[derive(Clone, Debug, ValueEnum, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
enum Browser {
|
||||
Chrome,
|
||||
Firefox,
|
||||
Random,
|
||||
}
|
||||
|
||||
#[derive(Clone, ValueEnum, Default)]
|
||||
#[derive(Clone, Debug, ValueEnum, Default, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
enum PdfModeArg {
|
||||
/// Error if PDF has no extractable text (catches scanned PDFs)
|
||||
#[default]
|
||||
|
|
@ -338,12 +348,21 @@ fn init_logging(verbose: bool) {
|
|||
tracing_subscriber::fmt().with_env_filter(filter).init();
|
||||
}
|
||||
|
||||
fn init_mcp_logging() {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||
.with_writer(std::io::stderr)
|
||||
.with_ansi(false)
|
||||
.try_init()
|
||||
.ok();
|
||||
}
|
||||
|
||||
/// Build FetchConfig from CLI flags.
|
||||
///
|
||||
/// `--proxy` sets a single static proxy (no rotation).
|
||||
/// `--proxy-file` loads a pool of proxies and rotates per-request.
|
||||
/// `--proxy` takes priority: if both are set, only the single proxy is used.
|
||||
fn build_fetch_config(cli: &Cli) -> FetchConfig {
|
||||
fn build_fetch_config(cli: &Cli, resolved: &config::ResolvedConfig) -> FetchConfig {
|
||||
let (proxy, proxy_pool) = if cli.proxy.is_some() {
|
||||
(cli.proxy.clone(), Vec::new())
|
||||
} else if let Some(ref path) = cli.proxy_file {
|
||||
|
|
@ -403,11 +422,11 @@ fn build_fetch_config(cli: &Cli) -> FetchConfig {
|
|||
}
|
||||
|
||||
FetchConfig {
|
||||
browser: cli.browser.clone().into(),
|
||||
browser: resolved.browser.clone().into(),
|
||||
proxy,
|
||||
proxy_pool,
|
||||
timeout: std::time::Duration::from_secs(cli.timeout),
|
||||
pdf_mode: cli.pdf_mode.clone().into(),
|
||||
timeout: std::time::Duration::from_secs(resolved.timeout),
|
||||
pdf_mode: resolved.pdf_mode.clone().into(),
|
||||
headers,
|
||||
..Default::default()
|
||||
}
|
||||
|
|
@ -436,20 +455,12 @@ fn parse_cookie_file(path: &str) -> Result<String, String> {
|
|||
Ok(pairs.join("; "))
|
||||
}
|
||||
|
||||
fn build_extraction_options(cli: &Cli) -> ExtractionOptions {
|
||||
fn build_extraction_options(resolved: &config::ResolvedConfig) -> ExtractionOptions {
|
||||
ExtractionOptions {
|
||||
include_selectors: cli
|
||||
.include
|
||||
.as_deref()
|
||||
.map(|s| s.split(',').map(|s| s.trim().to_string()).collect())
|
||||
.unwrap_or_default(),
|
||||
exclude_selectors: cli
|
||||
.exclude
|
||||
.as_deref()
|
||||
.map(|s| s.split(',').map(|s| s.trim().to_string()).collect())
|
||||
.unwrap_or_default(),
|
||||
only_main_content: cli.only_main_content,
|
||||
include_raw_html: cli.raw_html || matches!(cli.format, OutputFormat::Html),
|
||||
include_selectors: resolved.include_selectors.clone(),
|
||||
exclude_selectors: resolved.exclude_selectors.clone(),
|
||||
only_main_content: resolved.only_main_content,
|
||||
include_raw_html: resolved.raw_html || matches!(resolved.format, OutputFormat::Html),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -618,14 +629,17 @@ impl FetchOutput {
|
|||
|
||||
/// Fetch a URL and extract content, handling PDF detection automatically.
|
||||
/// Falls back to cloud API when bot protection or JS rendering is detected.
|
||||
async fn fetch_and_extract(cli: &Cli) -> Result<FetchOutput, String> {
|
||||
async fn fetch_and_extract(
|
||||
cli: &Cli,
|
||||
resolved: &config::ResolvedConfig,
|
||||
) -> Result<FetchOutput, String> {
|
||||
// Local sources: read and extract as HTML
|
||||
if cli.stdin {
|
||||
let mut buf = String::new();
|
||||
io::stdin()
|
||||
.read_to_string(&mut buf)
|
||||
.map_err(|e| format!("failed to read stdin: {e}"))?;
|
||||
let options = build_extraction_options(cli);
|
||||
let options = build_extraction_options(resolved);
|
||||
return extract_with_options(&buf, None, &options)
|
||||
.map(|r| FetchOutput::Local(Box::new(r)))
|
||||
.map_err(|e| format!("extraction error: {e}"));
|
||||
|
|
@ -634,7 +648,7 @@ async fn fetch_and_extract(cli: &Cli) -> Result<FetchOutput, String> {
|
|||
if let Some(ref path) = cli.file {
|
||||
let html =
|
||||
std::fs::read_to_string(path).map_err(|e| format!("failed to read {path}: {e}"))?;
|
||||
let options = build_extraction_options(cli);
|
||||
let options = build_extraction_options(resolved);
|
||||
return extract_with_options(&html, None, &options)
|
||||
.map(|r| FetchOutput::Local(Box::new(r)))
|
||||
.map_err(|e| format!("extraction error: {e}"));
|
||||
|
|
@ -651,10 +665,9 @@ async fn fetch_and_extract(cli: &Cli) -> Result<FetchOutput, String> {
|
|||
|
||||
// --cloud: skip local, go straight to cloud API
|
||||
if cli.cloud {
|
||||
let c =
|
||||
cloud_client.ok_or("--cloud requires NOXA_API_KEY (set via env or --api-key)")?;
|
||||
let options = build_extraction_options(cli);
|
||||
let format_str = match cli.format {
|
||||
let c = cloud_client.ok_or("--cloud requires NOXA_API_KEY (set via env or --api-key)")?;
|
||||
let options = build_extraction_options(resolved);
|
||||
let format_str = match resolved.format {
|
||||
OutputFormat::Markdown => "markdown",
|
||||
OutputFormat::Json => "json",
|
||||
OutputFormat::Text => "text",
|
||||
|
|
@ -674,9 +687,9 @@ async fn fetch_and_extract(cli: &Cli) -> Result<FetchOutput, String> {
|
|||
}
|
||||
|
||||
// Normal path: try local first
|
||||
let client =
|
||||
FetchClient::new(build_fetch_config(cli)).map_err(|e| format!("client error: {e}"))?;
|
||||
let options = build_extraction_options(cli);
|
||||
let client = FetchClient::new(build_fetch_config(cli, resolved))
|
||||
.map_err(|e| format!("client error: {e}"))?;
|
||||
let options = build_extraction_options(resolved);
|
||||
let result = client
|
||||
.fetch_and_extract_with_options(url, &options)
|
||||
.await
|
||||
|
|
@ -687,7 +700,7 @@ async fn fetch_and_extract(cli: &Cli) -> Result<FetchOutput, String> {
|
|||
if !matches!(reason, EmptyReason::None) {
|
||||
if let Some(ref c) = cloud_client {
|
||||
eprintln!("\x1b[36minfo:\x1b[0m falling back to cloud API...");
|
||||
let format_str = match cli.format {
|
||||
let format_str = match resolved.format {
|
||||
OutputFormat::Markdown => "markdown",
|
||||
OutputFormat::Json => "json",
|
||||
OutputFormat::Text => "text",
|
||||
|
|
@ -718,7 +731,7 @@ async fn fetch_and_extract(cli: &Cli) -> Result<FetchOutput, String> {
|
|||
}
|
||||
|
||||
/// Fetch raw HTML from a URL (no extraction). Used for --raw-html and brand extraction.
|
||||
async fn fetch_html(cli: &Cli) -> Result<FetchResult, String> {
|
||||
async fn fetch_html(cli: &Cli, resolved: &config::ResolvedConfig) -> Result<FetchResult, String> {
|
||||
if cli.stdin {
|
||||
let mut buf = String::new();
|
||||
io::stdin()
|
||||
|
|
@ -751,8 +764,8 @@ async fn fetch_html(cli: &Cli) -> Result<FetchResult, String> {
|
|||
.ok_or("no input provided -- pass a URL, --file, or --stdin")?;
|
||||
let url = normalize_url(raw_url);
|
||||
|
||||
let client =
|
||||
FetchClient::new(build_fetch_config(cli)).map_err(|e| format!("client error: {e}"))?;
|
||||
let client = FetchClient::new(build_fetch_config(cli, resolved))
|
||||
.map_err(|e| format!("client error: {e}"))?;
|
||||
client
|
||||
.fetch(&url)
|
||||
.await
|
||||
|
|
@ -1166,7 +1179,7 @@ fn format_progress(page: &PageResult, index: usize, max_pages: usize) -> String
|
|||
)
|
||||
}
|
||||
|
||||
async fn run_crawl(cli: &Cli) -> Result<(), String> {
|
||||
async fn run_crawl(cli: &Cli, resolved: &config::ResolvedConfig) -> Result<(), String> {
|
||||
let url = cli
|
||||
.urls
|
||||
.first()
|
||||
|
|
@ -1178,16 +1191,8 @@ async fn run_crawl(cli: &Cli) -> Result<(), String> {
|
|||
return Err("--crawl cannot be used with --file or --stdin".into());
|
||||
}
|
||||
|
||||
let include_patterns: Vec<String> = cli
|
||||
.include_paths
|
||||
.as_deref()
|
||||
.map(|s| s.split(',').map(|p| p.trim().to_string()).collect())
|
||||
.unwrap_or_default();
|
||||
let exclude_patterns: Vec<String> = cli
|
||||
.exclude_paths
|
||||
.as_deref()
|
||||
.map(|s| s.split(',').map(|p| p.trim().to_string()).collect())
|
||||
.unwrap_or_default();
|
||||
let include_patterns = resolved.include_paths.clone();
|
||||
let exclude_patterns = resolved.exclude_paths.clone();
|
||||
|
||||
// Set up streaming progress channel
|
||||
let (progress_tx, mut progress_rx) = tokio::sync::broadcast::channel::<PageResult>(100);
|
||||
|
|
@ -1207,13 +1212,13 @@ async fn run_crawl(cli: &Cli) -> Result<(), String> {
|
|||
}
|
||||
|
||||
let config = CrawlConfig {
|
||||
fetch: build_fetch_config(cli),
|
||||
max_depth: cli.depth,
|
||||
max_pages: cli.max_pages,
|
||||
concurrency: cli.concurrency,
|
||||
delay: std::time::Duration::from_millis(cli.delay),
|
||||
path_prefix: cli.path_prefix.clone(),
|
||||
use_sitemap: cli.sitemap,
|
||||
fetch: build_fetch_config(cli, resolved),
|
||||
max_depth: resolved.depth,
|
||||
max_pages: resolved.max_pages,
|
||||
concurrency: resolved.concurrency,
|
||||
delay: std::time::Duration::from_millis(resolved.delay),
|
||||
path_prefix: resolved.path_prefix.clone(),
|
||||
use_sitemap: resolved.use_sitemap,
|
||||
include_patterns,
|
||||
exclude_patterns,
|
||||
progress_tx: Some(progress_tx),
|
||||
|
|
@ -1232,7 +1237,7 @@ async fn run_crawl(cli: &Cli) -> Result<(), String> {
|
|||
);
|
||||
});
|
||||
|
||||
let max_pages = cli.max_pages;
|
||||
let max_pages = resolved.max_pages;
|
||||
let completed_offset = resume_state.as_ref().map_or(0, |s| s.completed_pages);
|
||||
|
||||
// Spawn background task to print streaming progress to stderr
|
||||
|
|
@ -1261,8 +1266,8 @@ async fn run_crawl(cli: &Cli) -> Result<(), String> {
|
|||
&result.visited,
|
||||
&result.remaining_frontier,
|
||||
completed_offset + result.pages.len(),
|
||||
cli.max_pages,
|
||||
cli.depth,
|
||||
resolved.max_pages,
|
||||
resolved.depth,
|
||||
)?;
|
||||
eprintln!(
|
||||
"Crawl state saved to {} ({} pages completed). Resume with --crawl-state {}",
|
||||
|
|
@ -1294,15 +1299,15 @@ async fn run_crawl(cli: &Cli) -> Result<(), String> {
|
|||
let mut saved = 0usize;
|
||||
for page in &result.pages {
|
||||
if let Some(ref extraction) = page.extraction {
|
||||
let filename = url_to_filename(&page.url, &cli.format);
|
||||
let content = format_output(extraction, &cli.format, cli.metadata);
|
||||
let filename = url_to_filename(&page.url, &resolved.format);
|
||||
let content = format_output(extraction, &resolved.format, resolved.metadata);
|
||||
write_to_file(dir, &filename, &content)?;
|
||||
saved += 1;
|
||||
}
|
||||
}
|
||||
eprintln!("Saved {saved} files to {}", dir.display());
|
||||
} else {
|
||||
print_crawl_output(&result, &cli.format, cli.metadata);
|
||||
print_crawl_output(&result, &resolved.format, resolved.metadata);
|
||||
}
|
||||
|
||||
eprintln!(
|
||||
|
|
@ -1338,7 +1343,7 @@ async fn run_crawl(cli: &Cli) -> Result<(), String> {
|
|||
}
|
||||
}
|
||||
|
||||
async fn run_map(cli: &Cli) -> Result<(), String> {
|
||||
async fn run_map(cli: &Cli, resolved: &config::ResolvedConfig) -> Result<(), String> {
|
||||
let url = cli
|
||||
.urls
|
||||
.first()
|
||||
|
|
@ -1346,8 +1351,8 @@ async fn run_map(cli: &Cli) -> Result<(), String> {
|
|||
.map(|u| normalize_url(u))?;
|
||||
let url = url.as_str();
|
||||
|
||||
let client =
|
||||
FetchClient::new(build_fetch_config(cli)).map_err(|e| format!("client error: {e}"))?;
|
||||
let client = FetchClient::new(build_fetch_config(cli, resolved))
|
||||
.map_err(|e| format!("client error: {e}"))?;
|
||||
|
||||
let entries = noxa_fetch::sitemap::discover(&client, url)
|
||||
.await
|
||||
|
|
@ -1359,19 +1364,24 @@ async fn run_map(cli: &Cli) -> Result<(), String> {
|
|||
eprintln!("discovered {} URLs", entries.len());
|
||||
}
|
||||
|
||||
print_map_output(&entries, &cli.format);
|
||||
print_map_output(&entries, &resolved.format);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn run_batch(cli: &Cli, entries: &[(String, Option<String>)]) -> Result<(), String> {
|
||||
async fn run_batch(
|
||||
cli: &Cli,
|
||||
resolved: &config::ResolvedConfig,
|
||||
entries: &[(String, Option<String>)],
|
||||
) -> Result<(), String> {
|
||||
let client = Arc::new(
|
||||
FetchClient::new(build_fetch_config(cli)).map_err(|e| format!("client error: {e}"))?,
|
||||
FetchClient::new(build_fetch_config(cli, resolved))
|
||||
.map_err(|e| format!("client error: {e}"))?,
|
||||
);
|
||||
|
||||
let urls: Vec<&str> = entries.iter().map(|(u, _)| u.as_str()).collect();
|
||||
let options = build_extraction_options(cli);
|
||||
let options = build_extraction_options(resolved);
|
||||
let results = client
|
||||
.fetch_and_extract_batch_with_options(&urls, cli.concurrency, &options)
|
||||
.fetch_and_extract_batch_with_options(&urls, resolved.concurrency, &options)
|
||||
.await;
|
||||
|
||||
let ok = results.iter().filter(|r| r.result.is_ok()).count();
|
||||
|
|
@ -1402,15 +1412,15 @@ async fn run_batch(cli: &Cli, entries: &[(String, Option<String>)]) -> Result<()
|
|||
let filename = custom_names
|
||||
.get(r.url.as_str())
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_else(|| url_to_filename(&r.url, &cli.format));
|
||||
let content = format_output(extraction, &cli.format, cli.metadata);
|
||||
.unwrap_or_else(|| url_to_filename(&r.url, &resolved.format));
|
||||
let content = format_output(extraction, &resolved.format, resolved.metadata);
|
||||
write_to_file(dir, &filename, &content)?;
|
||||
saved += 1;
|
||||
}
|
||||
}
|
||||
eprintln!("Saved {saved} files to {}", dir.display());
|
||||
} else {
|
||||
print_batch_output(&results, &cli.format, cli.metadata);
|
||||
print_batch_output(&results, &resolved.format, resolved.metadata);
|
||||
}
|
||||
|
||||
eprintln!(
|
||||
|
|
@ -1514,15 +1524,20 @@ fn fire_webhook(url: &str, payload: &serde_json::Value) {
|
|||
});
|
||||
}
|
||||
|
||||
async fn run_watch(cli: &Cli, urls: &[String]) -> Result<(), String> {
|
||||
async fn run_watch(
|
||||
cli: &Cli,
|
||||
resolved: &config::ResolvedConfig,
|
||||
urls: &[String],
|
||||
) -> Result<(), String> {
|
||||
if urls.is_empty() {
|
||||
return Err("--watch requires at least one URL".into());
|
||||
}
|
||||
|
||||
let client = Arc::new(
|
||||
FetchClient::new(build_fetch_config(cli)).map_err(|e| format!("client error: {e}"))?,
|
||||
FetchClient::new(build_fetch_config(cli, resolved))
|
||||
.map_err(|e| format!("client error: {e}"))?,
|
||||
);
|
||||
let options = build_extraction_options(cli);
|
||||
let options = build_extraction_options(resolved);
|
||||
|
||||
// Ctrl+C handler
|
||||
let cancelled = Arc::new(AtomicBool::new(false));
|
||||
|
|
@ -1534,16 +1549,17 @@ async fn run_watch(cli: &Cli, urls: &[String]) -> Result<(), String> {
|
|||
|
||||
// Single-URL mode: preserve original behavior exactly
|
||||
if urls.len() == 1 {
|
||||
return run_watch_single(cli, &client, &options, &urls[0], &cancelled).await;
|
||||
return run_watch_single(cli, resolved, &client, &options, &urls[0], &cancelled).await;
|
||||
}
|
||||
|
||||
// Multi-URL mode: batch fetch, diff each, report aggregate
|
||||
run_watch_multi(cli, &client, &options, urls, &cancelled).await
|
||||
run_watch_multi(cli, resolved, &client, &options, urls, &cancelled).await
|
||||
}
|
||||
|
||||
/// Original single-URL watch loop -- backward compatible.
|
||||
async fn run_watch_single(
|
||||
cli: &Cli,
|
||||
resolved: &config::ResolvedConfig,
|
||||
client: &Arc<FetchClient>,
|
||||
options: &ExtractionOptions,
|
||||
url: &str,
|
||||
|
|
@ -1580,7 +1596,7 @@ async fn run_watch_single(
|
|||
if diff.status == ChangeStatus::Same {
|
||||
eprintln!("[watch] No changes ({})", timestamp());
|
||||
} else {
|
||||
print_diff_output(&diff, &cli.format);
|
||||
print_diff_output(&diff, &resolved.format);
|
||||
eprintln!("[watch] Changes detected! ({})", timestamp());
|
||||
|
||||
if let Some(ref cmd) = cli.on_change {
|
||||
|
|
@ -1627,6 +1643,7 @@ async fn run_watch_single(
|
|||
/// Multi-URL watch loop -- batch fetch all URLs, diff each, report aggregate.
|
||||
async fn run_watch_multi(
|
||||
cli: &Cli,
|
||||
resolved: &config::ResolvedConfig,
|
||||
client: &Arc<FetchClient>,
|
||||
options: &ExtractionOptions,
|
||||
urls: &[String],
|
||||
|
|
@ -1636,7 +1653,7 @@ async fn run_watch_multi(
|
|||
|
||||
// Initial pass: fetch all URLs in parallel
|
||||
let initial_results = client
|
||||
.fetch_and_extract_batch_with_options(&url_refs, cli.concurrency, options)
|
||||
.fetch_and_extract_batch_with_options(&url_refs, resolved.concurrency, options)
|
||||
.await;
|
||||
|
||||
let mut snapshots = std::collections::HashMap::new();
|
||||
|
|
@ -1676,7 +1693,7 @@ async fn run_watch_multi(
|
|||
check_number += 1;
|
||||
|
||||
let current_results = client
|
||||
.fetch_and_extract_batch_with_options(&url_refs, cli.concurrency, options)
|
||||
.fetch_and_extract_batch_with_options(&url_refs, resolved.concurrency, options)
|
||||
.await;
|
||||
|
||||
let mut changed: Vec<serde_json::Value> = Vec::new();
|
||||
|
|
@ -1780,7 +1797,11 @@ async fn run_watch_multi(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
async fn run_diff(cli: &Cli, snapshot_path: &str) -> Result<(), String> {
|
||||
async fn run_diff(
|
||||
cli: &Cli,
|
||||
resolved: &config::ResolvedConfig,
|
||||
snapshot_path: &str,
|
||||
) -> Result<(), String> {
|
||||
// Load previous snapshot
|
||||
let snapshot_json = std::fs::read_to_string(snapshot_path)
|
||||
.map_err(|e| format!("failed to read snapshot {snapshot_path}: {e}"))?;
|
||||
|
|
@ -1788,16 +1809,16 @@ async fn run_diff(cli: &Cli, snapshot_path: &str) -> Result<(), String> {
|
|||
.map_err(|e| format!("failed to parse snapshot JSON: {e}"))?;
|
||||
|
||||
// Extract current version (handles PDF detection for URLs)
|
||||
let new_result = fetch_and_extract(cli).await?.into_extraction()?;
|
||||
let new_result = fetch_and_extract(cli, resolved).await?.into_extraction()?;
|
||||
|
||||
let diff = noxa_core::diff::diff(&old, &new_result);
|
||||
print_diff_output(&diff, &cli.format);
|
||||
print_diff_output(&diff, &resolved.format);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn run_brand(cli: &Cli) -> Result<(), String> {
|
||||
let result = fetch_html(cli).await?;
|
||||
async fn run_brand(cli: &Cli, resolved: &config::ResolvedConfig) -> Result<(), String> {
|
||||
let result = fetch_html(cli, resolved).await?;
|
||||
let enriched = enrich_html_with_stylesheets(&result.html, &result.url).await;
|
||||
let brand = noxa_core::brand::extract_brand(
|
||||
&enriched,
|
||||
|
|
@ -1811,13 +1832,27 @@ async fn run_brand(cli: &Cli) -> Result<(), String> {
|
|||
}
|
||||
|
||||
/// Build an LLM provider based on CLI flags, or fall back to the default chain.
|
||||
async fn build_llm_provider(cli: &Cli) -> Result<Box<dyn LlmProvider>, String> {
|
||||
if let Some(ref name) = cli.llm_provider {
|
||||
async fn build_llm_provider(
|
||||
cli: &Cli,
|
||||
resolved: &config::ResolvedConfig,
|
||||
) -> Result<Box<dyn LlmProvider>, String> {
|
||||
if let Some(ref name) = resolved.llm_provider {
|
||||
match name.as_str() {
|
||||
"gemini" => {
|
||||
let provider = noxa_llm::providers::gemini_cli::GeminiCliProvider::new(
|
||||
resolved.llm_model.clone(),
|
||||
);
|
||||
if !provider.is_available().await {
|
||||
return Err(
|
||||
"gemini CLI not found on PATH -- install it or omit --llm-provider".into(),
|
||||
);
|
||||
}
|
||||
Ok(Box::new(provider))
|
||||
}
|
||||
"ollama" => {
|
||||
let provider = noxa_llm::providers::ollama::OllamaProvider::new(
|
||||
cli.llm_base_url.clone(),
|
||||
cli.llm_model.clone(),
|
||||
resolved.llm_model.clone(),
|
||||
);
|
||||
if !provider.is_available().await {
|
||||
return Err("ollama is not running or unreachable".into());
|
||||
|
|
@ -1828,7 +1863,7 @@ async fn build_llm_provider(cli: &Cli) -> Result<Box<dyn LlmProvider>, String> {
|
|||
let provider = noxa_llm::providers::openai::OpenAiProvider::new(
|
||||
None,
|
||||
cli.llm_base_url.clone(),
|
||||
cli.llm_model.clone(),
|
||||
resolved.llm_model.clone(),
|
||||
)
|
||||
.ok_or("OPENAI_API_KEY not set")?;
|
||||
Ok(Box::new(provider))
|
||||
|
|
@ -1836,20 +1871,20 @@ async fn build_llm_provider(cli: &Cli) -> Result<Box<dyn LlmProvider>, String> {
|
|||
"anthropic" => {
|
||||
let provider = noxa_llm::providers::anthropic::AnthropicProvider::new(
|
||||
None,
|
||||
cli.llm_model.clone(),
|
||||
resolved.llm_model.clone(),
|
||||
)
|
||||
.ok_or("ANTHROPIC_API_KEY not set")?;
|
||||
Ok(Box::new(provider))
|
||||
}
|
||||
other => Err(format!(
|
||||
"unknown LLM provider: {other} (use ollama, openai, or anthropic)"
|
||||
"unknown LLM provider: {other} (use gemini, ollama, openai, or anthropic)"
|
||||
)),
|
||||
}
|
||||
} else {
|
||||
let chain = noxa_llm::ProviderChain::default().await;
|
||||
if chain.is_empty() {
|
||||
return Err(
|
||||
"no LLM providers available -- start Ollama or set OPENAI_API_KEY / ANTHROPIC_API_KEY"
|
||||
"no LLM providers available -- install the gemini CLI, start Ollama, or set OPENAI_API_KEY / ANTHROPIC_API_KEY"
|
||||
.into(),
|
||||
);
|
||||
}
|
||||
|
|
@ -1857,12 +1892,12 @@ async fn build_llm_provider(cli: &Cli) -> Result<Box<dyn LlmProvider>, String> {
|
|||
}
|
||||
}
|
||||
|
||||
async fn run_llm(cli: &Cli) -> Result<(), String> {
|
||||
async fn run_llm(cli: &Cli, resolved: &config::ResolvedConfig) -> Result<(), String> {
|
||||
// Extract content from source first (handles PDF detection for URLs)
|
||||
let result = fetch_and_extract(cli).await?.into_extraction()?;
|
||||
let result = fetch_and_extract(cli, resolved).await?.into_extraction()?;
|
||||
|
||||
let provider = build_llm_provider(cli).await?;
|
||||
let model = cli.llm_model.as_deref();
|
||||
let provider = build_llm_provider(cli, resolved).await?;
|
||||
let model = resolved.llm_model.as_deref();
|
||||
|
||||
if let Some(ref schema_input) = cli.extract_json {
|
||||
// Support @file syntax for loading schema from file
|
||||
|
|
@ -1876,6 +1911,7 @@ async fn run_llm(cli: &Cli) -> Result<(), String> {
|
|||
let schema: serde_json::Value =
|
||||
serde_json::from_str(&schema_str).map_err(|e| format!("invalid JSON schema: {e}"))?;
|
||||
|
||||
let t = std::time::Instant::now();
|
||||
let extracted = noxa_llm::extract::extract_json(
|
||||
&result.content.plain_text,
|
||||
&schema,
|
||||
|
|
@ -1884,12 +1920,14 @@ async fn run_llm(cli: &Cli) -> Result<(), String> {
|
|||
)
|
||||
.await
|
||||
.map_err(|e| format!("LLM extraction failed: {e}"))?;
|
||||
eprintln!("LLM: {:.1}s", t.elapsed().as_secs_f64());
|
||||
|
||||
println!(
|
||||
"{}",
|
||||
serde_json::to_string_pretty(&extracted).expect("serialization failed")
|
||||
);
|
||||
} else if let Some(ref prompt) = cli.extract_prompt {
|
||||
let t = std::time::Instant::now();
|
||||
let extracted = noxa_llm::extract::extract_with_prompt(
|
||||
&result.content.plain_text,
|
||||
prompt,
|
||||
|
|
@ -1898,12 +1936,14 @@ async fn run_llm(cli: &Cli) -> Result<(), String> {
|
|||
)
|
||||
.await
|
||||
.map_err(|e| format!("LLM extraction failed: {e}"))?;
|
||||
eprintln!("LLM: {:.1}s", t.elapsed().as_secs_f64());
|
||||
|
||||
println!(
|
||||
"{}",
|
||||
serde_json::to_string_pretty(&extracted).expect("serialization failed")
|
||||
);
|
||||
} else if let Some(sentences) = cli.summarize {
|
||||
let t = std::time::Instant::now();
|
||||
let summary = noxa_llm::summarize::summarize(
|
||||
&result.content.plain_text,
|
||||
Some(sentences),
|
||||
|
|
@ -1912,6 +1952,7 @@ async fn run_llm(cli: &Cli) -> Result<(), String> {
|
|||
)
|
||||
.await
|
||||
.map_err(|e| format!("LLM summarization failed: {e}"))?;
|
||||
eprintln!("LLM: {:.1}s", t.elapsed().as_secs_f64());
|
||||
|
||||
println!("{summary}");
|
||||
}
|
||||
|
|
@ -1921,12 +1962,16 @@ async fn run_llm(cli: &Cli) -> Result<(), String> {
|
|||
|
||||
/// Batch LLM extraction: fetch each URL, run LLM on extracted content, save/print results.
|
||||
/// URLs are processed sequentially to respect LLM provider rate limits.
|
||||
async fn run_batch_llm(cli: &Cli, entries: &[(String, Option<String>)]) -> Result<(), String> {
|
||||
let client =
|
||||
FetchClient::new(build_fetch_config(cli)).map_err(|e| format!("client error: {e}"))?;
|
||||
let options = build_extraction_options(cli);
|
||||
let provider = build_llm_provider(cli).await?;
|
||||
let model = cli.llm_model.as_deref();
|
||||
async fn run_batch_llm(
|
||||
cli: &Cli,
|
||||
resolved: &config::ResolvedConfig,
|
||||
entries: &[(String, Option<String>)],
|
||||
) -> Result<(), String> {
|
||||
let client = FetchClient::new(build_fetch_config(cli, resolved))
|
||||
.map_err(|e| format!("client error: {e}"))?;
|
||||
let options = build_extraction_options(resolved);
|
||||
let provider = build_llm_provider(cli, resolved).await?;
|
||||
let model = resolved.llm_model.as_deref();
|
||||
|
||||
// Pre-parse schema once if --extract-json is used
|
||||
let schema = if let Some(ref schema_input) = cli.extract_json {
|
||||
|
|
@ -1974,6 +2019,7 @@ async fn run_batch_llm(cli: &Cli, entries: &[(String, Option<String>)]) -> Resul
|
|||
let text = &extraction.content.plain_text;
|
||||
|
||||
// Run the appropriate LLM operation
|
||||
let llm_start = std::time::Instant::now();
|
||||
let llm_result = if let Some(ref schema) = schema {
|
||||
noxa_llm::extract::extract_json(text, schema, provider.as_ref(), model)
|
||||
.await
|
||||
|
|
@ -1989,6 +2035,7 @@ async fn run_batch_llm(cli: &Cli, entries: &[(String, Option<String>)]) -> Resul
|
|||
} else {
|
||||
unreachable!("run_batch_llm called without LLM flags")
|
||||
};
|
||||
let llm_elapsed = llm_start.elapsed();
|
||||
|
||||
match llm_result {
|
||||
Ok(output) => {
|
||||
|
|
@ -2018,7 +2065,7 @@ async fn run_batch_llm(cli: &Cli, entries: &[(String, Option<String>)]) -> Resul
|
|||
format!("{words} words")
|
||||
}
|
||||
};
|
||||
eprintln!("-> extracted {detail}");
|
||||
eprintln!("-> extracted {detail} ({:.1}s)", llm_elapsed.as_secs_f64());
|
||||
|
||||
if let Some(ref dir) = cli.output_dir {
|
||||
let filename = custom_names
|
||||
|
|
@ -2215,12 +2262,29 @@ async fn run_research(cli: &Cli, query: &str) -> Result<(), String> {
|
|||
async fn main() {
|
||||
dotenvy::dotenv().ok();
|
||||
|
||||
let cli = Cli::parse();
|
||||
init_logging(cli.verbose);
|
||||
if matches!(std::env::args().nth(1).as_deref(), Some("mcp")) {
|
||||
init_mcp_logging();
|
||||
|
||||
if let Err(e) = noxa_mcp::run().await {
|
||||
eprintln!("error: {e}");
|
||||
process::exit(1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Use low-level API to get both typed Cli and ArgMatches for ValueSource detection.
|
||||
let matches = Cli::command().get_matches();
|
||||
let cli = Cli::from_arg_matches(&matches).unwrap_or_else(|e| e.exit());
|
||||
|
||||
// Load config BEFORE init_logging so verbose from config takes effect.
|
||||
let cfg = config::NoxaConfig::load(cli.config.as_deref());
|
||||
let resolved = config::resolve(&cli, &matches, &cfg);
|
||||
|
||||
init_logging(resolved.verbose);
|
||||
|
||||
// --map: sitemap discovery mode
|
||||
if cli.map {
|
||||
if let Err(e) = run_map(&cli).await {
|
||||
if let Err(e) = run_map(&cli, &resolved).await {
|
||||
eprintln!("error: {e}");
|
||||
process::exit(1);
|
||||
}
|
||||
|
|
@ -2229,7 +2293,7 @@ async fn main() {
|
|||
|
||||
// --crawl: recursive crawl mode
|
||||
if cli.crawl {
|
||||
if let Err(e) = run_crawl(&cli).await {
|
||||
if let Err(e) = run_crawl(&cli, &resolved).await {
|
||||
eprintln!("error: {e}");
|
||||
process::exit(1);
|
||||
}
|
||||
|
|
@ -2245,7 +2309,7 @@ async fn main() {
|
|||
process::exit(1);
|
||||
}
|
||||
};
|
||||
if let Err(e) = run_watch(&cli, &watch_urls).await {
|
||||
if let Err(e) = run_watch(&cli, &resolved, &watch_urls).await {
|
||||
eprintln!("error: {e}");
|
||||
process::exit(1);
|
||||
}
|
||||
|
|
@ -2254,7 +2318,7 @@ async fn main() {
|
|||
|
||||
// --diff-with: change tracking mode
|
||||
if let Some(ref snapshot_path) = cli.diff_with {
|
||||
if let Err(e) = run_diff(&cli, snapshot_path).await {
|
||||
if let Err(e) = run_diff(&cli, &resolved, snapshot_path).await {
|
||||
eprintln!("error: {e}");
|
||||
process::exit(1);
|
||||
}
|
||||
|
|
@ -2263,7 +2327,7 @@ async fn main() {
|
|||
|
||||
// --brand: brand identity extraction mode
|
||||
if cli.brand {
|
||||
if let Err(e) = run_brand(&cli).await {
|
||||
if let Err(e) = run_brand(&cli, &resolved).await {
|
||||
eprintln!("error: {e}");
|
||||
process::exit(1);
|
||||
}
|
||||
|
|
@ -2292,11 +2356,11 @@ async fn main() {
|
|||
// When multiple URLs are provided, run batch LLM extraction over all of them.
|
||||
if has_llm_flags(&cli) {
|
||||
if entries.len() > 1 {
|
||||
if let Err(e) = run_batch_llm(&cli, &entries).await {
|
||||
if let Err(e) = run_batch_llm(&cli, &resolved, &entries).await {
|
||||
eprintln!("error: {e}");
|
||||
process::exit(1);
|
||||
}
|
||||
} else if let Err(e) = run_llm(&cli).await {
|
||||
} else if let Err(e) = run_llm(&cli, &resolved).await {
|
||||
eprintln!("error: {e}");
|
||||
process::exit(1);
|
||||
}
|
||||
|
|
@ -2305,7 +2369,7 @@ async fn main() {
|
|||
|
||||
// Multi-URL batch mode
|
||||
if entries.len() > 1 {
|
||||
if let Err(e) = run_batch(&cli, &entries).await {
|
||||
if let Err(e) = run_batch(&cli, &resolved, &entries).await {
|
||||
eprintln!("error: {e}");
|
||||
process::exit(1);
|
||||
}
|
||||
|
|
@ -2313,8 +2377,11 @@ async fn main() {
|
|||
}
|
||||
|
||||
// --raw-html: skip extraction, dump the fetched HTML
|
||||
if cli.raw_html && cli.include.is_none() && cli.exclude.is_none() {
|
||||
match fetch_html(&cli).await {
|
||||
if resolved.raw_html
|
||||
&& resolved.include_selectors.is_empty()
|
||||
&& resolved.exclude_selectors.is_empty()
|
||||
{
|
||||
match fetch_html(&cli, &resolved).await {
|
||||
Ok(r) => println!("{}", r.html),
|
||||
Err(e) => {
|
||||
eprintln!("error: {e}");
|
||||
|
|
@ -2325,7 +2392,7 @@ async fn main() {
|
|||
}
|
||||
|
||||
// Single-page extraction (handles both HTML and PDF via content-type detection)
|
||||
match fetch_and_extract(&cli).await {
|
||||
match fetch_and_extract(&cli, &resolved).await {
|
||||
Ok(FetchOutput::Local(result)) => {
|
||||
if let Some(ref dir) = cli.output_dir {
|
||||
let url = cli
|
||||
|
|
@ -2334,18 +2401,19 @@ async fn main() {
|
|||
.map(|u| normalize_url(u))
|
||||
.unwrap_or_default();
|
||||
let custom_name = entries.first().and_then(|(_, name)| name.clone());
|
||||
let filename = custom_name.unwrap_or_else(|| url_to_filename(&url, &cli.format));
|
||||
let content = format_output(&result, &cli.format, cli.metadata);
|
||||
let filename =
|
||||
custom_name.unwrap_or_else(|| url_to_filename(&url, &resolved.format));
|
||||
let content = format_output(&result, &resolved.format, resolved.metadata);
|
||||
if let Err(e) = write_to_file(dir, &filename, &content) {
|
||||
eprintln!("error: {e}");
|
||||
process::exit(1);
|
||||
}
|
||||
} else {
|
||||
print_output(&result, &cli.format, cli.metadata);
|
||||
print_output(&result, &resolved.format, resolved.metadata);
|
||||
}
|
||||
}
|
||||
Ok(FetchOutput::Cloud(resp)) => {
|
||||
print_cloud_output(&resp, &cli.format);
|
||||
print_cloud_output(&resp, &resolved.format);
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("{e}");
|
||||
|
|
@ -2456,3 +2524,28 @@ mod tests {
|
|||
let _ = std::fs::remove_dir_all(&dir);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod enum_deserialize_tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_output_format_deserialize() {
|
||||
let f: OutputFormat = serde_json::from_str("\"llm\"").unwrap();
|
||||
assert!(matches!(f, OutputFormat::Llm));
|
||||
let f: OutputFormat = serde_json::from_str("\"markdown\"").unwrap();
|
||||
assert!(matches!(f, OutputFormat::Markdown));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_browser_deserialize() {
|
||||
let b: Browser = serde_json::from_str("\"firefox\"").unwrap();
|
||||
assert!(matches!(b, Browser::Firefox));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_pdf_mode_deserialize() {
|
||||
let p: PdfModeArg = serde_json::from_str("\"fast\"").unwrap();
|
||||
assert!(matches!(p, PdfModeArg::Fast));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ license.workspace = true
|
|||
[dependencies]
|
||||
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
|
||||
async-trait = "0.1"
|
||||
jsonschema = { version = "0.46", default-features = false }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -2,12 +2,15 @@
|
|||
/// Default order: Ollama (local, free) -> OpenAI -> Anthropic.
|
||||
/// Only includes providers that are actually configured/available.
|
||||
use async_trait::async_trait;
|
||||
use tracing::{debug, warn};
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::error::LlmError;
|
||||
use crate::provider::{CompletionRequest, LlmProvider};
|
||||
use crate::providers::{
|
||||
anthropic::AnthropicProvider, ollama::OllamaProvider, openai::OpenAiProvider,
|
||||
anthropic::AnthropicProvider,
|
||||
gemini_cli::GeminiCliProvider,
|
||||
ollama::OllamaProvider,
|
||||
openai::OpenAiProvider,
|
||||
};
|
||||
|
||||
pub struct ProviderChain {
|
||||
|
|
@ -15,12 +18,26 @@ pub struct ProviderChain {
|
|||
}
|
||||
|
||||
impl ProviderChain {
|
||||
/// Build the default chain: Ollama -> OpenAI -> Anthropic.
|
||||
/// Ollama is always added (availability checked at call time).
|
||||
/// Build the default chain: Gemini CLI -> OpenAI -> Ollama -> Anthropic.
|
||||
/// Gemini CLI is the primary backend (subprocess-based, requires `gemini` on PATH).
|
||||
/// Cloud providers are only added if their API keys are configured.
|
||||
/// Ollama is added if reachable at call time.
|
||||
pub async fn default() -> Self {
|
||||
let mut providers: Vec<Box<dyn LlmProvider>> = Vec::new();
|
||||
|
||||
let gemini = GeminiCliProvider::new(None);
|
||||
if gemini.is_available().await {
|
||||
debug!("gemini cli available, adding as primary provider");
|
||||
providers.push(Box::new(gemini));
|
||||
} else {
|
||||
debug!("gemini cli not found on PATH, skipping");
|
||||
}
|
||||
|
||||
if let Some(openai) = OpenAiProvider::new(None, None, None) {
|
||||
debug!("openai configured, adding to chain");
|
||||
providers.push(Box::new(openai));
|
||||
}
|
||||
|
||||
let ollama = OllamaProvider::new(None, None);
|
||||
if ollama.is_available().await {
|
||||
debug!("ollama is available, adding to chain");
|
||||
|
|
@ -29,11 +46,6 @@ impl ProviderChain {
|
|||
debug!("ollama not available, skipping");
|
||||
}
|
||||
|
||||
if let Some(openai) = OpenAiProvider::new(None, None, None) {
|
||||
debug!("openai configured, adding to chain");
|
||||
providers.push(Box::new(openai));
|
||||
}
|
||||
|
||||
if let Some(anthropic) = AnthropicProvider::new(None, None) {
|
||||
debug!("anthropic configured, adding to chain");
|
||||
providers.push(Box::new(anthropic));
|
||||
|
|
@ -79,9 +91,10 @@ impl LlmProvider for ProviderChain {
|
|||
for provider in &self.providers {
|
||||
debug!(provider = provider.name(), "attempting completion");
|
||||
|
||||
let t = std::time::Instant::now();
|
||||
match provider.complete(request).await {
|
||||
Ok(response) => {
|
||||
debug!(provider = provider.name(), "completion succeeded");
|
||||
info!(provider = provider.name(), elapsed_ms = t.elapsed().as_millis(), "completion succeeded");
|
||||
return Ok(response);
|
||||
}
|
||||
Err(e) => {
|
||||
|
|
@ -202,4 +215,46 @@ mod tests {
|
|||
assert_eq!(chain.len(), 2);
|
||||
assert!(!chain.is_empty());
|
||||
}
|
||||
|
||||
// ── Gemini-first chain ordering ───────────────────────────────────────────
|
||||
|
||||
#[tokio::test]
|
||||
async fn gemini_first_in_single_provider_chain() {
|
||||
// When we build a chain with a mock "gemini" provider first, it should
|
||||
// be used before any fallback.
|
||||
let chain = ProviderChain::from_providers(vec![
|
||||
Box::new(MockProvider {
|
||||
name: "gemini",
|
||||
response: Ok("from gemini".into()),
|
||||
available: true,
|
||||
}),
|
||||
Box::new(MockProvider {
|
||||
name: "openai",
|
||||
response: Ok("from openai".into()),
|
||||
available: true,
|
||||
}),
|
||||
]);
|
||||
let result = chain.complete(&test_request()).await.unwrap();
|
||||
assert_eq!(result, "from gemini");
|
||||
// Confirm order: first provider name is "gemini"
|
||||
assert_eq!(chain.providers[0].name(), "gemini");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn gemini_failure_falls_back_to_openai() {
|
||||
let chain = ProviderChain::from_providers(vec![
|
||||
Box::new(MockProvider {
|
||||
name: "gemini",
|
||||
response: Err("subprocess timed out".into()),
|
||||
available: true,
|
||||
}),
|
||||
Box::new(MockProvider {
|
||||
name: "openai",
|
||||
response: Ok("from openai".into()),
|
||||
available: true,
|
||||
}),
|
||||
]);
|
||||
let result = chain.complete(&test_request()).await.unwrap();
|
||||
assert_eq!(result, "from openai");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,12 @@ pub enum LlmError {
|
|||
#[error("HTTP error: {0}")]
|
||||
Http(#[from] reqwest::Error),
|
||||
|
||||
#[error("subprocess error: {0}")]
|
||||
Subprocess(#[from] std::io::Error),
|
||||
|
||||
#[error("subprocess timed out")]
|
||||
Timeout,
|
||||
|
||||
#[error("no providers available")]
|
||||
NoProviders,
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,45 @@
|
|||
/// Schema-based and prompt-based LLM extraction.
|
||||
/// Both functions build a system prompt, send content to the LLM, and parse JSON back.
|
||||
use jsonschema;
|
||||
|
||||
use crate::clean::strip_thinking_tags;
|
||||
use crate::error::LlmError;
|
||||
use crate::provider::{CompletionRequest, LlmProvider, Message};
|
||||
|
||||
/// Validate a JSON value against a schema. Returns Ok(()) on success or
|
||||
/// Err(LlmError::InvalidJson) with a concise error message on failure.
|
||||
fn validate_schema(
|
||||
value: &serde_json::Value,
|
||||
schema: &serde_json::Value,
|
||||
) -> Result<(), LlmError> {
|
||||
let compiled = jsonschema::validator_for(schema).map_err(|e| {
|
||||
LlmError::InvalidJson(format!("invalid schema: {e}"))
|
||||
})?;
|
||||
|
||||
let errors: Vec<String> = compiled
|
||||
.iter_errors(value)
|
||||
.map(|e| format!("{} at {}", e, e.instance_path()))
|
||||
.collect();
|
||||
|
||||
if errors.is_empty() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(LlmError::InvalidJson(format!(
|
||||
"schema validation failed: {}",
|
||||
errors.join("; ")
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract structured JSON from content using a JSON schema.
|
||||
/// The schema tells the LLM exactly what fields to extract and their types.
|
||||
///
|
||||
/// Retry policy:
|
||||
/// - If the response cannot be parsed as JSON at all: retry once with the
|
||||
/// identical request (handles transient formatting issues).
|
||||
/// - If the response is valid JSON but fails schema validation: return
|
||||
/// `LlmError::InvalidJson` immediately — the schema is likely unsatisfiable
|
||||
/// for this content, so retrying would produce the same result.
|
||||
pub async fn extract_json(
|
||||
content: &str,
|
||||
schema: &serde_json::Value,
|
||||
|
|
@ -37,7 +71,22 @@ pub async fn extract_json(
|
|||
};
|
||||
|
||||
let response = provider.complete(&request).await?;
|
||||
parse_json_response(&response)
|
||||
|
||||
match parse_json_response(&response) {
|
||||
Ok(value) => {
|
||||
// Valid JSON — now validate against the schema.
|
||||
// Schema mismatches do not retry (unsatisfiable → same result).
|
||||
validate_schema(&value, schema)?;
|
||||
Ok(value)
|
||||
}
|
||||
Err(_parse_err) => {
|
||||
// Unparseable JSON — retry once with the identical request.
|
||||
let retry_response = provider.complete(&request).await?;
|
||||
let value = parse_json_response(&retry_response)?;
|
||||
validate_schema(&value, schema)?;
|
||||
Ok(value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract information using a natural language prompt.
|
||||
|
|
@ -184,4 +233,130 @@ mod tests {
|
|||
|
||||
assert_eq!(result["emails"][0], "test@example.com");
|
||||
}
|
||||
|
||||
// ── Schema validation ─────────────────────────────────────────────────────
|
||||
|
||||
#[tokio::test]
|
||||
async fn schema_validation_passes_for_matching_json() {
|
||||
let schema = serde_json::json!({
|
||||
"type": "object",
|
||||
"required": ["price"],
|
||||
"properties": {
|
||||
"price": { "type": "number" }
|
||||
}
|
||||
});
|
||||
let mock = MockProvider::ok(r#"{"price": 9.99}"#);
|
||||
let result = extract_json("content", &schema, &mock, None).await.unwrap();
|
||||
assert_eq!(result["price"], 9.99);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn schema_validation_fails_for_wrong_type() {
|
||||
let schema = serde_json::json!({
|
||||
"type": "object",
|
||||
"required": ["price"],
|
||||
"properties": {
|
||||
"price": { "type": "number" }
|
||||
}
|
||||
});
|
||||
// Model returns valid JSON but wrong type ("string" instead of number).
|
||||
// Should NOT retry (schema mismatch ≠ parse failure) — returns InvalidJson immediately.
|
||||
let mock = MockProvider::ok(r#"{"price": "not-a-number"}"#);
|
||||
let result = extract_json("content", &schema, &mock, None).await;
|
||||
assert!(
|
||||
matches!(result, Err(LlmError::InvalidJson(_))),
|
||||
"expected InvalidJson for schema mismatch, got {result:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn schema_validation_fails_for_missing_required_field() {
|
||||
let schema = serde_json::json!({
|
||||
"type": "object",
|
||||
"required": ["title"],
|
||||
"properties": {
|
||||
"title": { "type": "string" }
|
||||
}
|
||||
});
|
||||
let mock = MockProvider::ok(r#"{"other": "value"}"#);
|
||||
let result = extract_json("content", &schema, &mock, None).await;
|
||||
assert!(matches!(result, Err(LlmError::InvalidJson(_))));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn parse_failure_triggers_one_retry() {
|
||||
use crate::testing::mock::SequenceMockProvider;
|
||||
|
||||
let schema = serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": { "title": { "type": "string" } }
|
||||
});
|
||||
|
||||
// First call: unparseable JSON. Second call: valid JSON matching schema.
|
||||
let mock = SequenceMockProvider::new(
|
||||
"mock-seq",
|
||||
vec![
|
||||
Ok("this is not json at all".to_string()),
|
||||
Ok(r#"{"title": "Retry succeeded"}"#.to_string()),
|
||||
],
|
||||
);
|
||||
|
||||
let result = extract_json("content", &schema, &mock, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(result["title"], "Retry succeeded");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn both_attempts_fail_returns_invalid_json() {
|
||||
use crate::testing::mock::SequenceMockProvider;
|
||||
|
||||
let schema = serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": { "title": { "type": "string" } }
|
||||
});
|
||||
|
||||
let mock = SequenceMockProvider::new(
|
||||
"mock-seq",
|
||||
vec![
|
||||
Ok("not json".to_string()),
|
||||
Ok("also not json".to_string()),
|
||||
],
|
||||
);
|
||||
|
||||
let result = extract_json("content", &schema, &mock, None).await;
|
||||
assert!(
|
||||
matches!(result, Err(LlmError::InvalidJson(_))),
|
||||
"expected InvalidJson after both attempts fail"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn schema_mismatch_does_not_retry() {
|
||||
use crate::testing::mock::SequenceMockProvider;
|
||||
|
||||
let schema = serde_json::json!({
|
||||
"type": "object",
|
||||
"required": ["price"],
|
||||
"properties": {
|
||||
"price": { "type": "number" }
|
||||
}
|
||||
});
|
||||
|
||||
// Both calls return valid JSON with wrong schema — but only one call should happen.
|
||||
let mock = SequenceMockProvider::new(
|
||||
"mock-seq",
|
||||
vec![
|
||||
Ok(r#"{"price": "wrong-type"}"#.to_string()),
|
||||
Ok(r#"{"price": 9.99}"#.to_string()), // would succeed — but shouldn't be called
|
||||
],
|
||||
);
|
||||
|
||||
// Should return InvalidJson without calling second response.
|
||||
let result = extract_json("content", &schema, &mock, None).await;
|
||||
assert!(
|
||||
matches!(result, Err(LlmError::InvalidJson(_))),
|
||||
"schema mismatch should not trigger retry"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
/// noxa-llm: LLM integration with local-first hybrid architecture.
|
||||
/// noxa-llm: LLM integration with Gemini-CLI-first hybrid architecture.
|
||||
///
|
||||
/// Provider chain tries Ollama (local) first, falls back to OpenAI, then Anthropic.
|
||||
/// Provides schema-based extraction, prompt extraction, and summarization
|
||||
/// on top of noxa-core's content pipeline.
|
||||
/// Provider chain: Gemini CLI (primary) → OpenAI → Ollama → Anthropic.
|
||||
/// Gemini CLI requires the `gemini` binary on PATH; GEMINI_MODEL env var sets the model.
|
||||
/// Provides schema-validated extraction (with one retry on parse failure),
|
||||
/// prompt extraction, and summarization on top of noxa-core's content pipeline.
|
||||
pub mod chain;
|
||||
pub mod clean;
|
||||
pub mod error;
|
||||
|
|
|
|||
392
crates/noxa-llm/src/providers/gemini_cli.rs
Normal file
392
crates/noxa-llm/src/providers/gemini_cli.rs
Normal file
|
|
@ -0,0 +1,392 @@
|
|||
/// Gemini CLI provider — shells out to `gemini -p` for completions.
|
||||
/// Primary provider in the default chain; requires the `gemini` binary on PATH.
|
||||
///
|
||||
/// Prompts are passed via the `-p` flag (not via stdin or as a positional) to prevent
|
||||
/// command injection from web-scraped content. Output is parsed from `--output-format json`.
|
||||
///
|
||||
/// # Startup optimizations
|
||||
///
|
||||
/// The gemini CLI is an agentic Node.js application that connects to every configured MCP
|
||||
/// server at startup (the user has 6). Without mitigation this can add 10-60+ seconds per
|
||||
/// call as those servers spin up and time out.
|
||||
///
|
||||
/// Two flags reduce this:
|
||||
/// - `--extensions ""` — skips extension loading (~3 s saved)
|
||||
/// - `current_dir` set to a temp workdir containing `.gemini/settings.json` with
|
||||
/// `{"mcpServers":{}}` — workspace settings override user settings, so all 6 MCP
|
||||
/// servers are skipped at subprocess startup (major speedup).
|
||||
///
|
||||
/// The workdir is created once at construction and reused for every call.
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use tokio::process::Command;
|
||||
use tokio::sync::Semaphore;
|
||||
use tokio::time::timeout;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::clean::strip_thinking_tags;
|
||||
use crate::error::LlmError;
|
||||
use crate::provider::{CompletionRequest, LlmProvider};
|
||||
|
||||
/// Maximum concurrent Gemini subprocess calls.
|
||||
const MAX_CONCURRENT: usize = 6;
|
||||
/// Subprocess deadline — prevents hung `gemini` processes blocking the chain.
|
||||
const SUBPROCESS_TIMEOUT: Duration = Duration::from_secs(60);
|
||||
|
||||
/// Fixed workdir used for every subprocess call.
|
||||
/// A workspace-level `.gemini/settings.json` here overrides the user's MCP server config.
|
||||
const NOXA_GEMINI_WORKDIR: &str = "/tmp/noxa-gemini";
|
||||
|
||||
pub struct GeminiCliProvider {
|
||||
default_model: String,
|
||||
semaphore: Arc<Semaphore>,
|
||||
/// Workdir with a minimal `.gemini/settings.json` that disables MCP servers.
|
||||
workdir: PathBuf,
|
||||
}
|
||||
|
||||
impl GeminiCliProvider {
|
||||
/// Construct the provider.
|
||||
/// Model resolves as: `model` arg → `GEMINI_MODEL` env → `"gemini-2.5-pro"`.
|
||||
pub fn new(model: Option<String>) -> Self {
|
||||
let default_model = model
|
||||
.or_else(|| std::env::var("GEMINI_MODEL").ok())
|
||||
.filter(|s| !s.is_empty())
|
||||
.unwrap_or_else(|| "gemini-2.5-pro".into());
|
||||
|
||||
let workdir = PathBuf::from(NOXA_GEMINI_WORKDIR);
|
||||
ensure_gemini_workdir(&workdir);
|
||||
|
||||
Self {
|
||||
default_model,
|
||||
semaphore: Arc::new(Semaphore::new(MAX_CONCURRENT)),
|
||||
workdir,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn default_model(&self) -> &str {
|
||||
&self.default_model
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl LlmProvider for GeminiCliProvider {
|
||||
async fn complete(&self, request: &CompletionRequest) -> Result<String, LlmError> {
|
||||
let model = if request.model.is_empty() {
|
||||
&self.default_model
|
||||
} else {
|
||||
&request.model
|
||||
};
|
||||
|
||||
// Build the prompt text from all messages.
|
||||
let prompt = build_prompt(&request.messages);
|
||||
|
||||
// Acquire concurrency slot before spawning.
|
||||
let _permit = self
|
||||
.semaphore
|
||||
.acquire()
|
||||
.await
|
||||
.map_err(|_| LlmError::ProviderError("gemini semaphore closed".into()))?;
|
||||
|
||||
let mut cmd = Command::new("gemini");
|
||||
// -p STRING: headless mode with prompt as the flag value (never positional arg).
|
||||
// Passing via -p prevents command injection; the value is never interpreted as a shell command.
|
||||
cmd.arg("-p").arg(&prompt);
|
||||
cmd.arg("--model").arg(model);
|
||||
// Always request structured JSON output so we can extract the `response` field
|
||||
// and skip any preceding noise lines (e.g. MCP status warnings).
|
||||
cmd.arg("--output-format").arg("json");
|
||||
// --yolo suppresses any interactive confirmation prompts in headless mode.
|
||||
cmd.arg("--yolo");
|
||||
// --extensions "" skips loading user extensions (~3 s startup savings).
|
||||
cmd.arg("--extensions").arg("");
|
||||
// Workspace settings in self.workdir override the user's ~/.gemini/settings.json,
|
||||
// replacing the user's MCP server list with {} so none are spawned at startup.
|
||||
// Without this, each of the user's MCP servers adds latency to every call.
|
||||
cmd.current_dir(&self.workdir);
|
||||
|
||||
cmd.stdin(std::process::Stdio::null());
|
||||
cmd.stdout(std::process::Stdio::piped());
|
||||
cmd.stderr(std::process::Stdio::piped());
|
||||
|
||||
debug!(model, workdir = %self.workdir.display(), "spawning gemini subprocess");
|
||||
|
||||
let child = cmd.spawn().map_err(LlmError::Subprocess)?;
|
||||
|
||||
// Bounded wait — prevents indefinite hangs on auth expiry or network stall.
|
||||
let output = match timeout(SUBPROCESS_TIMEOUT, child.wait_with_output()).await {
|
||||
Ok(Ok(out)) => out,
|
||||
Ok(Err(e)) => return Err(LlmError::Subprocess(e)),
|
||||
Err(_elapsed) => return Err(LlmError::Timeout),
|
||||
};
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr_preview = String::from_utf8_lossy(&output.stderr);
|
||||
let preview = &stderr_preview[..stderr_preview.len().min(500)];
|
||||
return Err(LlmError::ProviderError(format!(
|
||||
"gemini exited with {}: {preview}",
|
||||
output.status
|
||||
)));
|
||||
}
|
||||
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let response = extract_response_from_output(&stdout)?;
|
||||
let cleaned = strip_code_fences(strip_thinking_tags(&response).trim());
|
||||
Ok(cleaned)
|
||||
}
|
||||
|
||||
async fn is_available(&self) -> bool {
|
||||
// Pure PATH check — no inference call, fast.
|
||||
matches!(
|
||||
Command::new("gemini")
|
||||
.arg("--version")
|
||||
.stdout(std::process::Stdio::null())
|
||||
.stderr(std::process::Stdio::null())
|
||||
.status()
|
||||
.await,
|
||||
Ok(s) if s.success()
|
||||
)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"gemini"
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse the `response` field from gemini's `--output-format json` output.
|
||||
///
|
||||
/// The CLI emits lines before the JSON object (e.g. MCP status warnings).
|
||||
/// We find the first `{` to locate the JSON, parse it, and extract `.response`.
|
||||
fn extract_response_from_output(stdout: &str) -> Result<String, LlmError> {
|
||||
let json_start = stdout.find('{').ok_or_else(|| {
|
||||
let preview = &stdout[..stdout.len().min(300)];
|
||||
LlmError::ProviderError(format!("gemini produced no JSON output: {preview}"))
|
||||
})?;
|
||||
|
||||
let json_str = &stdout[json_start..];
|
||||
let outer: serde_json::Value = serde_json::from_str(json_str).map_err(|e| {
|
||||
let preview = &json_str[..json_str.len().min(300)];
|
||||
LlmError::ProviderError(format!("failed to parse gemini JSON output: {e} — {preview}"))
|
||||
})?;
|
||||
|
||||
// `response` holds the model's actual text output.
|
||||
outer["response"]
|
||||
.as_str()
|
||||
.ok_or_else(|| {
|
||||
LlmError::ProviderError(format!(
|
||||
"gemini JSON output missing 'response' field: {}",
|
||||
&json_str[..json_str.len().min(300)]
|
||||
))
|
||||
})
|
||||
.map(|s| s.to_string())
|
||||
}
|
||||
|
||||
/// Create the noxa gemini workdir with a minimal workspace settings file.
|
||||
///
|
||||
/// The `.gemini/settings.json` written here overrides the user's `~/.gemini/settings.json`
|
||||
/// for any `gemini` subprocess run from this directory. Setting `mcpServers` to `{}` prevents
|
||||
/// the CLI from spawning the user's configured MCP servers on every headless call.
|
||||
///
|
||||
/// Errors are intentionally ignored — if the write fails, the subprocess still works,
|
||||
/// just without the startup optimization (and with a warning in the logs).
|
||||
fn ensure_gemini_workdir(workdir: &std::path::Path) {
|
||||
let settings_dir = workdir.join(".gemini");
|
||||
let settings_path = settings_dir.join("settings.json");
|
||||
|
||||
if settings_path.exists() {
|
||||
return;
|
||||
}
|
||||
|
||||
if let Err(e) = std::fs::create_dir_all(&settings_dir) {
|
||||
tracing::warn!(path = %settings_dir.display(), error = %e, "failed to create gemini workdir");
|
||||
return;
|
||||
}
|
||||
|
||||
// Minimal workspace settings: disable all MCP servers.
|
||||
// Workspace settings override ~/.gemini/settings.json per gemini CLI docs.
|
||||
let content = r#"{"mcpServers":{}}"#;
|
||||
if let Err(e) = std::fs::write(&settings_path, content) {
|
||||
tracing::warn!(path = %settings_path.display(), error = %e, "failed to write gemini workspace settings");
|
||||
}
|
||||
}
|
||||
|
||||
/// Concatenate all messages into a single prompt string for the CLI.
|
||||
fn build_prompt(messages: &[crate::provider::Message]) -> String {
|
||||
messages
|
||||
.iter()
|
||||
.map(|m| match m.role.as_str() {
|
||||
"system" => format!("[System]: {}", m.content),
|
||||
"assistant" => format!("[Assistant]: {}", m.content),
|
||||
_ => m.content.clone(),
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n\n")
|
||||
}
|
||||
|
||||
/// Strip a surrounding markdown code fence (```` ```json ```` or plain
/// ```` ``` ````) from a response string; non-fenced input is just trimmed.
fn strip_code_fences(s: &str) -> String {
    let body = s.trim();
    if !body.starts_with("```") {
        return body.to_string();
    }
    // Drop the opening fence (preferring the "json" language tag), then the
    // closing fence if present, and trim what remains.
    let opened = body
        .strip_prefix("```json")
        .or_else(|| body.strip_prefix("```"))
        .unwrap_or(body);
    let closed = opened.strip_suffix("```").unwrap_or(opened);
    closed.trim().to_string()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    // Unit tests for the gemini CLI provider helpers: construction/model
    // selection, prompt building, JSON response extraction, fence stripping,
    // availability probing, and thinking-tag cleanup.
    use super::*;

    // ── Construction ──────────────────────────────────────────────────────────

    #[test]
    fn explicit_model_used() {
        let p = GeminiCliProvider::new(Some("gemini-1.5-flash".into()));
        assert_eq!(p.default_model(), "gemini-1.5-flash");
        assert_eq!(p.name(), "gemini");
    }

    #[test]
    fn default_model_fallback() {
        // Explicit None + no GEMINI_MODEL env → hardcoded default.
        // We unset the env to avoid flakiness (it may or may not be set).
        unsafe { std::env::remove_var("GEMINI_MODEL") };
        let p = GeminiCliProvider::new(None);
        assert_eq!(p.default_model(), "gemini-2.5-pro");
    }

    // Env var tests mutate process-global state and race with parallel tests.
    // Run in isolation if needed:
    // cargo test -p noxa-llm env_model_override -- --ignored --test-threads=1
    #[test]
    #[ignore = "mutates process env; run with --test-threads=1"]
    fn env_model_override() {
        unsafe { std::env::set_var("GEMINI_MODEL", "gemini-1.5-pro") };
        let p = GeminiCliProvider::new(None);
        assert_eq!(p.default_model(), "gemini-1.5-pro");
        unsafe { std::env::remove_var("GEMINI_MODEL") };
    }

    // ── build_prompt ──────────────────────────────────────────────────────────

    #[test]
    fn build_prompt_user_only() {
        use crate::provider::Message;
        let messages = vec![Message {
            role: "user".into(),
            content: "hello world".into(),
        }];
        assert_eq!(build_prompt(&messages), "hello world");
    }

    #[test]
    fn build_prompt_system_and_user() {
        use crate::provider::Message;
        let messages = vec![
            Message {
                role: "system".into(),
                content: "You are helpful.".into(),
            },
            Message {
                role: "user".into(),
                content: "Tell me something.".into(),
            },
        ];
        let result = build_prompt(&messages);
        assert!(result.contains("[System]: You are helpful."));
        assert!(result.contains("Tell me something."));
    }

    // ── extract_response_from_output ──────────────────────────────────────────

    #[test]
    fn extracts_response_from_clean_json() {
        let stdout = r#"{"session_id":"abc","response":"Hello world","stats":{}}"#;
        assert_eq!(extract_response_from_output(stdout).unwrap(), "Hello world");
    }

    #[test]
    fn extracts_response_skipping_mcp_noise() {
        // MCP warning line appears before the JSON object in real gemini output.
        let stdout = "MCP issues detected. Run /mcp list for status.\n{\"session_id\":\"abc\",\"response\":\"the answer\",\"stats\":{}}";
        assert_eq!(
            extract_response_from_output(stdout).unwrap(),
            "the answer"
        );
    }

    #[test]
    fn error_when_no_json_in_output() {
        let result = extract_response_from_output("MCP issues detected. No JSON follows.");
        assert!(matches!(result, Err(LlmError::ProviderError(_))));
    }

    #[test]
    fn error_when_response_field_missing() {
        let stdout = r#"{"session_id":"abc","stats":{}}"#;
        let result = extract_response_from_output(stdout);
        assert!(matches!(result, Err(LlmError::ProviderError(_))));
    }

    // ── strip_code_fences ─────────────────────────────────────────────────────

    #[test]
    fn strips_json_fence() {
        let input = "```json\n{\"key\": \"value\"}\n```";
        assert_eq!(strip_code_fences(input), "{\"key\": \"value\"}");
    }

    #[test]
    fn strips_plain_fence() {
        let input = "```\nhello\n```";
        assert_eq!(strip_code_fences(input), "hello");
    }

    #[test]
    fn passthrough_no_fence() {
        let input = "{\"key\": \"value\"}";
        assert_eq!(strip_code_fences(input), "{\"key\": \"value\"}");
    }

    // ── is_available returns false when binary absent ──────────────────────────

    #[tokio::test]
    async fn unavailable_when_binary_missing() {
        let result = tokio::process::Command::new("__noxa_nonexistent_binary_xyz__")
            .arg("--version")
            .stdout(std::process::Stdio::null())
            .stderr(std::process::Stdio::null())
            .status()
            .await;
        assert!(result.is_err(), "missing binary should fail to spawn");
    }

    // ── thinking tag stripping ────────────────────────────────────────────────

    #[test]
    fn strips_thinking_tags_from_output() {
        let raw = "<think>internal reasoning</think>{\"result\": true}";
        let after_thinking = strip_thinking_tags(raw);
        let after_fences = strip_code_fences(after_thinking.trim());
        assert_eq!(after_fences, "{\"result\": true}");
    }

    #[test]
    fn strips_code_fence_after_thinking() {
        let raw = "<think>let me check</think>\n```json\n{\"ok\": 1}\n```";
        let after_thinking = strip_thinking_tags(raw);
        let after_fences = strip_code_fences(after_thinking.trim());
        assert_eq!(after_fences, "{\"ok\": 1}");
    }
}
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
pub mod anthropic;
|
||||
pub mod gemini_cli;
|
||||
pub mod ollama;
|
||||
pub mod openai;
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
/// First choice in the provider chain: free, private, fast on Apple Silicon.
|
||||
use async_trait::async_trait;
|
||||
use serde_json::json;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::clean::strip_thinking_tags;
|
||||
use crate::error::LlmError;
|
||||
|
|
@ -96,7 +97,10 @@ impl LlmProvider for OllamaProvider {
|
|||
|
||||
async fn is_available(&self) -> bool {
|
||||
let url = format!("{}/api/tags", self.base_url);
|
||||
matches!(self.client.get(&url).send().await, Ok(r) if r.status().is_success())
|
||||
matches!(
|
||||
tokio::time::timeout(Duration::from_millis(500), self.client.get(&url).send()).await,
|
||||
Ok(Ok(r)) if r.status().is_success()
|
||||
)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
|
|
|
|||
|
|
@ -4,6 +4,9 @@
|
|||
/// extract, chain, and other modules that need a fake LLM backend.
|
||||
#[cfg(test)]
|
||||
pub(crate) mod mock {
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::error::LlmError;
|
||||
|
|
@ -45,4 +48,48 @@ pub(crate) mod mock {
|
|||
self.name
|
||||
}
|
||||
}
|
||||
|
||||
    /// A mock provider that returns responses from a sequence.
    /// Call N returns `responses[min(N, len - 1)]` — once the sequence is
    /// exhausted, the last response repeats (clamped, not wrapped).
    /// Useful for testing first-failure / second-success retry paths.
    pub struct SequenceMockProvider {
        pub name: &'static str,
        pub responses: Vec<Result<String, String>>,
        pub available: bool,
        // Number of complete() calls made so far; drives the sequence position.
        call_count: Arc<AtomicUsize>,
    }
|
||||
|
||||
impl SequenceMockProvider {
|
||||
pub fn new(
|
||||
name: &'static str,
|
||||
responses: Vec<Result<String, String>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
name,
|
||||
responses,
|
||||
available: true,
|
||||
call_count: Arc::new(AtomicUsize::new(0)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl LlmProvider for SequenceMockProvider {
|
||||
async fn complete(&self, _request: &CompletionRequest) -> Result<String, LlmError> {
|
||||
let idx = self.call_count.fetch_add(1, Ordering::SeqCst);
|
||||
let response = &self.responses[idx.min(self.responses.len() - 1)];
|
||||
match response {
|
||||
Ok(text) => Ok(text.clone()),
|
||||
Err(msg) => Err(LlmError::ProviderError(msg.clone())),
|
||||
}
|
||||
}
|
||||
|
||||
async fn is_available(&self) -> bool {
|
||||
self.available
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
self.name
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,10 @@ version.workspace = true
|
|||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[lib]
|
||||
name = "noxa_mcp"
|
||||
path = "src/lib.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "noxa-mcp"
|
||||
path = "src/main.rs"
|
||||
|
|
@ -14,8 +18,8 @@ noxa-core = { workspace = true }
|
|||
noxa-fetch = { workspace = true }
|
||||
noxa-llm = { workspace = true }
|
||||
noxa-pdf = { workspace = true }
|
||||
rmcp = { version = "1.2", features = ["server", "macros", "transport-io", "schemars"] }
|
||||
schemars = "1.0"
|
||||
rmcp = { workspace = true }
|
||||
schemars = { workspace = true }
|
||||
dotenvy = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
|
|
@ -24,4 +28,4 @@ tracing = { workspace = true }
|
|||
tracing-subscriber = { workspace = true }
|
||||
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
|
||||
url = "2"
|
||||
dirs = "6.0.0"
|
||||
dirs = { workspace = true }
|
||||
|
|
|
|||
20
crates/noxa-mcp/src/lib.rs
Normal file
20
crates/noxa-mcp/src/lib.rs
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
/// noxa-mcp library wrapper.
|
||||
///
|
||||
/// This exposes the MCP server so it can be embedded by the `noxa` CLI via
|
||||
/// `noxa mcp` without duplicating the transport/bootstrap code.
|
||||
///
|
||||
/// Callers must initialize tracing before calling `run()`. Stdout must remain
|
||||
/// untouched after `run()` begins because it carries the MCP wire protocol.
|
||||
pub(crate) mod cloud;
|
||||
pub(crate) mod server;
|
||||
pub(crate) mod tools;
|
||||
|
||||
use rmcp::ServiceExt;
|
||||
use rmcp::transport::stdio;
|
||||
|
||||
/// Start the MCP server over stdio and block until the client disconnects.
|
||||
pub async fn run() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let service = server::NoxaMcp::new().await.serve(stdio()).await?;
|
||||
service.waiting().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -1,15 +1,6 @@
|
|||
/// noxa-mcp: MCP (Model Context Protocol) server for noxa.
|
||||
/// Exposes web extraction tools over stdio transport for AI agents
|
||||
/// like Claude Desktop, Claude Code, and other MCP clients.
|
||||
mod cloud;
|
||||
mod server;
|
||||
mod tools;
|
||||
|
||||
use rmcp::ServiceExt;
|
||||
use rmcp::transport::stdio;
|
||||
|
||||
use server::NoxaMcp;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
dotenvy::dotenv().ok();
|
||||
|
|
@ -21,8 +12,5 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
.with_ansi(false)
|
||||
.init();
|
||||
|
||||
let service = NoxaMcp::new().await.serve(stdio()).await?;
|
||||
|
||||
service.waiting().await?;
|
||||
Ok(())
|
||||
noxa_mcp::run().await
|
||||
}
|
||||
|
|
|
|||
|
|
@ -89,7 +89,7 @@ impl NoxaMcp {
|
|||
|
||||
let chain = noxa_llm::ProviderChain::default().await;
|
||||
let llm_chain = if chain.is_empty() {
|
||||
warn!("no LLM providers available -- extract/summarize tools will fail");
|
||||
warn!("no LLM providers available (gemini CLI, OPENAI_API_KEY, ANTHROPIC_API_KEY) -- extract/summarize tools will fail");
|
||||
None
|
||||
} else {
|
||||
info!(providers = chain.len(), "LLM provider chain ready");
|
||||
|
|
@ -334,7 +334,7 @@ impl NoxaMcp {
|
|||
// No local LLM — fall back to cloud API directly
|
||||
if self.llm_chain.is_none() {
|
||||
let cloud = self.cloud.as_ref().ok_or(
|
||||
"No LLM providers available. Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or NOXA_API_KEY for cloud fallback.",
|
||||
"No LLM providers available. Install the gemini CLI, set OPENAI_API_KEY, ANTHROPIC_API_KEY, or NOXA_API_KEY for cloud fallback.",
|
||||
)?;
|
||||
let mut body = json!({"url": params.url});
|
||||
if let Some(ref schema) = params.schema {
|
||||
|
|
@ -387,7 +387,7 @@ impl NoxaMcp {
|
|||
// No local LLM — fall back to cloud API directly
|
||||
if self.llm_chain.is_none() {
|
||||
let cloud = self.cloud.as_ref().ok_or(
|
||||
"No LLM providers available. Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or NOXA_API_KEY for cloud fallback.",
|
||||
"No LLM providers available. Install the gemini CLI, set OPENAI_API_KEY, ANTHROPIC_API_KEY, or NOXA_API_KEY for cloud fallback.",
|
||||
)?;
|
||||
let mut body = json!({"url": params.url});
|
||||
if let Some(sentences) = params.max_sentences {
|
||||
|
|
|
|||
51
env.example
51
env.example
|
|
@ -1,43 +1,20 @@
|
|||
# ============================================
|
||||
# Noxa Configuration
|
||||
# Copy to .env and fill in your values
|
||||
# ============================================
|
||||
# Secrets, URLs, and path overrides only — everything else goes in config.json
|
||||
# See config.example.json for the full list of configurable defaults.
|
||||
|
||||
# --- LLM Providers ---
|
||||
# Cloud API key (required for --cloud / --research)
|
||||
NOXA_API_KEY=
|
||||
|
||||
# Ollama (local, default provider)
|
||||
OLLAMA_HOST=http://localhost:11434
|
||||
OLLAMA_MODEL=qwen3:8b
|
||||
# Single proxy URL (or use NOXA_PROXY_FILE for pool rotation)
|
||||
NOXA_PROXY=
|
||||
|
||||
# OpenAI (optional cloud fallback)
|
||||
# OPENAI_API_KEY — set your OpenAI key
|
||||
# OPENAI_BASE_URL — defaults to https://api.openai.com/v1
|
||||
# OPENAI_MODEL — defaults to gpt-4o-mini
|
||||
# Proxy pool file path for rotating proxies
|
||||
NOXA_PROXY_FILE=
|
||||
|
||||
# Anthropic (optional cloud fallback)
|
||||
# ANTHROPIC_API_KEY — set your Anthropic key
|
||||
# ANTHROPIC_MODEL — defaults to claude-sonnet-4-20250514
|
||||
# Webhook URL for completion notifications
|
||||
NOXA_WEBHOOK_URL=
|
||||
|
||||
# --- Proxy ---
|
||||
# LLM base URL (Ollama or OpenAI-compatible endpoint)
|
||||
NOXA_LLM_BASE_URL=
|
||||
|
||||
# Single proxy
|
||||
# NOXA_PROXY=http://user:pass@host:port
|
||||
|
||||
# Proxy file (one per line: host:port:user:pass)
|
||||
# NOXA_PROXY_FILE=/path/to/proxies.txt
|
||||
|
||||
# --- Server (noxa-server only) ---
|
||||
# NOXA_PORT=3000
|
||||
# NOXA_HOST=0.0.0.0
|
||||
# NOXA_AUTH_KEY=your-auth-key
|
||||
# NOXA_MAX_CONCURRENCY=50
|
||||
# NOXA_JOB_TTL_SECS=3600
|
||||
# NOXA_MAX_JOBS=100
|
||||
|
||||
# --- CLI LLM overrides ---
|
||||
# NOXA_LLM_PROVIDER=ollama
|
||||
# NOXA_LLM_MODEL=qwen3:8b
|
||||
# NOXA_LLM_BASE_URL=http://localhost:11434
|
||||
|
||||
# --- Logging ---
|
||||
# NOXA_LOG=info
|
||||
# Optional: path to a non-default config file (default: ./config.json)
|
||||
# NOXA_CONFIG=/path/to/my-config.json
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue