From af304eda7f3a4aacae02e1c425d5fadb961456f2 Mon Sep 17 00:00:00 2001 From: Jacob Magar Date: Sat, 11 Apr 2026 07:36:19 -0400 Subject: [PATCH] docs(noxa-9fw.4): describe gemini cli as primary llm backend - Update CLAUDE.md: provider chain, LLM modules section, CLI examples - Update env.example: add GEMINI_MODEL, reorder providers (Gemini first) - Update noxa-llm/src/lib.rs crate doc comment --- CLAUDE.md | 16 +++++++++++----- crates/noxa-llm/src/lib.rs | 9 +++++---- env.example | 5 ++++- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 0f3b388..6e6ab01 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -15,8 +15,8 @@ noxa/ # + proxy pool rotation (per-request) # + PDF content-type detection # + document parsing (DOCX, XLSX, CSV) - noxa-llm/ # LLM provider chain (Ollama -> OpenAI -> Anthropic) - # + JSON schema extraction, prompt extraction, summarization + noxa-llm/ # LLM provider chain (Gemini CLI -> OpenAI -> Ollama -> Anthropic) + # + JSON schema extraction (validated + retry), prompt extraction, summarization noxa-pdf/ # PDF text extraction via pdf-extract noxa-mcp/ # MCP server (Model Context Protocol) for AI agents noxa/ # CLI binary @@ -48,8 +48,10 @@ Two binaries: `noxa` (CLI), `noxa-mcp` (MCP server). 
- `search.rs` — Web search via Serper.dev with parallel result scraping ### LLM Modules (`noxa-llm`) -- Provider chain: Ollama (local-first) -> OpenAI -> Anthropic -- JSON schema extraction, prompt-based extraction, summarization +- Provider chain: Gemini CLI (primary) -> OpenAI -> Ollama -> Anthropic +- Gemini CLI requires the `gemini` binary on PATH; `GEMINI_MODEL` env var controls model (default: `gemini-2.5-pro`) +- JSON schema extraction with jsonschema validation; parse failures retry once; schema mismatches fail immediately +- Prompt-based extraction, summarization ### PDF Modules (`noxa-pdf`) - PDF text extraction via pdf-extract crate @@ -105,11 +107,15 @@ noxa https://example.com --diff-with snap.json # Brand extraction noxa https://example.com --brand -# LLM features (Ollama local-first) +# LLM features (Gemini CLI primary; requires `gemini` on PATH) noxa https://example.com --summarize noxa https://example.com --extract-prompt "Get all pricing tiers" noxa https://example.com --extract-json '{"type":"object","properties":{"title":{"type":"string"}}}' +# Force a specific LLM provider +noxa https://example.com --llm-provider gemini --summarize +noxa https://example.com --llm-provider openai --summarize + # PDF (auto-detected via Content-Type) noxa https://example.com/report.pdf diff --git a/crates/noxa-llm/src/lib.rs b/crates/noxa-llm/src/lib.rs index 15664b9..250ae88 100644 --- a/crates/noxa-llm/src/lib.rs +++ b/crates/noxa-llm/src/lib.rs @@ -1,8 +1,9 @@ -/// noxa-llm: LLM integration with local-first hybrid architecture. +/// noxa-llm: LLM integration with Gemini-CLI-first hybrid architecture. /// -/// Provider chain tries Ollama (local) first, falls back to OpenAI, then Anthropic. -/// Provides schema-based extraction, prompt extraction, and summarization -/// on top of noxa-core's content pipeline. +/// Provider chain: Gemini CLI (primary) → OpenAI → Ollama → Anthropic. 
+/// Gemini CLI requires the `gemini` binary on PATH; `GEMINI_MODEL` env var sets the model. +/// Provides schema-validated extraction (with one retry on parse failure), +/// prompt extraction, and summarization on top of noxa-core's content pipeline. pub mod chain; pub mod clean; pub mod error; diff --git a/env.example b/env.example index d15b729..e81f4e4 100644 --- a/env.example +++ b/env.example @@ -5,7 +5,10 @@ # --- LLM Providers --- -# Ollama (local, default provider) +# Gemini CLI (primary provider — requires `gemini` binary on PATH) +# GEMINI_MODEL=gemini-2.5-pro # defaults to gemini-2.5-pro + +# Ollama (fallback; local inference) OLLAMA_HOST=http://localhost:11434 OLLAMA_MODEL=qwen3:8b