chore: rebrand webclaw to noxa

Jacob Magar 2026-04-11 00:10:38 -04:00
parent a4c351d5ae
commit 8674b60b4e
86 changed files with 781 additions and 2121 deletions

View file

@@ -0,0 +1,15 @@
[package]
name = "noxa-llm"
description = "LLM integration for noxa — local-first hybrid architecture (Ollama -> OpenAI -> Anthropic)"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
async-trait = "0.1"
serde = { workspace = true }
serde_json = { workspace = true }
tokio = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }

View file

@@ -0,0 +1,205 @@
//! Provider chain — tries providers in order until one succeeds.
//! Default order: Ollama (local, free) -> OpenAI -> Anthropic.
//! Only includes providers that are actually configured/available.
use async_trait::async_trait;
use tracing::{debug, warn};
use crate::error::LlmError;
use crate::provider::{CompletionRequest, LlmProvider};
use crate::providers::{
anthropic::AnthropicProvider, ollama::OllamaProvider, openai::OpenAiProvider,
};
pub struct ProviderChain {
providers: Vec<Box<dyn LlmProvider>>,
}
impl ProviderChain {
/// Build the default chain: Ollama -> OpenAI -> Anthropic.
/// Ollama is always added (availability checked at call time).
/// Cloud providers are only added if their API keys are configured.
pub async fn default() -> Self {
let mut providers: Vec<Box<dyn LlmProvider>> = Vec::new();
let ollama = OllamaProvider::new(None, None);
if ollama.is_available().await {
debug!("ollama is available, adding to chain");
providers.push(Box::new(ollama));
} else {
debug!("ollama not available, skipping");
}
if let Some(openai) = OpenAiProvider::new(None, None, None) {
debug!("openai configured, adding to chain");
providers.push(Box::new(openai));
}
if let Some(anthropic) = AnthropicProvider::new(None, None) {
debug!("anthropic configured, adding to chain");
providers.push(Box::new(anthropic));
}
Self { providers }
}
/// Build a chain with a single explicit provider.
pub fn single(provider: Box<dyn LlmProvider>) -> Self {
Self {
providers: vec![provider],
}
}
/// Build from an explicit list of providers.
pub fn from_providers(providers: Vec<Box<dyn LlmProvider>>) -> Self {
Self { providers }
}
/// How many providers are in the chain.
pub fn len(&self) -> usize {
self.providers.len()
}
pub fn is_empty(&self) -> bool {
self.providers.is_empty()
}
}
/// ProviderChain itself implements LlmProvider, so it can be used anywhere
/// a single provider is expected. This makes the CLI simple: build a chain
/// or a single provider, pass either as `Box<dyn LlmProvider>`.
#[async_trait]
impl LlmProvider for ProviderChain {
async fn complete(&self, request: &CompletionRequest) -> Result<String, LlmError> {
if self.providers.is_empty() {
return Err(LlmError::NoProviders);
}
let mut errors = Vec::new();
for provider in &self.providers {
debug!(provider = provider.name(), "attempting completion");
match provider.complete(request).await {
Ok(response) => {
debug!(provider = provider.name(), "completion succeeded");
return Ok(response);
}
Err(e) => {
warn!(provider = provider.name(), error = %e, "provider failed, trying next");
errors.push(format!("{}: {e}", provider.name()));
}
}
}
Err(LlmError::AllProvidersFailed(errors.join("; ")))
}
async fn is_available(&self) -> bool {
!self.providers.is_empty()
}
fn name(&self) -> &str {
"chain"
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::provider::Message;
use crate::testing::mock::MockProvider;
fn test_request() -> CompletionRequest {
CompletionRequest {
model: String::new(),
messages: vec![Message {
role: "user".into(),
content: "test".into(),
}],
temperature: None,
max_tokens: None,
json_mode: false,
}
}
#[tokio::test]
async fn empty_chain_returns_no_providers() {
let chain = ProviderChain::from_providers(vec![]);
let result = chain.complete(&test_request()).await;
assert!(matches!(result, Err(LlmError::NoProviders)));
}
#[tokio::test]
async fn single_provider_success() {
let chain = ProviderChain::from_providers(vec![Box::new(MockProvider {
name: "mock",
response: Ok("hello".into()),
available: true,
})]);
let result = chain.complete(&test_request()).await.unwrap();
assert_eq!(result, "hello");
}
#[tokio::test]
async fn fallback_on_first_failure() {
let chain = ProviderChain::from_providers(vec![
Box::new(MockProvider {
name: "failing",
response: Err("connection refused".into()),
available: true,
}),
Box::new(MockProvider {
name: "backup",
response: Ok("from backup".into()),
available: true,
}),
]);
let result = chain.complete(&test_request()).await.unwrap();
assert_eq!(result, "from backup");
}
#[tokio::test]
async fn all_fail_collects_errors() {
let chain = ProviderChain::from_providers(vec![
Box::new(MockProvider {
name: "a",
response: Err("timeout".into()),
available: true,
}),
Box::new(MockProvider {
name: "b",
response: Err("rate limited".into()),
available: true,
}),
]);
let result = chain.complete(&test_request()).await;
match result {
Err(LlmError::AllProvidersFailed(msg)) => {
assert!(msg.contains("timeout"));
assert!(msg.contains("rate limited"));
}
other => panic!("expected AllProvidersFailed, got {other:?}"),
}
}
#[tokio::test]
async fn chain_length() {
let chain = ProviderChain::from_providers(vec![
Box::new(MockProvider {
name: "a",
response: Ok("ok".into()),
available: true,
}),
Box::new(MockProvider {
name: "b",
response: Ok("ok".into()),
available: true,
}),
]);
assert_eq!(chain.len(), 2);
assert!(!chain.is_empty());
}
}
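
Not part of the diff: a minimal sketch of how a caller might drive the chain end to end. The tokio entry point, prompt text, and parameter values are illustrative assumptions; the types are the ones exported by noxa-llm.

use noxa_llm::{CompletionRequest, LlmProvider, Message, ProviderChain};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Ollama first if reachable, then whichever cloud providers have keys configured.
    let chain = ProviderChain::default().await;
    let request = CompletionRequest {
        model: String::new(), // empty = let the winning provider use its default model
        messages: vec![Message {
            role: "user".into(),
            content: "One sentence on why local-first inference is useful.".into(),
        }],
        temperature: Some(0.7),
        max_tokens: Some(256),
        json_mode: false,
    };
    // The chain is itself an LlmProvider, so failover is invisible to the caller.
    let reply = chain.complete(&request).await?;
    println!("{reply}");
    Ok(())
}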

View file

@@ -0,0 +1,124 @@
//! Post-processing for LLM responses.
//! Strips chain-of-thought reasoning tags that models like qwen3 emit.
//! Applied to every provider response so callers never see internal reasoning.
/// Strip `<think>...</think>` blocks from LLM responses.
/// Models like qwen3 wrap internal chain-of-thought reasoning in these tags.
/// Handles multiline content, multiple blocks, and partial/malformed tags.
pub fn strip_thinking_tags(text: &str) -> String {
let mut result = String::with_capacity(text.len());
let mut remaining = text;
while let Some(start) = remaining.find("<think>") {
// Keep everything before the opening tag
result.push_str(&remaining[..start]);
// Find the matching closing tag
let after_open = &remaining[start + 7..]; // len("<think>") == 7
if let Some(end) = after_open.find("</think>") {
remaining = &after_open[end + 8..]; // len("</think>") == 8
} else {
// Unclosed <think> — discard everything after it (the model is still "thinking")
remaining = "";
}
}
result.push_str(remaining);
// Clean up: leftover </think> or /think fragments from partial responses
let result = result.replace("</think>", "");
let result = result.replace("/think", "");
// Collapse leading whitespace left behind after stripping
let trimmed = result.trim();
if trimmed.is_empty() {
String::new()
} else {
trimmed.to_string()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn strips_simple_thinking_block() {
let input = "<think>reasoning here</think>actual response";
assert_eq!(strip_thinking_tags(input), "actual response");
}
#[test]
fn strips_multiline_thinking() {
let input = "<think>\nlong\nthinking\nprocess\n</think>\nclean output";
assert_eq!(strip_thinking_tags(input), "clean output");
}
#[test]
fn passthrough_no_tags() {
let input = "no thinking tags here";
assert_eq!(strip_thinking_tags(input), "no thinking tags here");
}
#[test]
fn strips_partial_think_at_end() {
let input = "some text /think";
assert_eq!(strip_thinking_tags(input), "some text");
}
#[test]
fn strips_orphan_closing_tag() {
let input = "some text</think> more text";
assert_eq!(strip_thinking_tags(input), "some text more text");
}
#[test]
fn strips_multiple_thinking_blocks() {
let input = "<think>first</think>hello <think>second</think>world";
assert_eq!(strip_thinking_tags(input), "hello world");
}
#[test]
fn handles_unclosed_think_tag() {
// Model started thinking and never closed — discard everything after <think>
let input = "good content<think>still reasoning...";
assert_eq!(strip_thinking_tags(input), "good content");
}
#[test]
fn handles_empty_thinking_block() {
let input = "<think></think>content";
assert_eq!(strip_thinking_tags(input), "content");
}
#[test]
fn handles_only_thinking() {
let input = "<think>just thinking, no output</think>";
assert_eq!(strip_thinking_tags(input), "");
}
#[test]
fn preserves_json_content() {
let input = "<think>let me analyze...</think>{\"key\": \"value\", \"count\": 42}";
assert_eq!(
strip_thinking_tags(input),
"{\"key\": \"value\", \"count\": 42}"
);
}
#[test]
fn real_world_extract_leak() {
// Actual bug: qwen3 leaked "/think" into JSON values
let input = "<think>analyzing the page</think>{\"learn_more\": \"Learn more\"}";
assert_eq!(
strip_thinking_tags(input),
"{\"learn_more\": \"Learn more\"}"
);
}
#[test]
fn thinking_with_newlines_before_json() {
let input = "<think>\nstep 1\nstep 2\n</think>\n\n{\"result\": true}";
assert_eq!(strip_thinking_tags(input), "{\"result\": true}");
}
}

View file

@@ -0,0 +1,18 @@
/// LLM-specific errors. Kept flat — one enum covers transport, provider, and parsing failures.
#[derive(Debug, thiserror::Error)]
pub enum LlmError {
#[error("HTTP error: {0}")]
Http(#[from] reqwest::Error),
#[error("no providers available")]
NoProviders,
#[error("all providers failed: {0}")]
AllProvidersFailed(String),
#[error("invalid JSON response: {0}")]
InvalidJson(String),
#[error("provider error: {0}")]
ProviderError(String),
}
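
For context, a sketch of how a caller might branch on these variants; the messages are invented for the example:

use noxa_llm::LlmError;

fn user_facing_message(err: &LlmError) -> String {
    match err {
        LlmError::NoProviders => {
            "no LLM configured: start Ollama or set OPENAI_API_KEY / ANTHROPIC_API_KEY".into()
        }
        LlmError::AllProvidersFailed(detail) => format!("every provider failed: {detail}"),
        // Http, InvalidJson, ProviderError all carry their own Display text.
        other => format!("llm error: {other}"),
    }
}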

View file

@@ -0,0 +1,187 @@
//! Schema-based and prompt-based LLM extraction.
//! Both functions build a system prompt, send content to the LLM, and parse JSON back.
use crate::clean::strip_thinking_tags;
use crate::error::LlmError;
use crate::provider::{CompletionRequest, LlmProvider, Message};
/// Extract structured JSON from content using a JSON schema.
/// The schema tells the LLM exactly what fields to extract and their types.
pub async fn extract_json(
content: &str,
schema: &serde_json::Value,
provider: &dyn LlmProvider,
model: Option<&str>,
) -> Result<serde_json::Value, LlmError> {
let system = format!(
"You are a JSON extraction engine. Extract data from the content according to this schema.\n\
Return ONLY valid JSON matching the schema. No explanations, no markdown, no commentary.\n\n\
Schema:\n```json\n{}\n```",
serde_json::to_string_pretty(schema).unwrap_or_else(|_| schema.to_string())
);
let request = CompletionRequest {
model: model.unwrap_or_default().to_string(),
messages: vec![
Message {
role: "system".into(),
content: system,
},
Message {
role: "user".into(),
content: content.to_string(),
},
],
temperature: Some(0.0),
max_tokens: None,
json_mode: true,
};
let response = provider.complete(&request).await?;
parse_json_response(&response)
}
/// Extract information using a natural language prompt.
/// More flexible than schema extraction — the user describes what they want.
pub async fn extract_with_prompt(
content: &str,
prompt: &str,
provider: &dyn LlmProvider,
model: Option<&str>,
) -> Result<serde_json::Value, LlmError> {
let system = format!(
"You are a JSON extraction engine. Extract information from the content based on these instructions.\n\
Return ONLY valid JSON. No explanations, no markdown, no commentary.\n\n\
Instructions: {prompt}"
);
let request = CompletionRequest {
model: model.unwrap_or_default().to_string(),
messages: vec![
Message {
role: "system".into(),
content: system,
},
Message {
role: "user".into(),
content: content.to_string(),
},
],
temperature: Some(0.0),
max_tokens: None,
json_mode: true,
};
let response = provider.complete(&request).await?;
parse_json_response(&response)
}
/// Parse an LLM response string as JSON. Handles common edge cases:
/// - Thinking tags (`<think>...</think>`)
/// - Markdown code fences (```json ... ```)
/// - Leading/trailing whitespace
fn parse_json_response(response: &str) -> Result<serde_json::Value, LlmError> {
// Strip thinking tags before any JSON parsing — providers already do this,
// but defense in depth for any caller that bypasses the provider layer
let cleaned = strip_thinking_tags(response);
let trimmed = cleaned.trim();
// Strip markdown code fences if present
let json_str = if trimmed.starts_with("```") {
let without_opener = trimmed
.strip_prefix("```json")
.or_else(|| trimmed.strip_prefix("```"))
.unwrap_or(trimmed);
without_opener
.strip_suffix("```")
.unwrap_or(without_opener)
.trim()
} else {
trimmed
};
serde_json::from_str(json_str)
.map_err(|e| LlmError::InvalidJson(format!("{e} — raw response: {response}")))
}
#[cfg(test)]
mod tests {
use super::*;
use crate::testing::mock::MockProvider;
#[test]
fn parse_clean_json() {
let result = parse_json_response(r#"{"name": "Rust", "version": 2024}"#).unwrap();
assert_eq!(result["name"], "Rust");
assert_eq!(result["version"], 2024);
}
#[test]
fn parse_json_with_code_fence() {
let response = "```json\n{\"key\": \"value\"}\n```";
let result = parse_json_response(response).unwrap();
assert_eq!(result["key"], "value");
}
#[test]
fn parse_json_with_whitespace() {
let response = " \n {\"ok\": true} \n ";
let result = parse_json_response(response).unwrap();
assert_eq!(result["ok"], true);
}
#[test]
fn parse_invalid_json() {
let result = parse_json_response("not json at all");
assert!(matches!(result, Err(LlmError::InvalidJson(_))));
}
#[test]
fn parse_json_with_thinking_tags() {
let response = "<think>analyzing the content</think>{\"title\": \"Hello\"}";
let result = parse_json_response(response).unwrap();
assert_eq!(result["title"], "Hello");
}
#[test]
fn parse_json_with_thinking_and_code_fence() {
let response = "<think>let me think</think>\n```json\n{\"key\": \"value\"}\n```";
let result = parse_json_response(response).unwrap();
assert_eq!(result["key"], "value");
}
#[tokio::test]
async fn extract_json_uses_schema_in_prompt() {
let mock = MockProvider::ok(r#"{"title": "Test Article", "author": "Jane"}"#);
let schema = serde_json::json!({
"type": "object",
"properties": {
"title": { "type": "string" },
"author": { "type": "string" }
}
});
let result = extract_json("Some article content by Jane", &schema, &mock, None)
.await
.unwrap();
assert_eq!(result["title"], "Test Article");
assert_eq!(result["author"], "Jane");
}
#[tokio::test]
async fn extract_with_prompt_returns_json() {
let mock = MockProvider::ok(r#"{"emails": ["test@example.com"]}"#);
let result = extract_with_prompt(
"Contact us at test@example.com",
"Find all email addresses",
&mock,
None,
)
.await
.unwrap();
assert_eq!(result["emails"][0], "test@example.com");
}
}
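
A sketch of schema extraction wired to the chain (an async context, serde_json in the caller's dependencies, and the page text are assumed):

use noxa_llm::extract::extract_json;
use noxa_llm::{LlmError, ProviderChain};

async fn extract_article(page_text: &str) -> Result<serde_json::Value, LlmError> {
    let provider = ProviderChain::default().await;
    // The schema shapes both the system prompt and the JSON the model must return.
    let schema = serde_json::json!({
        "type": "object",
        "properties": {
            "title":  { "type": "string" },
            "author": { "type": "string" }
        }
    });
    // None = default model of whichever provider answers.
    extract_json(page_text, &schema, &provider, None).await
}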

View file

@@ -0,0 +1,19 @@
//! noxa-llm: LLM integration with local-first hybrid architecture.
//!
//! Provider chain tries Ollama (local) first, falls back to OpenAI, then Anthropic.
//! Provides schema-based extraction, prompt extraction, and summarization
//! on top of noxa-core's content pipeline.
pub mod chain;
pub mod clean;
pub mod error;
pub mod extract;
pub mod provider;
pub mod providers;
pub mod summarize;
#[cfg(test)]
pub(crate) mod testing;
pub use chain::ProviderChain;
pub use clean::strip_thinking_tags;
pub use error::LlmError;
pub use provider::{CompletionRequest, LlmProvider, Message};

View file

@@ -0,0 +1,34 @@
//! Core LLM abstraction. Every backend (Ollama, OpenAI, Anthropic) implements `LlmProvider`.
//! The trait is intentionally minimal — just completion and availability check.
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use crate::error::LlmError;
#[derive(Debug, Clone)]
pub struct CompletionRequest {
pub model: String,
pub messages: Vec<Message>,
pub temperature: Option<f32>,
pub max_tokens: Option<u32>,
/// When true, instruct the provider to return valid JSON.
pub json_mode: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Message {
pub role: String,
pub content: String,
}
#[async_trait]
pub trait LlmProvider: Send + Sync {
/// Send a completion request and return the assistant's text response.
async fn complete(&self, request: &CompletionRequest) -> Result<String, LlmError>;
/// Quick health check — is this provider reachable / configured?
async fn is_available(&self) -> bool;
/// Human-readable name for logging.
fn name(&self) -> &str;
}
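
To show how small the contract is, a hypothetical provider that needs no network at all; EchoProvider and its behavior are invented for the example:

use async_trait::async_trait;
use noxa_llm::{CompletionRequest, LlmError, LlmProvider};

/// Replies with the last user message; handy for offline smoke tests.
struct EchoProvider;

#[async_trait]
impl LlmProvider for EchoProvider {
    async fn complete(&self, request: &CompletionRequest) -> Result<String, LlmError> {
        request
            .messages
            .iter()
            .rev()
            .find(|m| m.role == "user")
            .map(|m| m.content.clone())
            .ok_or_else(|| LlmError::ProviderError("no user message in request".into()))
    }
    async fn is_available(&self) -> bool {
        true
    }
    fn name(&self) -> &str {
        "echo"
    }
}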

View file

@@ -0,0 +1,170 @@
//! Anthropic provider — Claude models via api.anthropic.com.
//! Anthropic's API differs from OpenAI: system message is a top-level param,
//! not part of the messages array.
use async_trait::async_trait;
use serde_json::json;
use crate::clean::strip_thinking_tags;
use crate::error::LlmError;
use crate::provider::{CompletionRequest, LlmProvider};
use super::load_api_key;
const ANTHROPIC_API_URL: &str = "https://api.anthropic.com/v1/messages";
const ANTHROPIC_VERSION: &str = "2023-06-01";
pub struct AnthropicProvider {
client: reqwest::Client,
key: String,
default_model: String,
}
impl AnthropicProvider {
/// Returns `None` if no API key is available (param or env).
pub fn new(key_override: Option<String>, model: Option<String>) -> Option<Self> {
let key = load_api_key(key_override, "ANTHROPIC_API_KEY")?;
Some(Self {
client: reqwest::Client::new(),
key,
default_model: model.unwrap_or_else(|| "claude-sonnet-4-20250514".into()),
})
}
pub fn default_model(&self) -> &str {
&self.default_model
}
}
#[async_trait]
impl LlmProvider for AnthropicProvider {
async fn complete(&self, request: &CompletionRequest) -> Result<String, LlmError> {
let model = if request.model.is_empty() {
&self.default_model
} else {
&request.model
};
// Anthropic separates system from messages. Extract the system message if present.
let system_content: Option<String> = request
.messages
.iter()
.find(|m| m.role == "system")
.map(|m| m.content.clone());
let messages: Vec<serde_json::Value> = request
.messages
.iter()
.filter(|m| m.role != "system")
.map(|m| json!({ "role": m.role, "content": m.content }))
.collect();
let mut body = json!({
"model": model,
"messages": messages,
"max_tokens": request.max_tokens.unwrap_or(4096),
});
if let Some(system) = &system_content {
body["system"] = json!(system);
}
if let Some(temp) = request.temperature {
body["temperature"] = json!(temp);
}
let resp = self
.client
.post(ANTHROPIC_API_URL)
.header("x-api-key", &self.key)
.header("anthropic-version", ANTHROPIC_VERSION)
.header("content-type", "application/json")
.json(&body)
.send()
.await?;
if !resp.status().is_success() {
let status = resp.status();
let text = resp.text().await.unwrap_or_default();
            // Truncate long error bodies by characters, not bytes; a raw
            // byte slice at index 500 can panic mid-way through a UTF-8 char
            let safe_text: String = text.chars().take(500).collect();
return Err(LlmError::ProviderError(format!(
"anthropic returned {status}: {safe_text}"
)));
}
let json: serde_json::Value = resp.json().await?;
// Anthropic response: {"content": [{"type": "text", "text": "..."}]}
let raw = json["content"][0]["text"]
.as_str()
.map(String::from)
.ok_or_else(|| {
LlmError::InvalidJson("missing content[0].text in anthropic response".into())
})?;
Ok(strip_thinking_tags(&raw))
}
async fn is_available(&self) -> bool {
!self.key.is_empty()
}
fn name(&self) -> &str {
"anthropic"
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn empty_key_returns_none() {
assert!(AnthropicProvider::new(Some(String::new()), None).is_none());
}
#[test]
fn explicit_key_constructs() {
let provider =
AnthropicProvider::new(Some("sk-ant-test".into()), None).expect("should construct");
assert_eq!(provider.name(), "anthropic");
assert_eq!(provider.default_model, "claude-sonnet-4-20250514");
assert_eq!(provider.key, "sk-ant-test");
}
#[test]
fn custom_model() {
let provider =
AnthropicProvider::new(Some("sk-ant-test".into()), Some("claude-3-haiku".into()))
.unwrap();
assert_eq!(provider.default_model, "claude-3-haiku");
}
#[test]
fn default_model_accessor() {
let provider = AnthropicProvider::new(Some("sk-ant-test".into()), None).unwrap();
assert_eq!(provider.default_model(), "claude-sonnet-4-20250514");
}
// Env var fallback tests mutate process-global state and race with parallel tests.
// The code path is trivial (load_api_key -> env::var().ok()). Run in isolation if needed:
// cargo test -p noxa-llm env_var -- --ignored --test-threads=1
#[test]
#[ignore = "mutates process env; run with --test-threads=1"]
fn env_var_key_fallback() {
unsafe { std::env::set_var("ANTHROPIC_API_KEY", "sk-ant-env") };
let provider = AnthropicProvider::new(None, None).expect("should construct from env");
assert_eq!(provider.key, "sk-ant-env");
unsafe { std::env::remove_var("ANTHROPIC_API_KEY") };
}
#[test]
#[ignore = "mutates process env; run with --test-threads=1"]
fn no_key_returns_none() {
unsafe { std::env::remove_var("ANTHROPIC_API_KEY") };
assert!(AnthropicProvider::new(None, None).is_none());
}
}

View file

@@ -0,0 +1,36 @@
pub mod anthropic;
pub mod ollama;
pub mod openai;
/// Load an API key from an explicit override or an environment variable.
/// Returns `None` if neither is set or the value is empty.
pub(crate) fn load_api_key(override_key: Option<String>, env_var: &str) -> Option<String> {
let key = override_key.or_else(|| std::env::var(env_var).ok())?;
if key.is_empty() { None } else { Some(key) }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn override_key_takes_precedence() {
assert_eq!(
load_api_key(Some("explicit".into()), "NONEXISTENT_VAR"),
Some("explicit".into())
);
}
#[test]
fn empty_override_returns_none() {
assert_eq!(load_api_key(Some(String::new()), "NONEXISTENT_VAR"), None);
}
#[test]
fn none_override_with_no_env_returns_none() {
assert_eq!(
load_api_key(None, "NOXA_TEST_NONEXISTENT_KEY_12345"),
None
);
}
}

View file

@@ -0,0 +1,161 @@
//! Ollama provider — talks to a local Ollama instance (default localhost:11434).
//! First choice in the provider chain: free, private, fast on Apple Silicon.
use async_trait::async_trait;
use serde_json::json;
use crate::clean::strip_thinking_tags;
use crate::error::LlmError;
use crate::provider::{CompletionRequest, LlmProvider};
pub struct OllamaProvider {
client: reqwest::Client,
base_url: String,
default_model: String,
}
impl OllamaProvider {
pub fn new(base_url: Option<String>, model: Option<String>) -> Self {
let base_url = base_url
.or_else(|| std::env::var("OLLAMA_HOST").ok())
.unwrap_or_else(|| "http://localhost:11434".into());
let default_model = model
.or_else(|| std::env::var("OLLAMA_MODEL").ok())
.unwrap_or_else(|| "qwen3:8b".into());
Self {
client: reqwest::Client::new(),
base_url,
default_model,
}
}
pub fn default_model(&self) -> &str {
&self.default_model
}
}
#[async_trait]
impl LlmProvider for OllamaProvider {
async fn complete(&self, request: &CompletionRequest) -> Result<String, LlmError> {
let model = if request.model.is_empty() {
&self.default_model
} else {
&request.model
};
let messages: Vec<serde_json::Value> = request
.messages
.iter()
.map(|m| json!({ "role": m.role, "content": m.content }))
.collect();
let mut body = json!({
"model": model,
"messages": messages,
"stream": false,
"think": false,
});
if request.json_mode {
body["format"] = json!("json");
}
if let Some(temp) = request.temperature {
body["options"] = json!({ "temperature": temp });
}
let url = format!("{}/api/chat", self.base_url);
let resp = self.client.post(&url).json(&body).send().await?;
if !resp.status().is_success() {
let status = resp.status();
let text = resp.text().await.unwrap_or_default();
            // Truncate long error bodies by characters, not bytes; a raw
            // byte slice at index 500 can panic mid-way through a UTF-8 char
            let safe_text: String = text.chars().take(500).collect();
return Err(LlmError::ProviderError(format!(
"ollama returned {status}: {safe_text}"
)));
}
let json: serde_json::Value = resp.json().await?;
let raw = json["message"]["content"]
.as_str()
.map(String::from)
.ok_or_else(|| {
LlmError::InvalidJson(format!(
"missing message.content in ollama response: {json}"
))
})?;
Ok(strip_thinking_tags(&raw))
}
async fn is_available(&self) -> bool {
let url = format!("{}/api/tags", self.base_url);
matches!(self.client.get(&url).send().await, Ok(r) if r.status().is_success())
}
fn name(&self) -> &str {
"ollama"
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn explicit_params_used() {
let provider = OllamaProvider::new(
Some("http://gpu-box:11434".into()),
Some("llama3:70b".into()),
);
assert_eq!(provider.base_url, "http://gpu-box:11434");
assert_eq!(provider.default_model, "llama3:70b");
assert_eq!(provider.name(), "ollama");
}
#[test]
fn explicit_model_overrides_any_env() {
// Passing Some(...) bypasses env vars entirely -- no race possible
let provider = OllamaProvider::new(None, Some("mistral:7b".into()));
assert_eq!(provider.default_model, "mistral:7b");
}
#[test]
fn explicit_url_overrides_any_env() {
let provider = OllamaProvider::new(Some("http://local:11434".into()), None);
assert_eq!(provider.base_url, "http://local:11434");
}
#[test]
fn default_model_accessor() {
let provider = OllamaProvider::new(None, Some("phi3:mini".into()));
assert_eq!(provider.default_model(), "phi3:mini");
}
// Env var fallback is a trivial `env::var().ok()` -- not worth the flakiness
// of manipulating process-global state. Run in isolation if needed:
// cargo test -p noxa-llm env_var_fallback -- --ignored --test-threads=1
#[test]
#[ignore = "mutates process env; run with --test-threads=1"]
fn env_var_fallback() {
unsafe {
std::env::set_var("OLLAMA_HOST", "http://remote:11434");
std::env::set_var("OLLAMA_MODEL", "mistral:7b");
}
let provider = OllamaProvider::new(None, None);
assert_eq!(provider.base_url, "http://remote:11434");
assert_eq!(provider.default_model, "mistral:7b");
unsafe {
std::env::remove_var("OLLAMA_HOST");
std::env::remove_var("OLLAMA_MODEL");
}
}
}

View file

@@ -0,0 +1,181 @@
//! OpenAI provider — works with api.openai.com and any OpenAI-compatible endpoint.
use async_trait::async_trait;
use serde_json::json;
use crate::clean::strip_thinking_tags;
use crate::error::LlmError;
use crate::provider::{CompletionRequest, LlmProvider};
use super::load_api_key;
pub struct OpenAiProvider {
client: reqwest::Client,
key: String,
base_url: String,
default_model: String,
}
impl OpenAiProvider {
/// Returns `None` if no API key is available (param or env).
pub fn new(
key_override: Option<String>,
base_url: Option<String>,
model: Option<String>,
) -> Option<Self> {
let key = load_api_key(key_override, "OPENAI_API_KEY")?;
Some(Self {
client: reqwest::Client::new(),
key,
base_url: base_url
.or_else(|| std::env::var("OPENAI_BASE_URL").ok())
.unwrap_or_else(|| "https://api.openai.com/v1".into()),
default_model: model.unwrap_or_else(|| "gpt-4o-mini".into()),
})
}
pub fn default_model(&self) -> &str {
&self.default_model
}
}
#[async_trait]
impl LlmProvider for OpenAiProvider {
async fn complete(&self, request: &CompletionRequest) -> Result<String, LlmError> {
let model = if request.model.is_empty() {
&self.default_model
} else {
&request.model
};
let messages: Vec<serde_json::Value> = request
.messages
.iter()
.map(|m| json!({ "role": m.role, "content": m.content }))
.collect();
let mut body = json!({
"model": model,
"messages": messages,
});
if request.json_mode {
body["response_format"] = json!({ "type": "json_object" });
}
if let Some(temp) = request.temperature {
body["temperature"] = json!(temp);
}
if let Some(max) = request.max_tokens {
body["max_tokens"] = json!(max);
}
let url = format!("{}/chat/completions", self.base_url);
let resp = self
.client
.post(&url)
.header("Authorization", format!("Bearer {}", self.key))
.json(&body)
.send()
.await?;
if !resp.status().is_success() {
let status = resp.status();
let text = resp.text().await.unwrap_or_default();
            // Truncate long error bodies by characters, not bytes; a raw
            // byte slice at index 500 can panic mid-way through a UTF-8 char
            let safe_text: String = text.chars().take(500).collect();
return Err(LlmError::ProviderError(format!(
"openai returned {status}: {safe_text}"
)));
}
let json: serde_json::Value = resp.json().await?;
let raw = json["choices"][0]["message"]["content"]
.as_str()
.map(String::from)
.ok_or_else(|| {
LlmError::InvalidJson(
"missing choices[0].message.content in openai response".into(),
)
})?;
Ok(strip_thinking_tags(&raw))
}
async fn is_available(&self) -> bool {
!self.key.is_empty()
}
fn name(&self) -> &str {
"openai"
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn empty_key_returns_none() {
assert!(OpenAiProvider::new(Some(String::new()), None, None).is_none());
}
#[test]
fn explicit_key_constructs() {
let provider = OpenAiProvider::new(
Some("test-key-123".into()),
Some("https://api.openai.com/v1".into()),
Some("gpt-4o-mini".into()),
)
.expect("should construct");
assert_eq!(provider.name(), "openai");
assert_eq!(provider.default_model, "gpt-4o-mini");
assert_eq!(provider.base_url, "https://api.openai.com/v1");
assert_eq!(provider.key, "test-key-123");
}
#[test]
fn custom_base_url_and_model() {
let provider = OpenAiProvider::new(
Some("test-key".into()),
Some("http://localhost:8080/v1".into()),
Some("gpt-3.5-turbo".into()),
)
.unwrap();
assert_eq!(provider.base_url, "http://localhost:8080/v1");
assert_eq!(provider.default_model, "gpt-3.5-turbo");
}
#[test]
fn default_model_accessor() {
let provider = OpenAiProvider::new(
Some("test-key".into()),
Some("https://api.openai.com/v1".into()),
None,
)
.unwrap();
assert_eq!(provider.default_model(), "gpt-4o-mini");
}
// Env var fallback tests mutate process-global state and race with parallel tests.
// The code path is trivial (load_api_key -> env::var().ok()). Run in isolation if needed:
// cargo test -p noxa-llm env_var -- --ignored --test-threads=1
#[test]
#[ignore = "mutates process env; run with --test-threads=1"]
fn env_var_key_fallback() {
unsafe { std::env::set_var("OPENAI_API_KEY", "sk-env-key") };
let provider = OpenAiProvider::new(None, None, None).expect("should construct from env");
assert_eq!(provider.key, "sk-env-key");
unsafe { std::env::remove_var("OPENAI_API_KEY") };
}
#[test]
#[ignore = "mutates process env; run with --test-threads=1"]
fn no_key_returns_none() {
unsafe { std::env::remove_var("OPENAI_API_KEY") };
assert!(OpenAiProvider::new(None, None, None).is_none());
}
}

View file

@@ -0,0 +1,124 @@
//! LLM-powered content summarization. Keeps it simple: one function, one prompt.
use crate::clean::strip_thinking_tags;
use crate::error::LlmError;
use crate::provider::{CompletionRequest, LlmProvider, Message};
/// Summarize content using an LLM.
/// Returns plain text (not JSON). Default is 3 sentences.
pub async fn summarize(
content: &str,
max_sentences: Option<usize>,
provider: &dyn LlmProvider,
model: Option<&str>,
) -> Result<String, LlmError> {
let n = max_sentences.unwrap_or(3);
let system = format!(
"You are a summarization engine. Summarize the following content in exactly {n} sentences. \
Output ONLY the summary, nothing else. No introductions, no questions, no formatting, no preamble."
);
let request = CompletionRequest {
model: model.unwrap_or_default().to_string(),
messages: vec![
Message {
role: "system".into(),
content: system,
},
Message {
role: "user".into(),
content: content.to_string(),
},
],
temperature: Some(0.3),
max_tokens: None,
json_mode: false,
};
let response = provider.complete(&request).await?;
// Providers already strip thinking tags, but defense in depth for summarize
// since its output goes directly to the user as plain text
Ok(strip_thinking_tags(&response))
}
#[cfg(test)]
mod tests {
use super::*;
use async_trait::async_trait;
struct MockSummarizer;
#[async_trait]
impl LlmProvider for MockSummarizer {
async fn complete(&self, req: &CompletionRequest) -> Result<String, LlmError> {
// Verify the prompt is well-formed
let system = &req.messages[0].content;
assert!(system.contains("sentences"));
assert!(system.contains("summarization engine"));
assert!(!req.json_mode, "summarize should not use json_mode");
Ok("This is a test summary.".into())
}
async fn is_available(&self) -> bool {
true
}
fn name(&self) -> &str {
"mock"
}
}
#[tokio::test]
async fn summarize_returns_text() {
let result = summarize("Long article content...", None, &MockSummarizer, None)
.await
.unwrap();
assert_eq!(result, "This is a test summary.");
}
#[tokio::test]
async fn summarize_custom_sentence_count() {
// Verify custom count is passed through
struct CountChecker;
#[async_trait]
impl LlmProvider for CountChecker {
async fn complete(&self, req: &CompletionRequest) -> Result<String, LlmError> {
assert!(req.messages[0].content.contains("5 sentences"));
Ok("Summary.".into())
}
async fn is_available(&self) -> bool {
true
}
fn name(&self) -> &str {
"count_checker"
}
}
summarize("Content", Some(5), &CountChecker, None)
.await
.unwrap();
}
#[tokio::test]
async fn summarize_strips_thinking_tags() {
struct ThinkingMock;
#[async_trait]
impl LlmProvider for ThinkingMock {
async fn complete(&self, _req: &CompletionRequest) -> Result<String, LlmError> {
Ok("<think>let me analyze this</think>This is the clean summary.".into())
}
async fn is_available(&self) -> bool {
true
}
fn name(&self) -> &str {
"thinking_mock"
}
}
let result = summarize("Some content", None, &ThinkingMock, None)
.await
.unwrap();
assert_eq!(result, "This is the clean summary.");
}
}
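
Paired with the chain, the whole pipeline reduces to a few lines (a sketch; the article text is assumed):

use noxa_llm::summarize::summarize;
use noxa_llm::{LlmError, ProviderChain};

async fn tldr(article: &str) -> Result<String, LlmError> {
    let provider = ProviderChain::default().await;
    // Two sentences, default model of whichever provider answers first.
    summarize(article, Some(2), &provider, None).await
}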

View file

@@ -0,0 +1,48 @@
//! Shared test utilities for noxa-llm.
//!
//! Provides a configurable mock LLM provider for unit tests across
//! extract, chain, and other modules that need a fake LLM backend.
#[cfg(test)]
pub(crate) mod mock {
use async_trait::async_trait;
use crate::error::LlmError;
use crate::provider::{CompletionRequest, LlmProvider};
/// A mock LLM provider that returns a canned response or error.
/// Covers the common test cases: success, failure, and availability.
pub struct MockProvider {
pub name: &'static str,
pub response: Result<String, String>,
pub available: bool,
}
impl MockProvider {
/// Shorthand for a mock that always succeeds with the given response.
pub fn ok(response: &str) -> Self {
Self {
name: "mock",
response: Ok(response.to_string()),
available: true,
}
}
}
#[async_trait]
impl LlmProvider for MockProvider {
async fn complete(&self, _request: &CompletionRequest) -> Result<String, LlmError> {
match &self.response {
Ok(text) => Ok(text.clone()),
Err(msg) => Err(LlmError::ProviderError(msg.clone())),
}
}
async fn is_available(&self) -> bool {
self.available
}
fn name(&self) -> &str {
self.name
}
}
}