chore: rebrand webclaw to noxa

Jacob Magar 2026-04-11 00:10:38 -04:00
parent a4c351d5ae
commit 8674b60b4e
86 changed files with 781 additions and 2121 deletions

View file

@@ -0,0 +1,15 @@
[package]
name = "noxa-llm"
description = "LLM integration for noxa — local-first hybrid architecture (Ollama -> OpenAI -> Anthropic)"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
async-trait = "0.1"
serde = { workspace = true }
serde_json = { workspace = true }
tokio = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }

View file

@@ -0,0 +1,205 @@
//! Provider chain — tries providers in order until one succeeds.
//! Default order: Ollama (local, free) -> OpenAI -> Anthropic.
//! Only includes providers that are actually configured/available.
use async_trait::async_trait;
use tracing::{debug, warn};
use crate::error::LlmError;
use crate::provider::{CompletionRequest, LlmProvider};
use crate::providers::{
anthropic::AnthropicProvider, ollama::OllamaProvider, openai::OpenAiProvider,
};
pub struct ProviderChain {
providers: Vec<Box<dyn LlmProvider>>,
}
impl ProviderChain {
/// Build the default chain: Ollama -> OpenAI -> Anthropic.
/// Ollama is always added (availability checked at call time).
/// Cloud providers are only added if their API keys are configured.
pub async fn default() -> Self {
let mut providers: Vec<Box<dyn LlmProvider>> = Vec::new();
let ollama = OllamaProvider::new(None, None);
if ollama.is_available().await {
debug!("ollama is available, adding to chain");
providers.push(Box::new(ollama));
} else {
debug!("ollama not available, skipping");
}
if let Some(openai) = OpenAiProvider::new(None, None, None) {
debug!("openai configured, adding to chain");
providers.push(Box::new(openai));
}
if let Some(anthropic) = AnthropicProvider::new(None, None) {
debug!("anthropic configured, adding to chain");
providers.push(Box::new(anthropic));
}
Self { providers }
}
/// Build a chain with a single explicit provider.
pub fn single(provider: Box<dyn LlmProvider>) -> Self {
Self {
providers: vec![provider],
}
}
/// Build from an explicit list of providers.
pub fn from_providers(providers: Vec<Box<dyn LlmProvider>>) -> Self {
Self { providers }
}
/// How many providers are in the chain.
pub fn len(&self) -> usize {
self.providers.len()
}
pub fn is_empty(&self) -> bool {
self.providers.is_empty()
}
}
/// ProviderChain itself implements LlmProvider, so it can be used anywhere
/// a single provider is expected. This makes the CLI simple: build a chain
/// or a single provider, pass either as `Box<dyn LlmProvider>`.
#[async_trait]
impl LlmProvider for ProviderChain {
async fn complete(&self, request: &CompletionRequest) -> Result<String, LlmError> {
if self.providers.is_empty() {
return Err(LlmError::NoProviders);
}
let mut errors = Vec::new();
for provider in &self.providers {
debug!(provider = provider.name(), "attempting completion");
match provider.complete(request).await {
Ok(response) => {
debug!(provider = provider.name(), "completion succeeded");
return Ok(response);
}
Err(e) => {
warn!(provider = provider.name(), error = %e, "provider failed, trying next");
errors.push(format!("{}: {e}", provider.name()));
}
}
}
Err(LlmError::AllProvidersFailed(errors.join("; ")))
}
async fn is_available(&self) -> bool {
!self.providers.is_empty()
}
fn name(&self) -> &str {
"chain"
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::provider::Message;
use crate::testing::mock::MockProvider;
fn test_request() -> CompletionRequest {
CompletionRequest {
model: String::new(),
messages: vec![Message {
role: "user".into(),
content: "test".into(),
}],
temperature: None,
max_tokens: None,
json_mode: false,
}
}
#[tokio::test]
async fn empty_chain_returns_no_providers() {
let chain = ProviderChain::from_providers(vec![]);
let result = chain.complete(&test_request()).await;
assert!(matches!(result, Err(LlmError::NoProviders)));
}
#[tokio::test]
async fn single_provider_success() {
let chain = ProviderChain::from_providers(vec![Box::new(MockProvider {
name: "mock",
response: Ok("hello".into()),
available: true,
})]);
let result = chain.complete(&test_request()).await.unwrap();
assert_eq!(result, "hello");
}
#[tokio::test]
async fn fallback_on_first_failure() {
let chain = ProviderChain::from_providers(vec![
Box::new(MockProvider {
name: "failing",
response: Err("connection refused".into()),
available: true,
}),
Box::new(MockProvider {
name: "backup",
response: Ok("from backup".into()),
available: true,
}),
]);
let result = chain.complete(&test_request()).await.unwrap();
assert_eq!(result, "from backup");
}
#[tokio::test]
async fn all_fail_collects_errors() {
let chain = ProviderChain::from_providers(vec![
Box::new(MockProvider {
name: "a",
response: Err("timeout".into()),
available: true,
}),
Box::new(MockProvider {
name: "b",
response: Err("rate limited".into()),
available: true,
}),
]);
let result = chain.complete(&test_request()).await;
match result {
Err(LlmError::AllProvidersFailed(msg)) => {
assert!(msg.contains("timeout"));
assert!(msg.contains("rate limited"));
}
other => panic!("expected AllProvidersFailed, got {other:?}"),
}
}
#[tokio::test]
async fn chain_length() {
let chain = ProviderChain::from_providers(vec![
Box::new(MockProvider {
name: "a",
response: Ok("ok".into()),
available: true,
}),
Box::new(MockProvider {
name: "b",
response: Ok("ok".into()),
available: true,
}),
]);
assert_eq!(chain.len(), 2);
assert!(!chain.is_empty());
}
}
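
Not part of the diff: a minimal sketch of how a caller might drive the chain end to end. The tokio entry point, prompt text, and parameter values are illustrative assumptions; the types are the ones exported by noxa-llm.

use noxa_llm::{CompletionRequest, LlmProvider, Message, ProviderChain};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Ollama first if reachable, then whichever cloud providers have keys configured.
    let chain = ProviderChain::default().await;
    let request = CompletionRequest {
        model: String::new(), // empty = let the winning provider use its default model
        messages: vec![Message {
            role: "user".into(),
            content: "One sentence on why local-first inference is useful.".into(),
        }],
        temperature: Some(0.7),
        max_tokens: Some(256),
        json_mode: false,
    };
    // The chain is itself an LlmProvider, so failover is invisible to the caller.
    let reply = chain.complete(&request).await?;
    println!("{reply}");
    Ok(())
}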

View file

@@ -0,0 +1,124 @@
//! Post-processing for LLM responses.
//! Strips chain-of-thought reasoning tags that models like qwen3 emit.
//! Applied to every provider response so callers never see internal reasoning.
/// Strip `<think>...</think>` blocks from LLM responses.
/// Models like qwen3 wrap internal chain-of-thought reasoning in these tags.
/// Handles multiline content, multiple blocks, and partial/malformed tags.
pub fn strip_thinking_tags(text: &str) -> String {
let mut result = String::with_capacity(text.len());
let mut remaining = text;
while let Some(start) = remaining.find("<think>") {
// Keep everything before the opening tag
result.push_str(&remaining[..start]);
// Find the matching closing tag
let after_open = &remaining[start + 7..]; // len("<think>") == 7
if let Some(end) = after_open.find("</think>") {
remaining = &after_open[end + 8..]; // len("</think>") == 8
} else {
// Unclosed <think> — discard everything after it (the model is still "thinking")
remaining = "";
}
}
result.push_str(remaining);
// Clean up: leftover </think> or /think fragments from partial responses
let result = result.replace("</think>", "");
let result = result.replace("/think", "");
// Collapse leading whitespace left behind after stripping
let trimmed = result.trim();
if trimmed.is_empty() {
String::new()
} else {
trimmed.to_string()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn strips_simple_thinking_block() {
let input = "<think>reasoning here</think>actual response";
assert_eq!(strip_thinking_tags(input), "actual response");
}
#[test]
fn strips_multiline_thinking() {
let input = "<think>\nlong\nthinking\nprocess\n</think>\nclean output";
assert_eq!(strip_thinking_tags(input), "clean output");
}
#[test]
fn passthrough_no_tags() {
let input = "no thinking tags here";
assert_eq!(strip_thinking_tags(input), "no thinking tags here");
}
#[test]
fn strips_partial_think_at_end() {
let input = "some text /think";
assert_eq!(strip_thinking_tags(input), "some text");
}
#[test]
fn strips_orphan_closing_tag() {
let input = "some text</think> more text";
assert_eq!(strip_thinking_tags(input), "some text more text");
}
#[test]
fn strips_multiple_thinking_blocks() {
let input = "<think>first</think>hello <think>second</think>world";
assert_eq!(strip_thinking_tags(input), "hello world");
}
#[test]
fn handles_unclosed_think_tag() {
// Model started thinking and never closed — discard everything after <think>
let input = "good content<think>still reasoning...";
assert_eq!(strip_thinking_tags(input), "good content");
}
#[test]
fn handles_empty_thinking_block() {
let input = "<think></think>content";
assert_eq!(strip_thinking_tags(input), "content");
}
#[test]
fn handles_only_thinking() {
let input = "<think>just thinking, no output</think>";
assert_eq!(strip_thinking_tags(input), "");
}
#[test]
fn preserves_json_content() {
let input = "<think>let me analyze...</think>{\"key\": \"value\", \"count\": 42}";
assert_eq!(
strip_thinking_tags(input),
"{\"key\": \"value\", \"count\": 42}"
);
}
#[test]
fn real_world_extract_leak() {
// Actual bug: qwen3 leaked "/think" into JSON values
let input = "<think>analyzing the page</think>{\"learn_more\": \"Learn more\"}";
assert_eq!(
strip_thinking_tags(input),
"{\"learn_more\": \"Learn more\"}"
);
}
#[test]
fn thinking_with_newlines_before_json() {
let input = "<think>\nstep 1\nstep 2\n</think>\n\n{\"result\": true}";
assert_eq!(strip_thinking_tags(input), "{\"result\": true}");
}
}

View file

@@ -0,0 +1,18 @@
/// LLM-specific errors. Kept flat — one enum covers transport, provider, and parsing failures.
#[derive(Debug, thiserror::Error)]
pub enum LlmError {
#[error("HTTP error: {0}")]
Http(#[from] reqwest::Error),
#[error("no providers available")]
NoProviders,
#[error("all providers failed: {0}")]
AllProvidersFailed(String),
#[error("invalid JSON response: {0}")]
InvalidJson(String),
#[error("provider error: {0}")]
ProviderError(String),
}
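
For context, a sketch of how a caller might branch on these variants; the messages are invented for the example:

use noxa_llm::LlmError;

fn user_facing_message(err: &LlmError) -> String {
    match err {
        LlmError::NoProviders => {
            "no LLM configured: start Ollama or set OPENAI_API_KEY / ANTHROPIC_API_KEY".into()
        }
        LlmError::AllProvidersFailed(detail) => format!("every provider failed: {detail}"),
        // Http, InvalidJson, ProviderError all carry their own Display text.
        other => format!("llm error: {other}"),
    }
}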

View file

@@ -0,0 +1,187 @@
//! Schema-based and prompt-based LLM extraction.
//! Both functions build a system prompt, send content to the LLM, and parse JSON back.
use crate::clean::strip_thinking_tags;
use crate::error::LlmError;
use crate::provider::{CompletionRequest, LlmProvider, Message};
/// Extract structured JSON from content using a JSON schema.
/// The schema tells the LLM exactly what fields to extract and their types.
pub async fn extract_json(
content: &str,
schema: &serde_json::Value,
provider: &dyn LlmProvider,
model: Option<&str>,
) -> Result<serde_json::Value, LlmError> {
let system = format!(
"You are a JSON extraction engine. Extract data from the content according to this schema.\n\
Return ONLY valid JSON matching the schema. No explanations, no markdown, no commentary.\n\n\
Schema:\n```json\n{}\n```",
serde_json::to_string_pretty(schema).unwrap_or_else(|_| schema.to_string())
);
let request = CompletionRequest {
model: model.unwrap_or_default().to_string(),
messages: vec![
Message {
role: "system".into(),
content: system,
},
Message {
role: "user".into(),
content: content.to_string(),
},
],
temperature: Some(0.0),
max_tokens: None,
json_mode: true,
};
let response = provider.complete(&request).await?;
parse_json_response(&response)
}
/// Extract information using a natural language prompt.
/// More flexible than schema extraction — the user describes what they want.
pub async fn extract_with_prompt(
content: &str,
prompt: &str,
provider: &dyn LlmProvider,
model: Option<&str>,
) -> Result<serde_json::Value, LlmError> {
let system = format!(
"You are a JSON extraction engine. Extract information from the content based on these instructions.\n\
Return ONLY valid JSON. No explanations, no markdown, no commentary.\n\n\
Instructions: {prompt}"
);
let request = CompletionRequest {
model: model.unwrap_or_default().to_string(),
messages: vec![
Message {
role: "system".into(),
content: system,
},
Message {
role: "user".into(),
content: content.to_string(),
},
],
temperature: Some(0.0),
max_tokens: None,
json_mode: true,
};
let response = provider.complete(&request).await?;
parse_json_response(&response)
}
/// Parse an LLM response string as JSON. Handles common edge cases:
/// - Thinking tags (`<think>...</think>`)
/// - Markdown code fences (```json ... ```)
/// - Leading/trailing whitespace
fn parse_json_response(response: &str) -> Result<serde_json::Value, LlmError> {
// Strip thinking tags before any JSON parsing — providers already do this,
// but defense in depth for any caller that bypasses the provider layer
let cleaned = strip_thinking_tags(response);
let trimmed = cleaned.trim();
// Strip markdown code fences if present
let json_str = if trimmed.starts_with("```") {
let without_opener = trimmed
.strip_prefix("```json")
.or_else(|| trimmed.strip_prefix("```"))
.unwrap_or(trimmed);
without_opener
.strip_suffix("```")
.unwrap_or(without_opener)
.trim()
} else {
trimmed
};
serde_json::from_str(json_str)
.map_err(|e| LlmError::InvalidJson(format!("{e} — raw response: {response}")))
}
#[cfg(test)]
mod tests {
use super::*;
use crate::testing::mock::MockProvider;
#[test]
fn parse_clean_json() {
let result = parse_json_response(r#"{"name": "Rust", "version": 2024}"#).unwrap();
assert_eq!(result["name"], "Rust");
assert_eq!(result["version"], 2024);
}
#[test]
fn parse_json_with_code_fence() {
let response = "```json\n{\"key\": \"value\"}\n```";
let result = parse_json_response(response).unwrap();
assert_eq!(result["key"], "value");
}
#[test]
fn parse_json_with_whitespace() {
let response = " \n {\"ok\": true} \n ";
let result = parse_json_response(response).unwrap();
assert_eq!(result["ok"], true);
}
#[test]
fn parse_invalid_json() {
let result = parse_json_response("not json at all");
assert!(matches!(result, Err(LlmError::InvalidJson(_))));
}
#[test]
fn parse_json_with_thinking_tags() {
let response = "<think>analyzing the content</think>{\"title\": \"Hello\"}";
let result = parse_json_response(response).unwrap();
assert_eq!(result["title"], "Hello");
}
#[test]
fn parse_json_with_thinking_and_code_fence() {
let response = "<think>let me think</think>\n```json\n{\"key\": \"value\"}\n```";
let result = parse_json_response(response).unwrap();
assert_eq!(result["key"], "value");
}
#[tokio::test]
async fn extract_json_uses_schema_in_prompt() {
let mock = MockProvider::ok(r#"{"title": "Test Article", "author": "Jane"}"#);
let schema = serde_json::json!({
"type": "object",
"properties": {
"title": { "type": "string" },
"author": { "type": "string" }
}
});
let result = extract_json("Some article content by Jane", &schema, &mock, None)
.await
.unwrap();
assert_eq!(result["title"], "Test Article");
assert_eq!(result["author"], "Jane");
}
#[tokio::test]
async fn extract_with_prompt_returns_json() {
let mock = MockProvider::ok(r#"{"emails": ["test@example.com"]}"#);
let result = extract_with_prompt(
"Contact us at test@example.com",
"Find all email addresses",
&mock,
None,
)
.await
.unwrap();
assert_eq!(result["emails"][0], "test@example.com");
}
}
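
A sketch of schema extraction wired to the chain (an async context, serde_json in the caller's dependencies, and the page text are assumed):

use noxa_llm::extract::extract_json;
use noxa_llm::{LlmError, ProviderChain};

async fn extract_article(page_text: &str) -> Result<serde_json::Value, LlmError> {
    let provider = ProviderChain::default().await;
    // The schema shapes both the system prompt and the JSON the model must return.
    let schema = serde_json::json!({
        "type": "object",
        "properties": {
            "title":  { "type": "string" },
            "author": { "type": "string" }
        }
    });
    // None = default model of whichever provider answers.
    extract_json(page_text, &schema, &provider, None).await
}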

View file

@@ -0,0 +1,19 @@
//! noxa-llm: LLM integration with local-first hybrid architecture.
//!
//! Provider chain tries Ollama (local) first, falls back to OpenAI, then Anthropic.
//! Provides schema-based extraction, prompt extraction, and summarization
//! on top of noxa-core's content pipeline.
pub mod chain;
pub mod clean;
pub mod error;
pub mod extract;
pub mod provider;
pub mod providers;
pub mod summarize;
#[cfg(test)]
pub(crate) mod testing;
pub use chain::ProviderChain;
pub use clean::strip_thinking_tags;
pub use error::LlmError;
pub use provider::{CompletionRequest, LlmProvider, Message};

View file

@@ -0,0 +1,34 @@
//! Core LLM abstraction. Every backend (Ollama, OpenAI, Anthropic) implements `LlmProvider`.
//! The trait is intentionally minimal — just completion and availability check.
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use crate::error::LlmError;
#[derive(Debug, Clone)]
pub struct CompletionRequest {
pub model: String,
pub messages: Vec<Message>,
pub temperature: Option<f32>,
pub max_tokens: Option<u32>,
/// When true, instruct the provider to return valid JSON.
pub json_mode: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Message {
pub role: String,
pub content: String,
}
#[async_trait]
pub trait LlmProvider: Send + Sync {
/// Send a completion request and return the assistant's text response.
async fn complete(&self, request: &CompletionRequest) -> Result<String, LlmError>;
/// Quick health check — is this provider reachable / configured?
async fn is_available(&self) -> bool;
/// Human-readable name for logging.
fn name(&self) -> &str;
}
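
To show how small the contract is, a hypothetical provider that needs no network at all; EchoProvider and its behavior are invented for the example:

use async_trait::async_trait;
use noxa_llm::{CompletionRequest, LlmError, LlmProvider};

/// Replies with the last user message; handy for offline smoke tests.
struct EchoProvider;

#[async_trait]
impl LlmProvider for EchoProvider {
    async fn complete(&self, request: &CompletionRequest) -> Result<String, LlmError> {
        request
            .messages
            .iter()
            .rev()
            .find(|m| m.role == "user")
            .map(|m| m.content.clone())
            .ok_or_else(|| LlmError::ProviderError("no user message in request".into()))
    }
    async fn is_available(&self) -> bool {
        true
    }
    fn name(&self) -> &str {
        "echo"
    }
}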

View file

@@ -0,0 +1,170 @@
//! Anthropic provider — Claude models via api.anthropic.com.
//! Anthropic's API differs from OpenAI: system message is a top-level param,
//! not part of the messages array.
use async_trait::async_trait;
use serde_json::json;
use crate::clean::strip_thinking_tags;
use crate::error::LlmError;
use crate::provider::{CompletionRequest, LlmProvider};
use super::load_api_key;
const ANTHROPIC_API_URL: &str = "https://api.anthropic.com/v1/messages";
const ANTHROPIC_VERSION: &str = "2023-06-01";
pub struct AnthropicProvider {
client: reqwest::Client,
key: String,
default_model: String,
}
impl AnthropicProvider {
/// Returns `None` if no API key is available (param or env).
pub fn new(key_override: Option<String>, model: Option<String>) -> Option<Self> {
let key = load_api_key(key_override, "ANTHROPIC_API_KEY")?;
Some(Self {
client: reqwest::Client::new(),
key,
default_model: model.unwrap_or_else(|| "claude-sonnet-4-20250514".into()),
})
}
pub fn default_model(&self) -> &str {
&self.default_model
}
}
#[async_trait]
impl LlmProvider for AnthropicProvider {
async fn complete(&self, request: &CompletionRequest) -> Result<String, LlmError> {
let model = if request.model.is_empty() {
&self.default_model
} else {
&request.model
};
// Anthropic separates system from messages. Extract the system message if present.
let system_content: Option<String> = request
.messages
.iter()
.find(|m| m.role == "system")
.map(|m| m.content.clone());
let messages: Vec<serde_json::Value> = request
.messages
.iter()
.filter(|m| m.role != "system")
.map(|m| json!({ "role": m.role, "content": m.content }))
.collect();
let mut body = json!({
"model": model,
"messages": messages,
"max_tokens": request.max_tokens.unwrap_or(4096),
});
if let Some(system) = &system_content {
body["system"] = json!(system);
}
if let Some(temp) = request.temperature {
body["temperature"] = json!(temp);
}
let resp = self
.client
.post(ANTHROPIC_API_URL)
.header("x-api-key", &self.key)
.header("anthropic-version", ANTHROPIC_VERSION)
.header("content-type", "application/json")
.json(&body)
.send()
.await?;
if !resp.status().is_success() {
let status = resp.status();
let text = resp.text().await.unwrap_or_default();
            // Truncate long error bodies by characters, not bytes; a raw
            // byte slice at index 500 can panic mid-way through a UTF-8 char
            let safe_text: String = text.chars().take(500).collect();
return Err(LlmError::ProviderError(format!(
"anthropic returned {status}: {safe_text}"
)));
}
let json: serde_json::Value = resp.json().await?;
// Anthropic response: {"content": [{"type": "text", "text": "..."}]}
let raw = json["content"][0]["text"]
.as_str()
.map(String::from)
.ok_or_else(|| {
LlmError::InvalidJson("missing content[0].text in anthropic response".into())
})?;
Ok(strip_thinking_tags(&raw))
}
async fn is_available(&self) -> bool {
!self.key.is_empty()
}
fn name(&self) -> &str {
"anthropic"
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn empty_key_returns_none() {
assert!(AnthropicProvider::new(Some(String::new()), None).is_none());
}
#[test]
fn explicit_key_constructs() {
let provider =
AnthropicProvider::new(Some("sk-ant-test".into()), None).expect("should construct");
assert_eq!(provider.name(), "anthropic");
assert_eq!(provider.default_model, "claude-sonnet-4-20250514");
assert_eq!(provider.key, "sk-ant-test");
}
#[test]
fn custom_model() {
let provider =
AnthropicProvider::new(Some("sk-ant-test".into()), Some("claude-3-haiku".into()))
.unwrap();
assert_eq!(provider.default_model, "claude-3-haiku");
}
#[test]
fn default_model_accessor() {
let provider = AnthropicProvider::new(Some("sk-ant-test".into()), None).unwrap();
assert_eq!(provider.default_model(), "claude-sonnet-4-20250514");
}
// Env var fallback tests mutate process-global state and race with parallel tests.
// The code path is trivial (load_api_key -> env::var().ok()). Run in isolation if needed:
// cargo test -p noxa-llm env_var -- --ignored --test-threads=1
#[test]
#[ignore = "mutates process env; run with --test-threads=1"]
fn env_var_key_fallback() {
unsafe { std::env::set_var("ANTHROPIC_API_KEY", "sk-ant-env") };
let provider = AnthropicProvider::new(None, None).expect("should construct from env");
assert_eq!(provider.key, "sk-ant-env");
unsafe { std::env::remove_var("ANTHROPIC_API_KEY") };
}
#[test]
#[ignore = "mutates process env; run with --test-threads=1"]
fn no_key_returns_none() {
unsafe { std::env::remove_var("ANTHROPIC_API_KEY") };
assert!(AnthropicProvider::new(None, None).is_none());
}
}

View file

@@ -0,0 +1,36 @@
pub mod anthropic;
pub mod ollama;
pub mod openai;
/// Load an API key from an explicit override or an environment variable.
/// Returns `None` if neither is set or the value is empty.
pub(crate) fn load_api_key(override_key: Option<String>, env_var: &str) -> Option<String> {
let key = override_key.or_else(|| std::env::var(env_var).ok())?;
if key.is_empty() { None } else { Some(key) }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn override_key_takes_precedence() {
assert_eq!(
load_api_key(Some("explicit".into()), "NONEXISTENT_VAR"),
Some("explicit".into())
);
}
#[test]
fn empty_override_returns_none() {
assert_eq!(load_api_key(Some(String::new()), "NONEXISTENT_VAR"), None);
}
#[test]
fn none_override_with_no_env_returns_none() {
assert_eq!(
load_api_key(None, "NOXA_TEST_NONEXISTENT_KEY_12345"),
None
);
}
}

View file

@@ -0,0 +1,161 @@
//! Ollama provider — talks to a local Ollama instance (default localhost:11434).
//! First choice in the provider chain: free, private, fast on Apple Silicon.
use async_trait::async_trait;
use serde_json::json;
use crate::clean::strip_thinking_tags;
use crate::error::LlmError;
use crate::provider::{CompletionRequest, LlmProvider};
pub struct OllamaProvider {
client: reqwest::Client,
base_url: String,
default_model: String,
}
impl OllamaProvider {
pub fn new(base_url: Option<String>, model: Option<String>) -> Self {
let base_url = base_url
.or_else(|| std::env::var("OLLAMA_HOST").ok())
.unwrap_or_else(|| "http://localhost:11434".into());
let default_model = model
.or_else(|| std::env::var("OLLAMA_MODEL").ok())
.unwrap_or_else(|| "qwen3:8b".into());
Self {
client: reqwest::Client::new(),
base_url,
default_model,
}
}
pub fn default_model(&self) -> &str {
&self.default_model
}
}
#[async_trait]
impl LlmProvider for OllamaProvider {
async fn complete(&self, request: &CompletionRequest) -> Result<String, LlmError> {
let model = if request.model.is_empty() {
&self.default_model
} else {
&request.model
};
let messages: Vec<serde_json::Value> = request
.messages
.iter()
.map(|m| json!({ "role": m.role, "content": m.content }))
.collect();
let mut body = json!({
"model": model,
"messages": messages,
"stream": false,
"think": false,
});
if request.json_mode {
body["format"] = json!("json");
}
if let Some(temp) = request.temperature {
body["options"] = json!({ "temperature": temp });
}
let url = format!("{}/api/chat", self.base_url);
let resp = self.client.post(&url).json(&body).send().await?;
if !resp.status().is_success() {
let status = resp.status();
let text = resp.text().await.unwrap_or_default();
            // Truncate long error bodies by characters, not bytes; a raw
            // byte slice at index 500 can panic mid-way through a UTF-8 char
            let safe_text: String = text.chars().take(500).collect();
return Err(LlmError::ProviderError(format!(
"ollama returned {status}: {safe_text}"
)));
}
let json: serde_json::Value = resp.json().await?;
let raw = json["message"]["content"]
.as_str()
.map(String::from)
.ok_or_else(|| {
LlmError::InvalidJson(format!(
"missing message.content in ollama response: {json}"
))
})?;
Ok(strip_thinking_tags(&raw))
}
async fn is_available(&self) -> bool {
let url = format!("{}/api/tags", self.base_url);
matches!(self.client.get(&url).send().await, Ok(r) if r.status().is_success())
}
fn name(&self) -> &str {
"ollama"
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn explicit_params_used() {
let provider = OllamaProvider::new(
Some("http://gpu-box:11434".into()),
Some("llama3:70b".into()),
);
assert_eq!(provider.base_url, "http://gpu-box:11434");
assert_eq!(provider.default_model, "llama3:70b");
assert_eq!(provider.name(), "ollama");
}
#[test]
fn explicit_model_overrides_any_env() {
// Passing Some(...) bypasses env vars entirely -- no race possible
let provider = OllamaProvider::new(None, Some("mistral:7b".into()));
assert_eq!(provider.default_model, "mistral:7b");
}
#[test]
fn explicit_url_overrides_any_env() {
let provider = OllamaProvider::new(Some("http://local:11434".into()), None);
assert_eq!(provider.base_url, "http://local:11434");
}
#[test]
fn default_model_accessor() {
let provider = OllamaProvider::new(None, Some("phi3:mini".into()));
assert_eq!(provider.default_model(), "phi3:mini");
}
// Env var fallback is a trivial `env::var().ok()` -- not worth the flakiness
// of manipulating process-global state. Run in isolation if needed:
// cargo test -p noxa-llm env_var_fallback -- --ignored --test-threads=1
#[test]
#[ignore = "mutates process env; run with --test-threads=1"]
fn env_var_fallback() {
unsafe {
std::env::set_var("OLLAMA_HOST", "http://remote:11434");
std::env::set_var("OLLAMA_MODEL", "mistral:7b");
}
let provider = OllamaProvider::new(None, None);
assert_eq!(provider.base_url, "http://remote:11434");
assert_eq!(provider.default_model, "mistral:7b");
unsafe {
std::env::remove_var("OLLAMA_HOST");
std::env::remove_var("OLLAMA_MODEL");
}
}
}

View file

@@ -0,0 +1,181 @@
//! OpenAI provider — works with api.openai.com and any OpenAI-compatible endpoint.
use async_trait::async_trait;
use serde_json::json;
use crate::clean::strip_thinking_tags;
use crate::error::LlmError;
use crate::provider::{CompletionRequest, LlmProvider};
use super::load_api_key;
pub struct OpenAiProvider {
client: reqwest::Client,
key: String,
base_url: String,
default_model: String,
}
impl OpenAiProvider {
/// Returns `None` if no API key is available (param or env).
pub fn new(
key_override: Option<String>,
base_url: Option<String>,
model: Option<String>,
) -> Option<Self> {
let key = load_api_key(key_override, "OPENAI_API_KEY")?;
Some(Self {
client: reqwest::Client::new(),
key,
base_url: base_url
.or_else(|| std::env::var("OPENAI_BASE_URL").ok())
.unwrap_or_else(|| "https://api.openai.com/v1".into()),
default_model: model.unwrap_or_else(|| "gpt-4o-mini".into()),
})
}
pub fn default_model(&self) -> &str {
&self.default_model
}
}
#[async_trait]
impl LlmProvider for OpenAiProvider {
async fn complete(&self, request: &CompletionRequest) -> Result<String, LlmError> {
let model = if request.model.is_empty() {
&self.default_model
} else {
&request.model
};
let messages: Vec<serde_json::Value> = request
.messages
.iter()
.map(|m| json!({ "role": m.role, "content": m.content }))
.collect();
let mut body = json!({
"model": model,
"messages": messages,
});
if request.json_mode {
body["response_format"] = json!({ "type": "json_object" });
}
if let Some(temp) = request.temperature {
body["temperature"] = json!(temp);
}
if let Some(max) = request.max_tokens {
body["max_tokens"] = json!(max);
}
let url = format!("{}/chat/completions", self.base_url);
let resp = self
.client
.post(&url)
.header("Authorization", format!("Bearer {}", self.key))
.json(&body)
.send()
.await?;
if !resp.status().is_success() {
let status = resp.status();
let text = resp.text().await.unwrap_or_default();
            // Truncate long error bodies by characters, not bytes; a raw
            // byte slice at index 500 can panic mid-way through a UTF-8 char
            let safe_text: String = text.chars().take(500).collect();
return Err(LlmError::ProviderError(format!(
"openai returned {status}: {safe_text}"
)));
}
let json: serde_json::Value = resp.json().await?;
let raw = json["choices"][0]["message"]["content"]
.as_str()
.map(String::from)
.ok_or_else(|| {
LlmError::InvalidJson(
"missing choices[0].message.content in openai response".into(),
)
})?;
Ok(strip_thinking_tags(&raw))
}
async fn is_available(&self) -> bool {
!self.key.is_empty()
}
fn name(&self) -> &str {
"openai"
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn empty_key_returns_none() {
assert!(OpenAiProvider::new(Some(String::new()), None, None).is_none());
}
#[test]
fn explicit_key_constructs() {
let provider = OpenAiProvider::new(
Some("test-key-123".into()),
Some("https://api.openai.com/v1".into()),
Some("gpt-4o-mini".into()),
)
.expect("should construct");
assert_eq!(provider.name(), "openai");
assert_eq!(provider.default_model, "gpt-4o-mini");
assert_eq!(provider.base_url, "https://api.openai.com/v1");
assert_eq!(provider.key, "test-key-123");
}
#[test]
fn custom_base_url_and_model() {
let provider = OpenAiProvider::new(
Some("test-key".into()),
Some("http://localhost:8080/v1".into()),
Some("gpt-3.5-turbo".into()),
)
.unwrap();
assert_eq!(provider.base_url, "http://localhost:8080/v1");
assert_eq!(provider.default_model, "gpt-3.5-turbo");
}
#[test]
fn default_model_accessor() {
let provider = OpenAiProvider::new(
Some("test-key".into()),
Some("https://api.openai.com/v1".into()),
None,
)
.unwrap();
assert_eq!(provider.default_model(), "gpt-4o-mini");
}
// Env var fallback tests mutate process-global state and race with parallel tests.
// The code path is trivial (load_api_key -> env::var().ok()). Run in isolation if needed:
// cargo test -p noxa-llm env_var -- --ignored --test-threads=1
#[test]
#[ignore = "mutates process env; run with --test-threads=1"]
fn env_var_key_fallback() {
unsafe { std::env::set_var("OPENAI_API_KEY", "sk-env-key") };
let provider = OpenAiProvider::new(None, None, None).expect("should construct from env");
assert_eq!(provider.key, "sk-env-key");
unsafe { std::env::remove_var("OPENAI_API_KEY") };
}
#[test]
#[ignore = "mutates process env; run with --test-threads=1"]
fn no_key_returns_none() {
unsafe { std::env::remove_var("OPENAI_API_KEY") };
assert!(OpenAiProvider::new(None, None, None).is_none());
}
}

View file

@@ -0,0 +1,124 @@
//! LLM-powered content summarization. Keeps it simple: one function, one prompt.
use crate::clean::strip_thinking_tags;
use crate::error::LlmError;
use crate::provider::{CompletionRequest, LlmProvider, Message};
/// Summarize content using an LLM.
/// Returns plain text (not JSON). Default is 3 sentences.
pub async fn summarize(
content: &str,
max_sentences: Option<usize>,
provider: &dyn LlmProvider,
model: Option<&str>,
) -> Result<String, LlmError> {
let n = max_sentences.unwrap_or(3);
let system = format!(
"You are a summarization engine. Summarize the following content in exactly {n} sentences. \
Output ONLY the summary, nothing else. No introductions, no questions, no formatting, no preamble."
);
let request = CompletionRequest {
model: model.unwrap_or_default().to_string(),
messages: vec![
Message {
role: "system".into(),
content: system,
},
Message {
role: "user".into(),
content: content.to_string(),
},
],
temperature: Some(0.3),
max_tokens: None,
json_mode: false,
};
let response = provider.complete(&request).await?;
// Providers already strip thinking tags, but defense in depth for summarize
// since its output goes directly to the user as plain text
Ok(strip_thinking_tags(&response))
}
#[cfg(test)]
mod tests {
use super::*;
use async_trait::async_trait;
struct MockSummarizer;
#[async_trait]
impl LlmProvider for MockSummarizer {
async fn complete(&self, req: &CompletionRequest) -> Result<String, LlmError> {
// Verify the prompt is well-formed
let system = &req.messages[0].content;
assert!(system.contains("sentences"));
assert!(system.contains("summarization engine"));
assert!(!req.json_mode, "summarize should not use json_mode");
Ok("This is a test summary.".into())
}
async fn is_available(&self) -> bool {
true
}
fn name(&self) -> &str {
"mock"
}
}
#[tokio::test]
async fn summarize_returns_text() {
let result = summarize("Long article content...", None, &MockSummarizer, None)
.await
.unwrap();
assert_eq!(result, "This is a test summary.");
}
#[tokio::test]
async fn summarize_custom_sentence_count() {
// Verify custom count is passed through
struct CountChecker;
#[async_trait]
impl LlmProvider for CountChecker {
async fn complete(&self, req: &CompletionRequest) -> Result<String, LlmError> {
assert!(req.messages[0].content.contains("5 sentences"));
Ok("Summary.".into())
}
async fn is_available(&self) -> bool {
true
}
fn name(&self) -> &str {
"count_checker"
}
}
summarize("Content", Some(5), &CountChecker, None)
.await
.unwrap();
}
#[tokio::test]
async fn summarize_strips_thinking_tags() {
struct ThinkingMock;
#[async_trait]
impl LlmProvider for ThinkingMock {
async fn complete(&self, _req: &CompletionRequest) -> Result<String, LlmError> {
Ok("<think>let me analyze this</think>This is the clean summary.".into())
}
async fn is_available(&self) -> bool {
true
}
fn name(&self) -> &str {
"thinking_mock"
}
}
let result = summarize("Some content", None, &ThinkingMock, None)
.await
.unwrap();
assert_eq!(result, "This is the clean summary.");
}
}
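
Paired with the chain, the whole pipeline reduces to a few lines (a sketch; the article text is assumed):

use noxa_llm::summarize::summarize;
use noxa_llm::{LlmError, ProviderChain};

async fn tldr(article: &str) -> Result<String, LlmError> {
    let provider = ProviderChain::default().await;
    // Two sentences, default model of whichever provider answers first.
    summarize(article, Some(2), &provider, None).await
}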

View file

@@ -0,0 +1,48 @@
//! Shared test utilities for noxa-llm.
//!
//! Provides a configurable mock LLM provider for unit tests across
//! extract, chain, and other modules that need a fake LLM backend.
#[cfg(test)]
pub(crate) mod mock {
use async_trait::async_trait;
use crate::error::LlmError;
use crate::provider::{CompletionRequest, LlmProvider};
/// A mock LLM provider that returns a canned response or error.
/// Covers the common test cases: success, failure, and availability.
pub struct MockProvider {
pub name: &'static str,
pub response: Result<String, String>,
pub available: bool,
}
impl MockProvider {
/// Shorthand for a mock that always succeeds with the given response.
pub fn ok(response: &str) -> Self {
Self {
name: "mock",
response: Ok(response.to_string()),
available: true,
}
}
}
#[async_trait]
impl LlmProvider for MockProvider {
async fn complete(&self, _request: &CompletionRequest) -> Result<String, LlmError> {
match &self.response {
Ok(text) => Ok(text.clone()),
Err(msg) => Err(LlmError::ProviderError(msg.clone())),
}
}
async fn is_available(&self) -> bool {
self.available
}
fn name(&self) -> &str {
self.name
}
}
}