chore: rebrand webclaw to noxa

Jacob Magar 2026-04-11 00:10:38 -04:00
parent a4c351d5ae
commit 8674b60b4e
86 changed files with 781 additions and 2121 deletions


@ -1,19 +1,19 @@
[package]
name = "webclaw-cli"
name = "noxa-cli"
description = "CLI for extracting web content into LLM-optimized formats"
version.workspace = true
edition.workspace = true
license.workspace = true
[[bin]]
name = "webclaw"
name = "noxa"
path = "src/main.rs"
[dependencies]
webclaw-core = { workspace = true }
webclaw-fetch = { workspace = true }
webclaw-llm = { workspace = true }
webclaw-pdf = { workspace = true }
noxa-core = { workspace = true }
noxa-fetch = { workspace = true }
noxa-llm = { workspace = true }
noxa-pdf = { workspace = true }
dotenvy = { workspace = true }
rand = "0.8"
serde_json = { workspace = true }


@ -1,16 +1,16 @@
/// Cloud API client for automatic fallback when local extraction fails.
///
/// When WEBCLAW_API_KEY is set (or --api-key is passed), the CLI can fall back
/// to api.webclaw.io for bot-protected or JS-rendered sites. With --cloud flag,
/// When NOXA_API_KEY is set (or --api-key is passed), the CLI can fall back
/// to api.noxa.io for bot-protected or JS-rendered sites. With --cloud flag,
/// all requests go through the cloud API directly.
///
/// NOTE: The canonical, full-featured cloud module lives in webclaw-mcp/src/cloud.rs
/// NOTE: The canonical, full-featured cloud module lives in noxa-mcp/src/cloud.rs
/// (smart_fetch, bot detection, JS rendering checks). This is the minimal subset
/// needed by the CLI. Kept separate because adding webclaw-mcp as a dependency would pull in rmcp.
/// needed by the CLI. Kept separate because adding noxa-mcp as a dependency would pull in rmcp.
use serde_json::{Value, json};
const API_BASE: &str = "https://api.webclaw.io/v1";
const API_BASE: &str = "https://api.noxa.io/v1";
pub struct CloudClient {
api_key: String,
@ -18,11 +18,11 @@ pub struct CloudClient {
}
impl CloudClient {
/// Create from explicit key or WEBCLAW_API_KEY env var.
/// Create from explicit key or NOXA_API_KEY env var.
pub fn new(explicit_key: Option<&str>) -> Option<Self> {
let key = explicit_key
.map(String::from)
.or_else(|| std::env::var("WEBCLAW_API_KEY").ok())
.or_else(|| std::env::var("NOXA_API_KEY").ok())
.filter(|k| !k.is_empty())?;
Some(Self {
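For reference, the key-resolution order this constructor implements (explicit flag first, then env var, with empty strings treated as unset) can be sketched as a standalone helper; resolve_api_key is a hypothetical name, not part of the crate:

/// Sketch: --api-key wins; otherwise fall back to NOXA_API_KEY; empty means unset.
fn resolve_api_key(explicit: Option<&str>) -> Option<String> {
    explicit
        .map(String::from)
        .or_else(|| std::env::var("NOXA_API_KEY").ok())
        .filter(|k| !k.is_empty())
}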


@ -1,5 +1,5 @@
#![allow(dead_code)]
/// CLI entry point -- wires webclaw-core and webclaw-fetch into a single command.
/// CLI entry point -- wires noxa-core and noxa-fetch into a single command.
/// All extraction and fetching logic lives in sibling crates; this is pure plumbing.
mod cloud;
@ -11,16 +11,16 @@ use std::sync::atomic::{AtomicBool, Ordering};
use clap::{Parser, ValueEnum};
use tracing_subscriber::EnvFilter;
use webclaw_core::{
use noxa_core::{
ChangeStatus, ContentDiff, ExtractionOptions, ExtractionResult, Metadata, extract_with_options,
to_llm_text,
};
use webclaw_fetch::{
use noxa_fetch::{
BatchExtractResult, BrowserProfile, CrawlConfig, CrawlResult, Crawler, FetchClient,
FetchConfig, FetchResult, PageResult, SitemapEntry,
};
use webclaw_llm::LlmProvider;
use webclaw_pdf::PdfMode;
use noxa_llm::LlmProvider;
use noxa_pdf::PdfMode;
/// Known anti-bot challenge page titles (case-insensitive prefix match).
const ANTIBOT_TITLES: &[&str] = &[
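The list itself is cut off by the hunk, but the doc comment specifies a case-insensitive prefix match; a minimal sketch of how such a check could look, with illustrative entries rather than the real ones:

// Illustrative titles only; the actual const is truncated in this diff.
const SAMPLE_TITLES: &[&str] = &["just a moment", "attention required"];

fn title_looks_antibot(title: &str) -> bool {
    let t = title.to_lowercase();
    SAMPLE_TITLES.iter().any(|p| t.starts_with(p))
}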
@ -73,19 +73,19 @@ fn warn_empty(url: &str, reason: &EmptyReason) {
EmptyReason::Antibot => eprintln!(
"\x1b[33mwarning:\x1b[0m Anti-bot protection detected on {url}\n\
This site requires CAPTCHA solving or browser rendering.\n\
Use the webclaw Cloud API for automatic bypass: https://webclaw.io/pricing"
Use the noxa Cloud API for automatic bypass: https://noxa.io/pricing"
),
EmptyReason::JsRequired => eprintln!(
"\x1b[33mwarning:\x1b[0m No content extracted from {url}\n\
This site requires JavaScript rendering (SPA).\n\
Use the webclaw Cloud API for JS rendering: https://webclaw.io/pricing"
Use the noxa Cloud API for JS rendering: https://noxa.io/pricing"
),
EmptyReason::None => {}
}
}
#[derive(Parser)]
#[command(name = "webclaw", about = "Extract web content for LLMs", version)]
#[command(name = "noxa", about = "Extract web content for LLMs", version)]
struct Cli {
/// URLs to fetch (multiple allowed)
#[arg()]
@ -104,11 +104,11 @@ struct Cli {
browser: Browser,
/// Proxy URL (http://user:pass@host:port or socks5://host:port)
#[arg(short, long, env = "WEBCLAW_PROXY")]
#[arg(short, long, env = "NOXA_PROXY")]
proxy: Option<String>,
/// File with proxies (host:port:user:pass, one per line). Rotates per request.
#[arg(long, env = "WEBCLAW_PROXY_FILE")]
#[arg(long, env = "NOXA_PROXY_FILE")]
proxy_file: Option<String>,
/// Request timeout in seconds
@ -177,7 +177,7 @@ struct Cli {
/// Webhook URL: POST a JSON payload when an operation completes.
/// Works with crawl, batch, watch (on change), and single URL modes.
#[arg(long, env = "WEBCLAW_WEBHOOK_URL")]
#[arg(long, env = "NOXA_WEBHOOK_URL")]
webhook: Option<String>,
/// Extract brand identity (colors, fonts, logo)
@ -248,20 +248,20 @@ struct Cli {
summarize: Option<usize>,
/// Force a specific LLM provider (ollama, openai, anthropic)
#[arg(long, env = "WEBCLAW_LLM_PROVIDER")]
#[arg(long, env = "NOXA_LLM_PROVIDER")]
llm_provider: Option<String>,
/// Override the LLM model name
#[arg(long, env = "WEBCLAW_LLM_MODEL")]
#[arg(long, env = "NOXA_LLM_MODEL")]
llm_model: Option<String>,
/// Override the LLM base URL (Ollama or OpenAI-compatible)
#[arg(long, env = "WEBCLAW_LLM_BASE_URL")]
#[arg(long, env = "NOXA_LLM_BASE_URL")]
llm_base_url: Option<String>,
// -- Cloud API options --
/// Webclaw Cloud API key for automatic fallback on bot-protected or JS-rendered sites
#[arg(long, env = "WEBCLAW_API_KEY")]
/// Noxa Cloud API key for automatic fallback on bot-protected or JS-rendered sites
#[arg(long, env = "NOXA_API_KEY")]
api_key: Option<String>,
/// Force all requests through the cloud API (skip local extraction)
@ -330,9 +330,9 @@ impl From<Browser> for BrowserProfile {
fn init_logging(verbose: bool) {
let filter = if verbose {
EnvFilter::new("webclaw=debug")
EnvFilter::new("noxa=debug")
} else {
EnvFilter::try_from_env("WEBCLAW_LOG").unwrap_or_else(|_| EnvFilter::new("warn"))
EnvFilter::try_from_env("NOXA_LOG").unwrap_or_else(|_| EnvFilter::new("warn"))
};
tracing_subscriber::fmt().with_env_filter(filter).init();
@ -347,7 +347,7 @@ fn build_fetch_config(cli: &Cli) -> FetchConfig {
let (proxy, proxy_pool) = if cli.proxy.is_some() {
(cli.proxy.clone(), Vec::new())
} else if let Some(ref path) = cli.proxy_file {
match webclaw_fetch::parse_proxy_file(path) {
match noxa_fetch::parse_proxy_file(path) {
Ok(pool) => (None, pool),
Err(e) => {
eprintln!("warning: {e}");
@ -356,7 +356,7 @@ fn build_fetch_config(cli: &Cli) -> FetchConfig {
}
} else if std::path::Path::new("proxies.txt").exists() {
// Auto-load proxies.txt from working directory if present
match webclaw_fetch::parse_proxy_file("proxies.txt") {
match noxa_fetch::parse_proxy_file("proxies.txt") {
Ok(pool) if !pool.is_empty() => {
eprintln!("loaded {} proxies from proxies.txt", pool.len());
(None, pool)
@ -652,7 +652,7 @@ async fn fetch_and_extract(cli: &Cli) -> Result<FetchOutput, String> {
// --cloud: skip local, go straight to cloud API
if cli.cloud {
let c =
cloud_client.ok_or("--cloud requires WEBCLAW_API_KEY (set via env or --api-key)")?;
cloud_client.ok_or("--cloud requires NOXA_API_KEY (set via env or --api-key)")?;
let options = build_extraction_options(cli);
let format_str = match cli.format {
OutputFormat::Markdown => "markdown",
@ -1349,7 +1349,7 @@ async fn run_map(cli: &Cli) -> Result<(), String> {
let client =
FetchClient::new(build_fetch_config(cli)).map_err(|e| format!("client error: {e}"))?;
let entries = webclaw_fetch::sitemap::discover(&client, url)
let entries = noxa_fetch::sitemap::discover(&client, url)
.await
.map_err(|e| format!("sitemap discovery failed: {e}"))?;
@ -1469,7 +1469,7 @@ fn fire_webhook(url: &str, payload: &serde_json::Value) {
let details = serde_json::to_string_pretty(payload).unwrap_or_default();
serde_json::json!({
"embeds": [{
"title": format!("webclaw: {event}"),
"title": format!("noxa: {event}"),
"description": format!("```json\n{details}\n```"),
"color": 5814783
}]
@ -1482,7 +1482,7 @@ fn fire_webhook(url: &str, payload: &serde_json::Value) {
.unwrap_or("notification");
let details = serde_json::to_string_pretty(payload).unwrap_or_default();
serde_json::json!({
"text": format!("*webclaw: {event}*\n```{details}```")
"text": format!("*noxa: {event}*\n```{details}```")
})
.to_string()
} else {
@ -1575,7 +1575,7 @@ async fn run_watch_single(
}
};
let diff = webclaw_core::diff::diff(&previous, &current);
let diff = noxa_core::diff::diff(&previous, &current);
if diff.status == ChangeStatus::Same {
eprintln!("[watch] No changes ({})", timestamp());
@ -1687,7 +1687,7 @@ async fn run_watch_multi(
match r.result {
Ok(current) => {
if let Some(previous) = snapshots.get(&r.url) {
let diff = webclaw_core::diff::diff(previous, &current);
let diff = noxa_core::diff::diff(previous, &current);
if diff.status == ChangeStatus::Same {
same_count += 1;
} else {
@ -1790,7 +1790,7 @@ async fn run_diff(cli: &Cli, snapshot_path: &str) -> Result<(), String> {
// Extract current version (handles PDF detection for URLs)
let new_result = fetch_and_extract(cli).await?.into_extraction()?;
let diff = webclaw_core::diff::diff(&old, &new_result);
let diff = noxa_core::diff::diff(&old, &new_result);
print_diff_output(&diff, &cli.format);
Ok(())
@ -1799,7 +1799,7 @@ async fn run_diff(cli: &Cli, snapshot_path: &str) -> Result<(), String> {
async fn run_brand(cli: &Cli) -> Result<(), String> {
let result = fetch_html(cli).await?;
let enriched = enrich_html_with_stylesheets(&result.html, &result.url).await;
let brand = webclaw_core::brand::extract_brand(
let brand = noxa_core::brand::extract_brand(
&enriched,
Some(result.url.as_str()).filter(|s| !s.is_empty()),
);
@ -1815,7 +1815,7 @@ async fn build_llm_provider(cli: &Cli) -> Result<Box<dyn LlmProvider>, String> {
if let Some(ref name) = cli.llm_provider {
match name.as_str() {
"ollama" => {
let provider = webclaw_llm::providers::ollama::OllamaProvider::new(
let provider = noxa_llm::providers::ollama::OllamaProvider::new(
cli.llm_base_url.clone(),
cli.llm_model.clone(),
);
@ -1825,7 +1825,7 @@ async fn build_llm_provider(cli: &Cli) -> Result<Box<dyn LlmProvider>, String> {
Ok(Box::new(provider))
}
"openai" => {
let provider = webclaw_llm::providers::openai::OpenAiProvider::new(
let provider = noxa_llm::providers::openai::OpenAiProvider::new(
None,
cli.llm_base_url.clone(),
cli.llm_model.clone(),
@ -1834,7 +1834,7 @@ async fn build_llm_provider(cli: &Cli) -> Result<Box<dyn LlmProvider>, String> {
Ok(Box::new(provider))
}
"anthropic" => {
let provider = webclaw_llm::providers::anthropic::AnthropicProvider::new(
let provider = noxa_llm::providers::anthropic::AnthropicProvider::new(
None,
cli.llm_model.clone(),
)
@ -1846,7 +1846,7 @@ async fn build_llm_provider(cli: &Cli) -> Result<Box<dyn LlmProvider>, String> {
)),
}
} else {
let chain = webclaw_llm::ProviderChain::default().await;
let chain = noxa_llm::ProviderChain::default().await;
if chain.is_empty() {
return Err(
"no LLM providers available -- start Ollama or set OPENAI_API_KEY / ANTHROPIC_API_KEY"
@ -1876,7 +1876,7 @@ async fn run_llm(cli: &Cli) -> Result<(), String> {
let schema: serde_json::Value =
serde_json::from_str(&schema_str).map_err(|e| format!("invalid JSON schema: {e}"))?;
let extracted = webclaw_llm::extract::extract_json(
let extracted = noxa_llm::extract::extract_json(
&result.content.plain_text,
&schema,
provider.as_ref(),
@ -1890,7 +1890,7 @@ async fn run_llm(cli: &Cli) -> Result<(), String> {
serde_json::to_string_pretty(&extracted).expect("serialization failed")
);
} else if let Some(ref prompt) = cli.extract_prompt {
let extracted = webclaw_llm::extract::extract_with_prompt(
let extracted = noxa_llm::extract::extract_with_prompt(
&result.content.plain_text,
prompt,
provider.as_ref(),
@ -1904,7 +1904,7 @@ async fn run_llm(cli: &Cli) -> Result<(), String> {
serde_json::to_string_pretty(&extracted).expect("serialization failed")
);
} else if let Some(sentences) = cli.summarize {
let summary = webclaw_llm::summarize::summarize(
let summary = noxa_llm::summarize::summarize(
&result.content.plain_text,
Some(sentences),
provider.as_ref(),
@ -1975,15 +1975,15 @@ async fn run_batch_llm(cli: &Cli, entries: &[(String, Option<String>)]) -> Resul
// Run the appropriate LLM operation
let llm_result = if let Some(ref schema) = schema {
webclaw_llm::extract::extract_json(text, schema, provider.as_ref(), model)
noxa_llm::extract::extract_json(text, schema, provider.as_ref(), model)
.await
.map(LlmOutput::Json)
} else if let Some(ref prompt) = cli.extract_prompt {
webclaw_llm::extract::extract_with_prompt(text, prompt, provider.as_ref(), model)
noxa_llm::extract::extract_with_prompt(text, prompt, provider.as_ref(), model)
.await
.map(LlmOutput::Json)
} else if let Some(sentences) = cli.summarize {
webclaw_llm::summarize::summarize(text, Some(sentences), provider.as_ref(), model)
noxa_llm::summarize::summarize(text, Some(sentences), provider.as_ref(), model)
.await
.map(LlmOutput::Text)
} else {
@ -2080,7 +2080,7 @@ async fn run_research(cli: &Cli, query: &str) -> Result<(), String> {
let api_key = cli
.api_key
.as_deref()
.ok_or("--research requires WEBCLAW_API_KEY (set via env or --api-key)")?;
.ok_or("--research requires NOXA_API_KEY (set via env or --api-key)")?;
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(600))
@ -2099,7 +2099,7 @@ async fn run_research(cli: &Cli, query: &str) -> Result<(), String> {
// Start job
let resp = client
.post("https://api.webclaw.io/v1/research")
.post("https://api.noxa.io/v1/research")
.header("Authorization", format!("Bearer {api_key}"))
.json(&body)
.send()
@ -2122,7 +2122,7 @@ async fn run_research(cli: &Cli, query: &str) -> Result<(), String> {
tokio::time::sleep(std::time::Duration::from_secs(3)).await;
let status_resp = client
.get(format!("https://api.webclaw.io/v1/research/{job_id}"))
.get(format!("https://api.noxa.io/v1/research/{job_id}"))
.header("Authorization", format!("Bearer {api_key}"))
.send()
.await
@ -2448,7 +2448,7 @@ mod tests {
#[test]
fn write_to_file_creates_dirs() {
let dir = std::env::temp_dir().join("webclaw_test_output_dir");
let dir = std::env::temp_dir().join("noxa_test_output_dir");
let _ = std::fs::remove_dir_all(&dir);
write_to_file(&dir, "nested/deep/file.md", "hello").unwrap();
let content = std::fs::read_to_string(dir.join("nested/deep/file.md")).unwrap();


@ -1,5 +1,5 @@
[package]
name = "webclaw-core"
name = "noxa-core"
description = "Pure HTML content extraction engine for LLMs"
version.workspace = true
edition.workspace = true


@ -1,6 +1,6 @@
pub mod brand;
pub(crate) mod data_island;
/// webclaw-core: Pure HTML content extraction engine for LLMs.
/// noxa-core: Pure HTML content extraction engine for LLMs.
///
/// Takes raw HTML + optional URL, returns structured content
/// (metadata, markdown, plain text, links, images, code blocks).


@ -1,13 +1,13 @@
[package]
name = "webclaw-fetch"
name = "noxa-fetch"
description = "HTTP client with browser TLS fingerprint impersonation via wreq"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
webclaw-core = { workspace = true }
webclaw-pdf = { path = "../webclaw-pdf" }
noxa-core = { workspace = true }
noxa-pdf = { path = "../noxa-pdf" }
serde = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }


@ -1,5 +1,5 @@
//! Browser fingerprint selection and rotation.
//! Maps our BrowserProfile enum to webclaw-http client builder methods.
//! Maps our BrowserProfile enum to noxa-http client builder methods.
/// Which browser identity to present at the TLS/HTTP layer.
#[derive(Debug, Clone, Default)]
@ -11,7 +11,7 @@ pub enum BrowserProfile {
Random,
}
/// A browser variant for building webclaw-http clients.
/// A browser variant for building noxa-http clients.
#[derive(Debug, Clone, Copy)]
pub enum BrowserVariant {
Chrome,


@ -1,7 +1,7 @@
/// HTTP client with browser TLS fingerprint impersonation.
/// Uses wreq (BoringSSL) for browser-grade TLS + HTTP/2 fingerprinting.
/// Supports single and batch operations with proxy rotation.
/// Automatically detects PDF responses and extracts text via webclaw-pdf.
/// Automatically detects PDF responses and extracts text via noxa-pdf.
///
/// Two proxy modes:
/// - **Static**: single proxy (or none) baked into pre-built clients at construction.
@ -15,7 +15,7 @@ use std::time::{Duration, Instant};
use rand::seq::SliceRandom;
use tokio::sync::Semaphore;
use tracing::{debug, instrument, warn};
use webclaw_pdf::PdfMode;
use noxa_pdf::PdfMode;
use crate::browser::{self, BrowserProfile, BrowserVariant};
use crate::error::FetchError;
@ -75,11 +75,11 @@ pub struct BatchResult {
#[derive(Debug)]
pub struct BatchExtractResult {
pub url: String,
pub result: Result<webclaw_core::ExtractionResult, FetchError>,
pub result: Result<noxa_core::ExtractionResult, FetchError>,
}
/// Buffered response that owns its body. Provides the same sync API
/// that webclaw-http::Response used to provide.
/// that noxa-http::Response used to provide.
struct Response {
status: u16,
url: String,
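The struct is truncated here; the "owns its body" design usually means the response is drained into memory once, so every later accessor is a plain sync method. A sketch of the pattern with an assumed field set (named BufferedResponse to avoid implying this is the real struct):

// Sketch of the buffered-response pattern; fields beyond status/url are assumed.
struct BufferedResponse {
    status: u16,
    url: String,
    headers: http::HeaderMap,
    body: Vec<u8>, // fully buffered; no live connection is held
}

impl BufferedResponse {
    fn text(&self) -> String {
        String::from_utf8_lossy(&self.body).into_owned()
    }
}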
@ -268,8 +268,8 @@ impl FetchClient {
pub async fn fetch_and_extract(
&self,
url: &str,
) -> Result<webclaw_core::ExtractionResult, FetchError> {
self.fetch_and_extract_with_options(url, &webclaw_core::ExtractionOptions::default())
) -> Result<noxa_core::ExtractionResult, FetchError> {
self.fetch_and_extract_with_options(url, &noxa_core::ExtractionOptions::default())
.await
}
@ -278,8 +278,8 @@ impl FetchClient {
pub async fn fetch_and_extract_with_options(
&self,
url: &str,
options: &webclaw_core::ExtractionOptions,
) -> Result<webclaw_core::ExtractionResult, FetchError> {
options: &noxa_core::ExtractionOptions,
) -> Result<noxa_core::ExtractionResult, FetchError> {
// Reddit fallback: use their JSON API to get post + full comment tree.
if crate::reddit::is_reddit_url(url) {
let json_url = crate::reddit::json_url(url);
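Reddit serves a JSON rendering of a post page when .json is appended to the path, which is presumably what json_url produces; a plausible sketch under that assumption (the real helper may handle more cases):

/// Sketch: map a Reddit post URL to its JSON API equivalent.
fn json_url(url: &str) -> String {
    let base = url.split('?').next().unwrap_or(url).trim_end_matches('/');
    format!("{base}.json")
}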
@ -334,7 +334,7 @@ impl FetchClient {
"PDF fetch complete"
);
let pdf_result = webclaw_pdf::extract_pdf(bytes, self.pdf_mode.clone())?;
let pdf_result = noxa_pdf::extract_pdf(bytes, self.pdf_mode.clone())?;
Ok(pdf_to_extraction_result(&pdf_result, &final_url))
} else if let Some(doc_type) =
crate::document::is_document_content_type(&headers, &final_url)
@ -369,7 +369,7 @@ impl FetchClient {
debug!("linkedin extraction failed, falling back to standard");
}
let extraction = webclaw_core::extract_with_options(&html, Some(&final_url), options)?;
let extraction = noxa_core::extract_with_options(&html, Some(&final_url), options)?;
Ok(extraction)
}
@ -408,7 +408,7 @@ impl FetchClient {
self.fetch_and_extract_batch_with_options(
urls,
concurrency,
&webclaw_core::ExtractionOptions::default(),
&noxa_core::ExtractionOptions::default(),
)
.await
}
@ -418,7 +418,7 @@ impl FetchClient {
self: &Arc<Self>,
urls: &[&str],
concurrency: usize,
options: &webclaw_core::ExtractionOptions,
options: &noxa_core::ExtractionOptions,
) -> Vec<BatchExtractResult> {
let semaphore = Arc::new(Semaphore::new(concurrency));
let mut handles = Vec::with_capacity(urls.len());
@ -572,16 +572,16 @@ fn extract_homepage(url: &str) -> Option<String> {
.map(|u| format!("{}://{}/", u.scheme(), u.host_str().unwrap_or("")))
}
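Only the tail of extract_homepage is visible above; assuming the url crate, the whole helper plausibly reads:

/// Sketch: reduce any URL to its scheme://host/ homepage form.
fn extract_homepage(url: &str) -> Option<String> {
    url::Url::parse(url)
        .ok()
        .map(|u| format!("{}://{}/", u.scheme(), u.host_str().unwrap_or("")))
}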
/// Convert a webclaw-pdf PdfResult into a webclaw-core ExtractionResult.
/// Convert a noxa-pdf PdfResult into a noxa-core ExtractionResult.
fn pdf_to_extraction_result(
pdf: &webclaw_pdf::PdfResult,
pdf: &noxa_pdf::PdfResult,
url: &str,
) -> webclaw_core::ExtractionResult {
let markdown = webclaw_pdf::to_markdown(pdf);
) -> noxa_core::ExtractionResult {
let markdown = noxa_pdf::to_markdown(pdf);
let word_count = markdown.split_whitespace().count();
webclaw_core::ExtractionResult {
metadata: webclaw_core::Metadata {
noxa_core::ExtractionResult {
metadata: noxa_core::Metadata {
title: pdf.metadata.title.clone(),
description: pdf.metadata.subject.clone(),
author: pdf.metadata.author.clone(),
@ -593,7 +593,7 @@ fn pdf_to_extraction_result(
favicon: None,
word_count,
},
content: webclaw_core::Content {
content: noxa_core::Content {
markdown,
plain_text: pdf.text.clone(),
links: Vec::new(),
@ -713,10 +713,10 @@ mod tests {
#[test]
fn test_pdf_to_extraction_result() {
let pdf = webclaw_pdf::PdfResult {
let pdf = noxa_pdf::PdfResult {
text: "Hello from PDF.".into(),
page_count: 2,
metadata: webclaw_pdf::PdfMetadata {
metadata: noxa_pdf::PdfMetadata {
title: Some("My Doc".into()),
author: Some("Author".into()),
subject: Some("Testing".into()),


@ -91,7 +91,7 @@ pub struct CrawlResult {
pub struct PageResult {
pub url: String,
pub depth: usize,
pub extraction: Option<webclaw_core::ExtractionResult>,
pub extraction: Option<noxa_core::ExtractionResult>,
pub error: Option<String>,
#[serde(skip)]
pub elapsed: Duration,


@ -81,7 +81,7 @@ pub fn is_document_content_type(headers: &http::HeaderMap, url: &str) -> Option<
pub fn extract_document(
bytes: &[u8],
doc_type: DocType,
) -> Result<webclaw_core::ExtractionResult, FetchError> {
) -> Result<noxa_core::ExtractionResult, FetchError> {
debug!(
doc_type = doc_type.label(),
bytes = bytes.len(),
@ -98,8 +98,8 @@ pub fn extract_document(
let plain_text = strip_markdown_formatting(&markdown);
let word_count = plain_text.split_whitespace().count();
Ok(webclaw_core::ExtractionResult {
metadata: webclaw_core::Metadata {
Ok(noxa_core::ExtractionResult {
metadata: noxa_core::Metadata {
title: None,
description: None,
author: None,
@ -111,7 +111,7 @@ pub fn extract_document(
favicon: None,
word_count,
},
content: webclaw_core::Content {
content: noxa_core::Content {
markdown,
plain_text,
links: Vec::new(),


@ -14,10 +14,10 @@ pub enum FetchError {
BodyDecode(String),
#[error("extraction failed: {0}")]
Extraction(#[from] webclaw_core::ExtractError),
Extraction(#[from] noxa_core::ExtractError),
#[error("PDF extraction failed: {0}")]
Pdf(#[from] webclaw_pdf::PdfError),
Pdf(#[from] noxa_pdf::PdfError),
#[error("client build failed: {0}")]
Build(String),


@ -1,6 +1,6 @@
//! webclaw-fetch: HTTP client layer with browser TLS fingerprint impersonation.
//! noxa-fetch: HTTP client layer with browser TLS fingerprint impersonation.
//! Uses wreq (BoringSSL) for browser-grade TLS + HTTP/2 fingerprinting.
//! Automatically detects PDF responses and delegates to webclaw-pdf.
//! Automatically detects PDF responses and delegates to noxa-pdf.
pub mod browser;
pub mod client;
pub mod crawler;
@ -19,4 +19,4 @@ pub use error::FetchError;
pub use http::HeaderMap;
pub use proxy::{parse_proxy_file, parse_proxy_line};
pub use sitemap::SitemapEntry;
pub use webclaw_pdf::PdfMode;
pub use noxa_pdf::PdfMode;


@ -5,7 +5,7 @@
/// Profile, etc. We parse these to reconstruct post + comments as markdown.
use serde_json::Value;
use tracing::debug;
use webclaw_core::{Content, ExtractionResult, Metadata};
use noxa_core::{Content, ExtractionResult, Metadata};
/// Check if a URL is a LinkedIn post/activity.
pub fn is_linkedin_post(url: &str) -> bool {
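The body of the predicate is not shown; a guess based only on LinkedIn's public URL shapes (the real check may be stricter):

/// Sketch: LinkedIn posts typically live under /posts/ or /feed/update/.
fn is_linkedin_post(url: &str) -> bool {
    url.contains("linkedin.com")
        && (url.contains("/posts/") || url.contains("/feed/update/"))
}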


@ -5,7 +5,7 @@
/// comment tree as structured JSON, which we convert to clean markdown.
use serde::Deserialize;
use tracing::debug;
use webclaw_core::{Content, ExtractionResult, Metadata};
use noxa_core::{Content, ExtractionResult, Metadata};
/// Check if a URL points to a Reddit post/comment page.
pub fn is_reddit_url(url: &str) -> bool {


@ -1,6 +1,6 @@
//! Browser TLS + HTTP/2 fingerprint profiles built on wreq (BoringSSL).
//!
//! Replaces the old webclaw-http/webclaw-tls patched rustls stack.
//! Replaces the old noxa-http/noxa-tls patched rustls stack.
//! Each profile configures TLS options (cipher suites, curves, extensions,
//! PSK, ECH GREASE) and HTTP/2 options (SETTINGS order, pseudo-header order,
//! stream dependency, priorities) to match real browser fingerprints.


@ -1,6 +1,6 @@
[package]
name = "webclaw-llm"
description = "LLM integration for webclaw — local-first hybrid architecture (Ollama -> OpenAI -> Anthropic)"
name = "noxa-llm"
description = "LLM integration for noxa — local-first hybrid architecture (Ollama -> OpenAI -> Anthropic)"
version.workspace = true
edition.workspace = true
license.workspace = true


@ -1,8 +1,8 @@
/// webclaw-llm: LLM integration with local-first hybrid architecture.
/// noxa-llm: LLM integration with local-first hybrid architecture.
///
/// Provider chain tries Ollama (local) first, falls back to OpenAI, then Anthropic.
/// Provides schema-based extraction, prompt extraction, and summarization
/// on top of webclaw-core's content pipeline.
/// on top of noxa-core's content pipeline.
pub mod chain;
pub mod clean;
pub mod error;
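The provider chain described above (Ollama first, then OpenAI, then Anthropic) is a try-in-order fallback; a minimal sketch of the pattern, written sync for brevity and with a hypothetical trait in place of the crate's real LlmProvider:

// Hypothetical sketch of a try-in-order provider chain.
trait Provider {
    fn complete(&self, prompt: &str) -> Result<String, String>;
}

fn complete_with_fallback(providers: &[Box<dyn Provider>], prompt: &str) -> Result<String, String> {
    let mut last_err = String::from("no providers configured");
    for p in providers {
        match p.complete(prompt) {
            Ok(text) => return Ok(text), // first success wins
            Err(e) => last_err = e,      // remember the failure, try the next
        }
    }
    Err(last_err)
}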


@ -151,7 +151,7 @@ mod tests {
// Env var fallback tests mutate process-global state and race with parallel tests.
// The code path is trivial (load_api_key -> env::var().ok()). Run in isolation if needed:
// cargo test -p webclaw-llm env_var -- --ignored --test-threads=1
// cargo test -p noxa-llm env_var -- --ignored --test-threads=1
#[test]
#[ignore = "mutates process env; run with --test-threads=1"]
fn env_var_key_fallback() {


@ -29,7 +29,7 @@ mod tests {
#[test]
fn none_override_with_no_env_returns_none() {
assert_eq!(
load_api_key(None, "WEBCLAW_TEST_NONEXISTENT_KEY_12345"),
load_api_key(None, "NOXA_TEST_NONEXISTENT_KEY_12345"),
None
);
}
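Given this test and the comments elsewhere ("load_api_key -> env::var().ok()"), the helper plausibly amounts to the following; the exact signature is an assumption:

/// Sketch: an explicit override wins; otherwise read the named env var.
fn load_api_key(explicit: Option<String>, env_key: &str) -> Option<String> {
    explicit.or_else(|| std::env::var(env_key).ok())
}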


@ -140,7 +140,7 @@ mod tests {
// Env var fallback is a trivial `env::var().ok()` -- not worth the flakiness
// of manipulating process-global state. Run in isolation if needed:
// cargo test -p webclaw-llm env_var_fallback -- --ignored --test-threads=1
// cargo test -p noxa-llm env_var_fallback -- --ignored --test-threads=1
#[test]
#[ignore = "mutates process env; run with --test-threads=1"]
fn env_var_fallback() {


@ -162,7 +162,7 @@ mod tests {
// Env var fallback tests mutate process-global state and race with parallel tests.
// The code path is trivial (load_api_key -> env::var().ok()). Run in isolation if needed:
// cargo test -p webclaw-llm env_var -- --ignored --test-threads=1
// cargo test -p noxa-llm env_var -- --ignored --test-threads=1
#[test]
#[ignore = "mutates process env; run with --test-threads=1"]
fn env_var_key_fallback() {


@ -1,4 +1,4 @@
/// Shared test utilities for webclaw-llm.
/// Shared test utilities for noxa-llm.
///
/// Provides a configurable mock LLM provider for unit tests across
/// extract, chain, and other modules that need a fake LLM backend.


@ -1,19 +1,19 @@
[package]
name = "webclaw-mcp"
description = "MCP server for webclaw web extraction toolkit"
name = "noxa-mcp"
description = "MCP server for noxa web extraction toolkit"
version.workspace = true
edition.workspace = true
license.workspace = true
[[bin]]
name = "webclaw-mcp"
name = "noxa-mcp"
path = "src/main.rs"
[dependencies]
webclaw-core = { workspace = true }
webclaw-fetch = { workspace = true }
webclaw-llm = { workspace = true }
webclaw-pdf = { workspace = true }
noxa-core = { workspace = true }
noxa-fetch = { workspace = true }
noxa-llm = { workspace = true }
noxa-pdf = { workspace = true }
rmcp = { version = "1.2", features = ["server", "macros", "transport-io", "schemars"] }
schemars = "1.0"
dotenvy = { workspace = true }


@ -1,25 +1,26 @@
/// Cloud API fallback for protected sites.
///
/// When local fetch returns a challenge page, this module retries
/// via api.webclaw.io. Requires WEBCLAW_API_KEY to be set.
/// via api.noxa.io. Requires NOXA_API_KEY to be set.
use std::time::Duration;
use serde_json::{Value, json};
use tracing::info;
const API_BASE: &str = "https://api.webclaw.io/v1";
/// Lightweight client for the webclaw cloud API.
const API_BASE: &str = "https://api.noxa.io/v1";
/// Lightweight client for the noxa cloud API.
pub struct CloudClient {
api_key: String,
http: reqwest::Client,
}
impl CloudClient {
/// Create a new cloud client from WEBCLAW_API_KEY env var.
/// Create a new cloud client from NOXA_API_KEY env var.
/// Returns None if the key is not set.
pub fn from_env() -> Option<Self> {
let key = std::env::var("WEBCLAW_API_KEY").ok()?;
let key = std::env::var("NOXA_API_KEY").ok()?;
if key.is_empty() {
return None;
}
@ -114,7 +115,7 @@ fn truncate_error(text: &str) -> &str {
/// Check whether fetched HTML looks like a common bot-protection challenge page.
pub fn is_bot_protected(html: &str, headers: &webclaw_fetch::HeaderMap) -> bool {
pub fn is_bot_protected(html: &str, headers: &noxa_fetch::HeaderMap) -> bool {
let html_lower = html.to_lowercase();
// Cloudflare challenge page
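The Cloudflare branch is cut off here; such checks typically key on well-known challenge-page markers. An illustrative (not verbatim) version:

// Marker strings are illustrative; the real checks in cloud.rs may differ.
fn looks_like_cloudflare_challenge(html_lower: &str) -> bool {
    html_lower.contains("just a moment")
        || html_lower.contains("checking your browser")
        || html_lower.contains("cf-chl")
}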
@ -199,7 +200,7 @@ pub fn needs_js_rendering(word_count: usize, html: &str) -> bool {
/// Result of a smart fetch: either local extraction or cloud API response.
pub enum SmartFetchResult {
/// Successfully extracted locally.
Local(Box<webclaw_core::ExtractionResult>),
Local(Box<noxa_core::ExtractionResult>),
/// Fell back to cloud API. Contains the API response JSON.
Cloud(Value),
}
@ -210,7 +211,7 @@ pub enum SmartFetchResult {
/// If no API key is configured and local fetch is blocked, returns an error
/// with a helpful message.
pub async fn smart_fetch(
client: &webclaw_fetch::FetchClient,
client: &noxa_fetch::FetchClient,
cloud: Option<&CloudClient>,
url: &str,
include_selectors: &[String],
@ -239,7 +240,7 @@ pub async fn smart_fetch(
}
// Step 3: Extract locally
let options = webclaw_core::ExtractionOptions {
let options = noxa_core::ExtractionOptions {
include_selectors: include_selectors.to_vec(),
exclude_selectors: exclude_selectors.to_vec(),
only_main_content,
@ -247,7 +248,7 @@ pub async fn smart_fetch(
};
let extraction =
webclaw_core::extract_with_options(&fetch_result.html, Some(&fetch_result.url), &options)
noxa_core::extract_with_options(&fetch_result.html, Some(&fetch_result.url), &options)
.map_err(|e| format!("Extraction failed: {e}"))?;
// Step 4: Check for JS-rendered pages (low content from large HTML)
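The step-4 heuristic ("low content from large HTML") suggests a size-versus-yield test; one plausible shape for needs_js_rendering, with both thresholds purely illustrative:

/// Sketch: a large HTML payload that yields almost no extracted words
/// is a strong hint the page renders its content with JavaScript.
fn needs_js_rendering(word_count: usize, html: &str) -> bool {
    word_count < 50 && html.len() > 50_000 // illustrative thresholds
}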
@ -295,8 +296,8 @@ async fn cloud_fallback(
Ok(SmartFetchResult::Cloud(resp))
}
None => Err(format!(
"Bot protection detected on {url}. Set WEBCLAW_API_KEY for automatic cloud bypass. \
Get a key at https://webclaw.io"
"Bot protection detected on {url}. Set NOXA_API_KEY for automatic cloud bypass. \
Get a key at https://noxa.io"
)),
}
}
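A caller consumes the result by branching on the two variants; a hedged usage sketch (the "markdown" key on the cloud JSON is an assumption, not confirmed by this diff):

// Sketch: branch on the two outcomes of a smart fetch.
fn handle(result: SmartFetchResult) {
    match result {
        SmartFetchResult::Local(extraction) => {
            // Local extraction succeeded: use the structured result directly.
            println!("{}", extraction.content.markdown);
        }
        SmartFetchResult::Cloud(resp) => {
            // Cloud fallback returns raw JSON; the field name is assumed here.
            if let Some(md) = resp.get("markdown").and_then(|v| v.as_str()) {
                println!("{md}");
            }
        }
    }
}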


@ -1,4 +1,4 @@
/// webclaw-mcp: MCP (Model Context Protocol) server for webclaw.
/// noxa-mcp: MCP (Model Context Protocol) server for noxa.
/// Exposes web extraction tools over stdio transport for AI agents
/// like Claude Desktop, Claude Code, and other MCP clients.
mod cloud;
@ -8,7 +8,7 @@ mod tools;
use rmcp::ServiceExt;
use rmcp::transport::stdio;
use server::WebclawMcp;
use server::NoxaMcp;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
@ -21,7 +21,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_ansi(false)
.init();
let service = WebclawMcp::new().await.serve(stdio()).await?;
let service = NoxaMcp::new().await.serve(stdio()).await?;
service.waiting().await?;
Ok(())


@ -1,9 +1,9 @@
/// MCP server implementation for webclaw.
/// MCP server implementation for noxa.
/// Exposes web extraction capabilities as tools for AI agents.
///
/// Uses a local-first architecture: fetches pages directly, then falls back
/// to the webclaw cloud API (api.webclaw.io) when bot protection or
/// JS rendering is detected. Set WEBCLAW_API_KEY for automatic fallback.
/// to the noxa cloud API (api.noxa.io) when bot protection or
/// JS rendering is detected. Set NOXA_API_KEY for automatic fallback.
use std::sync::Arc;
use std::time::Duration;
@ -18,19 +18,19 @@ use url::Url;
use crate::cloud::{self, CloudClient, SmartFetchResult};
use crate::tools::*;
pub struct WebclawMcp {
pub struct NoxaMcp {
tool_router: ToolRouter<Self>,
fetch_client: Arc<webclaw_fetch::FetchClient>,
llm_chain: Option<webclaw_llm::ProviderChain>,
fetch_client: Arc<noxa_fetch::FetchClient>,
llm_chain: Option<noxa_llm::ProviderChain>,
cloud: Option<CloudClient>,
}
/// Parse a browser string into a BrowserProfile.
fn parse_browser(browser: Option<&str>) -> webclaw_fetch::BrowserProfile {
fn parse_browser(browser: Option<&str>) -> noxa_fetch::BrowserProfile {
match browser {
Some("firefox") => webclaw_fetch::BrowserProfile::Firefox,
Some("random") => webclaw_fetch::BrowserProfile::Random,
_ => webclaw_fetch::BrowserProfile::Chrome,
Some("firefox") => noxa_fetch::BrowserProfile::Firefox,
Some("random") => noxa_fetch::BrowserProfile::Random,
_ => noxa_fetch::BrowserProfile::Chrome,
}
}
@ -58,28 +58,28 @@ const LOCAL_FETCH_TIMEOUT: Duration = Duration::from_secs(30);
const RESEARCH_MAX_POLLS: u32 = 200;
#[tool_router]
impl WebclawMcp {
impl NoxaMcp {
pub async fn new() -> Self {
let mut config = webclaw_fetch::FetchConfig::default();
let mut config = noxa_fetch::FetchConfig::default();
// Load proxy config from env vars or local file
if let Ok(proxy) = std::env::var("WEBCLAW_PROXY") {
info!("using single proxy from WEBCLAW_PROXY");
if let Ok(proxy) = std::env::var("NOXA_PROXY") {
info!("using single proxy from NOXA_PROXY");
config.proxy = Some(proxy);
}
let proxy_file = std::env::var("WEBCLAW_PROXY_FILE")
let proxy_file = std::env::var("NOXA_PROXY_FILE")
.ok()
.unwrap_or_else(|| "proxies.txt".to_string());
if std::path::Path::new(&proxy_file).exists()
&& let Ok(pool) = webclaw_fetch::parse_proxy_file(&proxy_file)
&& let Ok(pool) = noxa_fetch::parse_proxy_file(&proxy_file)
&& !pool.is_empty()
{
info!(count = pool.len(), file = %proxy_file, "loaded proxy pool");
config.proxy_pool = pool;
}
let fetch_client = match webclaw_fetch::FetchClient::new(config) {
let fetch_client = match noxa_fetch::FetchClient::new(config) {
Ok(client) => client,
Err(e) => {
error!("failed to build FetchClient: {e}");
@ -87,7 +87,7 @@ impl WebclawMcp {
}
};
let chain = webclaw_llm::ProviderChain::default().await;
let chain = noxa_llm::ProviderChain::default().await;
let llm_chain = if chain.is_empty() {
warn!("no LLM providers available -- extract/summarize tools will fail");
None
@ -98,11 +98,11 @@ impl WebclawMcp {
let cloud = CloudClient::from_env();
if cloud.is_some() {
info!("cloud API fallback enabled (WEBCLAW_API_KEY set)");
info!("cloud API fallback enabled (NOXA_API_KEY set)");
} else {
warn!(
"WEBCLAW_API_KEY not set -- bot-protected sites will return challenge pages. \
Get a key at https://webclaw.io"
"NOXA_API_KEY not set -- bot-protected sites will return challenge pages. \
Get a key at https://noxa.io"
);
}
@ -129,7 +129,7 @@ impl WebclawMcp {
}
/// Scrape a single URL and extract its content as markdown, LLM-optimized text, plain text, or full JSON.
/// Automatically falls back to the webclaw cloud API when bot protection or JS rendering is detected.
/// Automatically falls back to the noxa cloud API when bot protection or JS rendering is detected.
#[tool]
async fn scrape(&self, Parameters(params): Parameters<ScrapeParams>) -> Result<String, String> {
validate_url(&params.url)?;
@ -147,21 +147,21 @@ impl WebclawMcp {
.map(|c| c.join("; "));
// Use a custom client if non-default browser or cookies are provided
let is_default_browser = matches!(browser, webclaw_fetch::BrowserProfile::Chrome);
let is_default_browser = matches!(browser, noxa_fetch::BrowserProfile::Chrome);
let needs_custom = !is_default_browser || cookie_header.is_some();
let custom_client;
let client: &webclaw_fetch::FetchClient = if needs_custom {
let client: &noxa_fetch::FetchClient = if needs_custom {
let mut headers = std::collections::HashMap::new();
headers.insert("Accept-Language".to_string(), "en-US,en;q=0.9".to_string());
if let Some(ref cookies) = cookie_header {
headers.insert("Cookie".to_string(), cookies.clone());
}
let config = webclaw_fetch::FetchConfig {
let config = noxa_fetch::FetchConfig {
browser,
headers,
..Default::default()
};
custom_client = webclaw_fetch::FetchClient::new(config)
custom_client = noxa_fetch::FetchClient::new(config)
.map_err(|e| format!("Failed to build client: {e}"))?;
&custom_client
} else {
@ -183,7 +183,7 @@ impl WebclawMcp {
match result {
SmartFetchResult::Local(extraction) => {
let output = match format {
"llm" => webclaw_core::to_llm_text(&extraction, Some(&params.url)),
"llm" => noxa_core::to_llm_text(&extraction, Some(&params.url)),
"text" => extraction.content.plain_text,
"json" => serde_json::to_string_pretty(&extraction).unwrap_or_default(),
_ => extraction.content.markdown,
@ -221,7 +221,7 @@ impl WebclawMcp {
let format = params.format.as_deref().unwrap_or("markdown");
let config = webclaw_fetch::CrawlConfig {
let config = noxa_fetch::CrawlConfig {
max_depth: params.depth.unwrap_or(2) as usize,
max_pages: params.max_pages.unwrap_or(50),
concurrency: params.concurrency.unwrap_or(5),
@ -229,7 +229,7 @@ impl WebclawMcp {
..Default::default()
};
let crawler = webclaw_fetch::Crawler::new(&params.url, config)
let crawler = noxa_fetch::Crawler::new(&params.url, config)
.map_err(|e| format!("Crawler init failed: {e}"))?;
let result = crawler.crawl(&params.url, None).await;
@ -243,7 +243,7 @@ impl WebclawMcp {
output.push_str(&format!("--- {} (depth {}) ---\n", page.url, page.depth));
if let Some(ref extraction) = page.extraction {
let content = match format {
"llm" => webclaw_core::to_llm_text(extraction, Some(&page.url)),
"llm" => noxa_core::to_llm_text(extraction, Some(&page.url)),
"text" => extraction.content.plain_text.clone(),
_ => extraction.content.markdown.clone(),
};
@ -261,7 +261,7 @@ impl WebclawMcp {
#[tool]
async fn map(&self, Parameters(params): Parameters<MapParams>) -> Result<String, String> {
validate_url(&params.url)?;
let entries = webclaw_fetch::sitemap::discover(&self.fetch_client, &params.url)
let entries = noxa_fetch::sitemap::discover(&self.fetch_client, &params.url)
.await
.map_err(|e| format!("Sitemap discovery failed: {e}"))?;
@ -302,7 +302,7 @@ impl WebclawMcp {
match &r.result {
Ok(extraction) => {
let content = match format {
"llm" => webclaw_core::to_llm_text(extraction, Some(&r.url)),
"llm" => noxa_core::to_llm_text(extraction, Some(&r.url)),
"text" => extraction.content.plain_text.clone(),
_ => extraction.content.markdown.clone(),
};
@ -319,7 +319,7 @@ impl WebclawMcp {
}
/// Extract structured data from a web page using an LLM. Provide either a JSON schema or a natural language prompt.
/// Falls back to the webclaw cloud API when no local LLM is available or bot protection is detected.
/// Falls back to the noxa cloud API when no local LLM is available or bot protection is detected.
#[tool]
async fn extract(
&self,
@ -334,7 +334,7 @@ impl WebclawMcp {
// No local LLM — fall back to cloud API directly
if self.llm_chain.is_none() {
let cloud = self.cloud.as_ref().ok_or(
"No LLM providers available. Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or WEBCLAW_API_KEY for cloud fallback.",
"No LLM providers available. Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or NOXA_API_KEY for cloud fallback.",
)?;
let mut body = json!({"url": params.url});
if let Some(ref schema) = params.schema {
@ -351,7 +351,7 @@ impl WebclawMcp {
let llm_content = match self.smart_fetch_llm(&params.url).await? {
SmartFetchResult::Local(extraction) => {
webclaw_core::to_llm_text(&extraction, Some(&params.url))
noxa_core::to_llm_text(&extraction, Some(&params.url))
}
SmartFetchResult::Cloud(resp) => resp
.get("llm")
@ -362,12 +362,12 @@ impl WebclawMcp {
};
let data = if let Some(ref schema) = params.schema {
webclaw_llm::extract::extract_json(&llm_content, schema, chain, None)
noxa_llm::extract::extract_json(&llm_content, schema, chain, None)
.await
.map_err(|e| format!("LLM extraction failed: {e}"))?
} else {
let prompt = params.prompt.as_deref().unwrap();
webclaw_llm::extract::extract_with_prompt(&llm_content, prompt, chain, None)
noxa_llm::extract::extract_with_prompt(&llm_content, prompt, chain, None)
.await
.map_err(|e| format!("LLM extraction failed: {e}"))?
};
@ -376,7 +376,7 @@ impl WebclawMcp {
}
/// Summarize the content of a web page using an LLM.
/// Falls back to the webclaw cloud API when no local LLM is available or bot protection is detected.
/// Falls back to the noxa cloud API when no local LLM is available or bot protection is detected.
#[tool]
async fn summarize(
&self,
@ -387,7 +387,7 @@ impl WebclawMcp {
// No local LLM — fall back to cloud API directly
if self.llm_chain.is_none() {
let cloud = self.cloud.as_ref().ok_or(
"No LLM providers available. Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or WEBCLAW_API_KEY for cloud fallback.",
"No LLM providers available. Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or NOXA_API_KEY for cloud fallback.",
)?;
let mut body = json!({"url": params.url});
if let Some(sentences) = params.max_sentences {
@ -405,7 +405,7 @@ impl WebclawMcp {
let llm_content = match self.smart_fetch_llm(&params.url).await? {
SmartFetchResult::Local(extraction) => {
webclaw_core::to_llm_text(&extraction, Some(&params.url))
noxa_core::to_llm_text(&extraction, Some(&params.url))
}
SmartFetchResult::Cloud(resp) => resp
.get("llm")
@ -415,17 +415,17 @@ impl WebclawMcp {
.to_string(),
};
webclaw_llm::summarize::summarize(&llm_content, params.max_sentences, chain, None)
noxa_llm::summarize::summarize(&llm_content, params.max_sentences, chain, None)
.await
.map_err(|e| format!("Summarization failed: {e}"))
}
/// Compare the current content of a URL against a previous extraction snapshot, showing what changed.
/// Automatically falls back to the webclaw cloud API when bot protection is detected.
/// Automatically falls back to the noxa cloud API when bot protection is detected.
#[tool]
async fn diff(&self, Parameters(params): Parameters<DiffParams>) -> Result<String, String> {
validate_url(&params.url)?;
let previous: webclaw_core::ExtractionResult =
let previous: noxa_core::ExtractionResult =
serde_json::from_str(&params.previous_snapshot)
.map_err(|e| format!("Failed to parse previous_snapshot JSON: {e}"))?;
@ -442,7 +442,7 @@ impl WebclawMcp {
match result {
SmartFetchResult::Local(current) => {
let content_diff = webclaw_core::diff::diff(&previous, &current);
let content_diff = noxa_core::diff::diff(&previous, &current);
Ok(serde_json::to_string_pretty(&content_diff).unwrap_or_default())
}
SmartFetchResult::Cloud(resp) => {
@ -457,8 +457,8 @@ impl WebclawMcp {
);
}
let current = webclaw_core::ExtractionResult {
content: webclaw_core::Content {
let current = noxa_core::ExtractionResult {
content: noxa_core::Content {
markdown: markdown.to_string(),
plain_text: markdown.to_string(),
links: Vec::new(),
@ -466,7 +466,7 @@ impl WebclawMcp {
code_blocks: Vec::new(),
raw_html: None,
},
metadata: webclaw_core::Metadata {
metadata: noxa_core::Metadata {
title: None,
description: None,
author: None,
@ -482,14 +482,14 @@ impl WebclawMcp {
structured_data: Vec::new(),
};
let content_diff = webclaw_core::diff::diff(&previous, &current);
let content_diff = noxa_core::diff::diff(&previous, &current);
Ok(serde_json::to_string_pretty(&content_diff).unwrap_or_default())
}
}
}
/// Extract brand identity (colors, fonts, logo, favicon) from a website's HTML and CSS.
/// Automatically falls back to the webclaw cloud API when bot protection is detected.
/// Automatically falls back to the noxa cloud API when bot protection is detected.
#[tool]
async fn brand(&self, Parameters(params): Parameters<BrandParams>) -> Result<String, String> {
validate_url(&params.url)?;
@ -508,21 +508,21 @@ impl WebclawMcp {
return Ok(serde_json::to_string_pretty(&resp).unwrap_or_default());
} else {
return Err(format!(
"Bot protection detected on {}. Set WEBCLAW_API_KEY for automatic cloud bypass. \
Get a key at https://webclaw.io",
"Bot protection detected on {}. Set NOXA_API_KEY for automatic cloud bypass. \
Get a key at https://noxa.io",
params.url
));
}
}
let identity =
webclaw_core::brand::extract_brand(&fetch_result.html, Some(&fetch_result.url));
noxa_core::brand::extract_brand(&fetch_result.html, Some(&fetch_result.url));
Ok(serde_json::to_string_pretty(&identity).unwrap_or_default())
}
/// Run a deep research investigation on a topic or question. Requires WEBCLAW_API_KEY.
/// Saves full result to ~/.webclaw/research/ and returns the file path + key findings.
/// Run a deep research investigation on a topic or question. Requires NOXA_API_KEY.
/// Saves full result to ~/.noxa/research/ and returns the file path + key findings.
/// Checks cache first — same query returns the cached result without spending credits.
#[tool]
async fn research(
@ -532,7 +532,7 @@ impl WebclawMcp {
let cloud = self
.cloud
.as_ref()
.ok_or("Research requires WEBCLAW_API_KEY. Get a key at https://webclaw.io")?;
.ok_or("Research requires NOXA_API_KEY. Get a key at https://noxa.io")?;
let research_dir = research_dir();
let slug = slugify(&params.query);
@ -622,17 +622,17 @@ impl WebclawMcp {
Err(format!(
"Research job {job_id} timed out after ~10 minutes of polling. \
Check status manually via the webclaw API: GET /v1/research/{job_id}"
Check status manually via the noxa API: GET /v1/research/{job_id}"
))
}
/// Search the web for a query and return structured results. Requires WEBCLAW_API_KEY.
/// Search the web for a query and return structured results. Requires NOXA_API_KEY.
#[tool]
async fn search(&self, Parameters(params): Parameters<SearchParams>) -> Result<String, String> {
let cloud = self
.cloud
.as_ref()
.ok_or("Search requires WEBCLAW_API_KEY. Get a key at https://webclaw.io")?;
.ok_or("Search requires NOXA_API_KEY. Get a key at https://noxa.io")?;
let mut body = json!({ "query": params.query });
if let Some(num) = params.num_results {
@ -670,12 +670,12 @@ impl WebclawMcp {
}
#[tool_handler]
impl ServerHandler for WebclawMcp {
impl ServerHandler for NoxaMcp {
fn get_info(&self) -> ServerInfo {
ServerInfo::new(ServerCapabilities::builder().enable_tools().build())
.with_server_info(Implementation::new("webclaw-mcp", env!("CARGO_PKG_VERSION")))
.with_server_info(Implementation::new("noxa-mcp", env!("CARGO_PKG_VERSION")))
.with_instructions(String::from(
"Webclaw MCP server -- web content extraction for AI agents. \
"Noxa MCP server -- web content extraction for AI agents. \
Tools: scrape, crawl, map, batch, extract, summarize, diff, brand, research, search.",
))
}
@ -688,7 +688,7 @@ impl ServerHandler for WebclawMcp {
fn research_dir() -> std::path::PathBuf {
let dir = dirs::home_dir()
.unwrap_or_else(|| std::path::PathBuf::from("."))
.join(".webclaw")
.join(".noxa")
.join("research");
std::fs::create_dir_all(&dir).ok();
dir


@ -1,6 +1,6 @@
[package]
name = "webclaw-pdf"
description = "PDF text extraction for webclaw"
name = "noxa-pdf"
description = "PDF text extraction for noxa"
version.workspace = true
edition.workspace = true
license.workspace = true


@ -1,4 +1,4 @@
/// PDF text extraction for webclaw.
/// PDF text extraction for noxa.
///
/// Uses pdf-extract (backed by lopdf) to pull text from PDF bytes.
/// No OCR -- text-based PDFs only. Scanned PDFs return EmptyPdf in Auto mode.
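A minimal sketch of that Auto-mode rule, using a plain string error in place of the crate's PdfError::EmptyPdf:

// Sketch: extraction that "succeeds" but yields no text (a scanned PDF)
// is surfaced as an explicit empty-PDF error in Auto mode.
fn check_auto(text: String) -> Result<String, String> {
    if text.trim().is_empty() {
        Err("EmptyPdf: no extractable text; PDF is likely scanned".to_string())
    } else {
        Ok(text)
    }
}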