mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-05-13 17:02:36 +02:00
chore: rebrand webclaw to noxa
This commit is contained in:
parent
a4c351d5ae
commit
8674b60b4e
86 changed files with 781 additions and 2121 deletions
26
crates/noxa-cli/Cargo.toml
Normal file
26
crates/noxa-cli/Cargo.toml
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
[package]
|
||||
name = "noxa-cli"
|
||||
description = "CLI for extracting web content into LLM-optimized formats"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[[bin]]
|
||||
name = "noxa"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
noxa-core = { workspace = true }
|
||||
noxa-fetch = { workspace = true }
|
||||
noxa-llm = { workspace = true }
|
||||
noxa-pdf = { workspace = true }
|
||||
dotenvy = { workspace = true }
|
||||
rand = "0.8"
|
||||
serde_json = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
clap = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
tracing-subscriber = { workspace = true }
|
||||
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
|
||||
regex = "1"
|
||||
url = "2"
|
||||
80
crates/noxa-cli/src/cloud.rs
Normal file
80
crates/noxa-cli/src/cloud.rs
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
/// Cloud API client for automatic fallback when local extraction fails.
|
||||
///
|
||||
/// When NOXA_API_KEY is set (or --api-key is passed), the CLI can fall back
|
||||
/// to api.noxa.io for bot-protected or JS-rendered sites. With --cloud flag,
|
||||
/// all requests go through the cloud API directly.
|
||||
///
|
||||
/// NOTE: The canonical, full-featured cloud module lives in noxa-mcp/src/cloud.rs
|
||||
/// (smart_fetch, bot detection, JS rendering checks). This is the minimal subset
|
||||
/// needed by the CLI. Kept separate to avoid pulling in rmcp via noxa-mcp.
|
||||
/// and adding noxa-mcp as a dependency would pull in rmcp.
|
||||
use serde_json::{Value, json};
|
||||
|
||||
const API_BASE: &str = "https://api.noxa.io/v1";
|
||||
|
||||
pub struct CloudClient {
|
||||
api_key: String,
|
||||
http: reqwest::Client,
|
||||
}
|
||||
|
||||
impl CloudClient {
|
||||
/// Create from explicit key or NOXA_API_KEY env var.
|
||||
pub fn new(explicit_key: Option<&str>) -> Option<Self> {
|
||||
let key = explicit_key
|
||||
.map(String::from)
|
||||
.or_else(|| std::env::var("NOXA_API_KEY").ok())
|
||||
.filter(|k| !k.is_empty())?;
|
||||
|
||||
Some(Self {
|
||||
api_key: key,
|
||||
http: reqwest::Client::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Scrape via the cloud API.
|
||||
pub async fn scrape(
|
||||
&self,
|
||||
url: &str,
|
||||
formats: &[&str],
|
||||
include_selectors: &[String],
|
||||
exclude_selectors: &[String],
|
||||
only_main_content: bool,
|
||||
) -> Result<Value, String> {
|
||||
let mut body = json!({
|
||||
"url": url,
|
||||
"formats": formats,
|
||||
});
|
||||
if only_main_content {
|
||||
body["only_main_content"] = json!(true);
|
||||
}
|
||||
if !include_selectors.is_empty() {
|
||||
body["include_selectors"] = json!(include_selectors);
|
||||
}
|
||||
if !exclude_selectors.is_empty() {
|
||||
body["exclude_selectors"] = json!(exclude_selectors);
|
||||
}
|
||||
self.post("scrape", body).await
|
||||
}
|
||||
|
||||
async fn post(&self, endpoint: &str, body: Value) -> Result<Value, String> {
|
||||
let resp = self
|
||||
.http
|
||||
.post(format!("{API_BASE}/{endpoint}"))
|
||||
.header("Authorization", format!("Bearer {}", self.api_key))
|
||||
.json(&body)
|
||||
.timeout(std::time::Duration::from_secs(120))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("cloud API request failed: {e}"))?;
|
||||
|
||||
let status = resp.status();
|
||||
if !status.is_success() {
|
||||
let text = resp.text().await.unwrap_or_default();
|
||||
return Err(format!("cloud API error {status}: {text}"));
|
||||
}
|
||||
|
||||
resp.json::<Value>()
|
||||
.await
|
||||
.map_err(|e| format!("cloud API response parse failed: {e}"))
|
||||
}
|
||||
}
|
||||
2458
crates/noxa-cli/src/main.rs
Normal file
2458
crates/noxa-cli/src/main.rs
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue