mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-04-27 17:26:21 +02:00
chore: rebrand webclaw to noxa
This commit is contained in:
parent
a4c351d5ae
commit
8674b60b4e
86 changed files with 781 additions and 2121 deletions
80
crates/noxa-cli/src/cloud.rs
Normal file
80
crates/noxa-cli/src/cloud.rs
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
/// Cloud API client for automatic fallback when local extraction fails.
|
||||
///
|
||||
/// When NOXA_API_KEY is set (or --api-key is passed), the CLI can fall back
|
||||
/// to api.noxa.io for bot-protected or JS-rendered sites. With --cloud flag,
|
||||
/// all requests go through the cloud API directly.
|
||||
///
|
||||
/// NOTE: The canonical, full-featured cloud module lives in noxa-mcp/src/cloud.rs
|
||||
/// (smart_fetch, bot detection, JS rendering checks). This is the minimal subset
|
||||
/// needed by the CLI. Kept separate to avoid pulling in rmcp via noxa-mcp.
|
||||
/// and adding noxa-mcp as a dependency would pull in rmcp.
|
||||
use serde_json::{Value, json};
|
||||
|
||||
const API_BASE: &str = "https://api.noxa.io/v1";
|
||||
|
||||
pub struct CloudClient {
|
||||
api_key: String,
|
||||
http: reqwest::Client,
|
||||
}
|
||||
|
||||
impl CloudClient {
|
||||
/// Create from explicit key or NOXA_API_KEY env var.
|
||||
pub fn new(explicit_key: Option<&str>) -> Option<Self> {
|
||||
let key = explicit_key
|
||||
.map(String::from)
|
||||
.or_else(|| std::env::var("NOXA_API_KEY").ok())
|
||||
.filter(|k| !k.is_empty())?;
|
||||
|
||||
Some(Self {
|
||||
api_key: key,
|
||||
http: reqwest::Client::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Scrape via the cloud API.
|
||||
pub async fn scrape(
|
||||
&self,
|
||||
url: &str,
|
||||
formats: &[&str],
|
||||
include_selectors: &[String],
|
||||
exclude_selectors: &[String],
|
||||
only_main_content: bool,
|
||||
) -> Result<Value, String> {
|
||||
let mut body = json!({
|
||||
"url": url,
|
||||
"formats": formats,
|
||||
});
|
||||
if only_main_content {
|
||||
body["only_main_content"] = json!(true);
|
||||
}
|
||||
if !include_selectors.is_empty() {
|
||||
body["include_selectors"] = json!(include_selectors);
|
||||
}
|
||||
if !exclude_selectors.is_empty() {
|
||||
body["exclude_selectors"] = json!(exclude_selectors);
|
||||
}
|
||||
self.post("scrape", body).await
|
||||
}
|
||||
|
||||
async fn post(&self, endpoint: &str, body: Value) -> Result<Value, String> {
|
||||
let resp = self
|
||||
.http
|
||||
.post(format!("{API_BASE}/{endpoint}"))
|
||||
.header("Authorization", format!("Bearer {}", self.api_key))
|
||||
.json(&body)
|
||||
.timeout(std::time::Duration::from_secs(120))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("cloud API request failed: {e}"))?;
|
||||
|
||||
let status = resp.status();
|
||||
if !status.is_success() {
|
||||
let text = resp.text().await.unwrap_or_default();
|
||||
return Err(format!("cloud API error {status}: {text}"));
|
||||
}
|
||||
|
||||
resp.json::<Value>()
|
||||
.await
|
||||
.map_err(|e| format!("cloud API response parse failed: {e}"))
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue