mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-05-13 17:02:36 +02:00
feat: wire ResolvedConfig into main.rs via clap ValueSource
This commit is contained in:
parent
e7583a5c51
commit
bac13fc1b5
2 changed files with 241 additions and 110 deletions
|
|
@ -134,6 +134,115 @@ pub struct ResolvedConfig {
|
||||||
pub llm_model: Option<String>,
|
pub llm_model: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
use clap::parser::ValueSource;
|
||||||
|
|
||||||
|
/// Merge CLI flags (detected via ValueSource), config file, and hard defaults
|
||||||
|
/// into a single ResolvedConfig. CLI explicit values always win.
|
||||||
|
pub fn resolve(
|
||||||
|
cli: &crate::Cli,
|
||||||
|
matches: &clap::ArgMatches,
|
||||||
|
cfg: &NoxaConfig,
|
||||||
|
) -> ResolvedConfig {
|
||||||
|
let explicit = |name: &str| {
|
||||||
|
matches.value_source(name) == Some(ValueSource::CommandLine)
|
||||||
|
};
|
||||||
|
|
||||||
|
ResolvedConfig {
|
||||||
|
format: if explicit("format") {
|
||||||
|
cli.format.clone()
|
||||||
|
} else {
|
||||||
|
cfg.format.clone().unwrap_or(crate::OutputFormat::Markdown)
|
||||||
|
},
|
||||||
|
browser: if explicit("browser") {
|
||||||
|
cli.browser.clone()
|
||||||
|
} else {
|
||||||
|
cfg.browser.clone().unwrap_or(crate::Browser::Chrome)
|
||||||
|
},
|
||||||
|
pdf_mode: if explicit("pdf_mode") {
|
||||||
|
cli.pdf_mode.clone()
|
||||||
|
} else {
|
||||||
|
cfg.pdf_mode.clone().unwrap_or(crate::PdfModeArg::Auto)
|
||||||
|
},
|
||||||
|
timeout: if explicit("timeout") {
|
||||||
|
cli.timeout
|
||||||
|
} else {
|
||||||
|
cfg.timeout.unwrap_or(30)
|
||||||
|
},
|
||||||
|
depth: if explicit("depth") {
|
||||||
|
cli.depth
|
||||||
|
} else {
|
||||||
|
cfg.depth.unwrap_or(1)
|
||||||
|
},
|
||||||
|
max_pages: if explicit("max_pages") {
|
||||||
|
cli.max_pages
|
||||||
|
} else {
|
||||||
|
cfg.max_pages.unwrap_or(20)
|
||||||
|
},
|
||||||
|
concurrency: if explicit("concurrency") {
|
||||||
|
cli.concurrency
|
||||||
|
} else {
|
||||||
|
cfg.concurrency.unwrap_or(5)
|
||||||
|
},
|
||||||
|
delay: if explicit("delay") {
|
||||||
|
cli.delay
|
||||||
|
} else {
|
||||||
|
cfg.delay.unwrap_or(100)
|
||||||
|
},
|
||||||
|
path_prefix: if explicit("path_prefix") {
|
||||||
|
cli.path_prefix.clone()
|
||||||
|
} else {
|
||||||
|
cfg.path_prefix.clone().or(cli.path_prefix.clone())
|
||||||
|
},
|
||||||
|
include_paths: if explicit("include_paths") {
|
||||||
|
cli.include_paths
|
||||||
|
.as_deref()
|
||||||
|
.map(|s| s.split(',').map(|p| p.trim().to_string()).collect())
|
||||||
|
.unwrap_or_default()
|
||||||
|
} else {
|
||||||
|
cfg.include_paths.clone().unwrap_or_default()
|
||||||
|
},
|
||||||
|
exclude_paths: if explicit("exclude_paths") {
|
||||||
|
cli.exclude_paths
|
||||||
|
.as_deref()
|
||||||
|
.map(|s| s.split(',').map(|p| p.trim().to_string()).collect())
|
||||||
|
.unwrap_or_default()
|
||||||
|
} else {
|
||||||
|
cfg.exclude_paths.clone().unwrap_or_default()
|
||||||
|
},
|
||||||
|
include_selectors: if explicit("include") {
|
||||||
|
cli.include
|
||||||
|
.as_deref()
|
||||||
|
.map(|s| s.split(',').map(|p| p.trim().to_string()).collect())
|
||||||
|
.unwrap_or_default()
|
||||||
|
} else {
|
||||||
|
cfg.include_selectors.clone().unwrap_or_default()
|
||||||
|
},
|
||||||
|
exclude_selectors: if explicit("exclude") {
|
||||||
|
cli.exclude
|
||||||
|
.as_deref()
|
||||||
|
.map(|s| s.split(',').map(|p| p.trim().to_string()).collect())
|
||||||
|
.unwrap_or_default()
|
||||||
|
} else {
|
||||||
|
cfg.exclude_selectors.clone().unwrap_or_default()
|
||||||
|
},
|
||||||
|
only_main_content: cli.only_main_content || cfg.only_main_content.unwrap_or(false),
|
||||||
|
metadata: cli.metadata || cfg.metadata.unwrap_or(false),
|
||||||
|
verbose: cli.verbose || cfg.verbose.unwrap_or(false),
|
||||||
|
use_sitemap: cli.sitemap || cfg.use_sitemap.unwrap_or(false),
|
||||||
|
raw_html: cli.raw_html,
|
||||||
|
llm_provider: if cli.llm_provider.is_some() {
|
||||||
|
cli.llm_provider.clone()
|
||||||
|
} else {
|
||||||
|
cfg.llm_provider.clone()
|
||||||
|
},
|
||||||
|
llm_model: if cli.llm_model.is_some() {
|
||||||
|
cli.llm_model.clone()
|
||||||
|
} else {
|
||||||
|
cfg.llm_model.clone()
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ use std::process;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::sync::atomic::{AtomicBool, Ordering};
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
|
||||||
use clap::{Parser, ValueEnum};
|
use clap::{CommandFactory, FromArgMatches, Parser, ValueEnum};
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use tracing_subscriber::EnvFilter;
|
use tracing_subscriber::EnvFilter;
|
||||||
use noxa_core::{
|
use noxa_core::{
|
||||||
|
|
@ -89,6 +89,10 @@ fn warn_empty(url: &str, reason: &EmptyReason) {
|
||||||
#[derive(Parser)]
|
#[derive(Parser)]
|
||||||
#[command(name = "noxa", about = "Extract web content for LLMs", version)]
|
#[command(name = "noxa", about = "Extract web content for LLMs", version)]
|
||||||
struct Cli {
|
struct Cli {
|
||||||
|
/// Path to config.json (default: ./config.json, override with NOXA_CONFIG env var)
|
||||||
|
#[arg(long, global = true)]
|
||||||
|
config: Option<String>,
|
||||||
|
|
||||||
/// URLs to fetch (multiple allowed)
|
/// URLs to fetch (multiple allowed)
|
||||||
#[arg()]
|
#[arg()]
|
||||||
urls: Vec<String>,
|
urls: Vec<String>,
|
||||||
|
|
@ -348,7 +352,7 @@ fn init_logging(verbose: bool) {
|
||||||
/// `--proxy` sets a single static proxy (no rotation).
|
/// `--proxy` sets a single static proxy (no rotation).
|
||||||
/// `--proxy-file` loads a pool of proxies and rotates per-request.
|
/// `--proxy-file` loads a pool of proxies and rotates per-request.
|
||||||
/// `--proxy` takes priority: if both are set, only the single proxy is used.
|
/// `--proxy` takes priority: if both are set, only the single proxy is used.
|
||||||
fn build_fetch_config(cli: &Cli) -> FetchConfig {
|
fn build_fetch_config(cli: &Cli, resolved: &config::ResolvedConfig) -> FetchConfig {
|
||||||
let (proxy, proxy_pool) = if cli.proxy.is_some() {
|
let (proxy, proxy_pool) = if cli.proxy.is_some() {
|
||||||
(cli.proxy.clone(), Vec::new())
|
(cli.proxy.clone(), Vec::new())
|
||||||
} else if let Some(ref path) = cli.proxy_file {
|
} else if let Some(ref path) = cli.proxy_file {
|
||||||
|
|
@ -408,11 +412,11 @@ fn build_fetch_config(cli: &Cli) -> FetchConfig {
|
||||||
}
|
}
|
||||||
|
|
||||||
FetchConfig {
|
FetchConfig {
|
||||||
browser: cli.browser.clone().into(),
|
browser: resolved.browser.clone().into(),
|
||||||
proxy,
|
proxy,
|
||||||
proxy_pool,
|
proxy_pool,
|
||||||
timeout: std::time::Duration::from_secs(cli.timeout),
|
timeout: std::time::Duration::from_secs(resolved.timeout),
|
||||||
pdf_mode: cli.pdf_mode.clone().into(),
|
pdf_mode: resolved.pdf_mode.clone().into(),
|
||||||
headers,
|
headers,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}
|
}
|
||||||
|
|
@ -441,20 +445,12 @@ fn parse_cookie_file(path: &str) -> Result<String, String> {
|
||||||
Ok(pairs.join("; "))
|
Ok(pairs.join("; "))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_extraction_options(cli: &Cli) -> ExtractionOptions {
|
fn build_extraction_options(resolved: &config::ResolvedConfig) -> ExtractionOptions {
|
||||||
ExtractionOptions {
|
ExtractionOptions {
|
||||||
include_selectors: cli
|
include_selectors: resolved.include_selectors.clone(),
|
||||||
.include
|
exclude_selectors: resolved.exclude_selectors.clone(),
|
||||||
.as_deref()
|
only_main_content: resolved.only_main_content,
|
||||||
.map(|s| s.split(',').map(|s| s.trim().to_string()).collect())
|
include_raw_html: resolved.raw_html || matches!(resolved.format, OutputFormat::Html),
|
||||||
.unwrap_or_default(),
|
|
||||||
exclude_selectors: cli
|
|
||||||
.exclude
|
|
||||||
.as_deref()
|
|
||||||
.map(|s| s.split(',').map(|s| s.trim().to_string()).collect())
|
|
||||||
.unwrap_or_default(),
|
|
||||||
only_main_content: cli.only_main_content,
|
|
||||||
include_raw_html: cli.raw_html || matches!(cli.format, OutputFormat::Html),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -623,14 +619,17 @@ impl FetchOutput {
|
||||||
|
|
||||||
/// Fetch a URL and extract content, handling PDF detection automatically.
|
/// Fetch a URL and extract content, handling PDF detection automatically.
|
||||||
/// Falls back to cloud API when bot protection or JS rendering is detected.
|
/// Falls back to cloud API when bot protection or JS rendering is detected.
|
||||||
async fn fetch_and_extract(cli: &Cli) -> Result<FetchOutput, String> {
|
async fn fetch_and_extract(
|
||||||
|
cli: &Cli,
|
||||||
|
resolved: &config::ResolvedConfig,
|
||||||
|
) -> Result<FetchOutput, String> {
|
||||||
// Local sources: read and extract as HTML
|
// Local sources: read and extract as HTML
|
||||||
if cli.stdin {
|
if cli.stdin {
|
||||||
let mut buf = String::new();
|
let mut buf = String::new();
|
||||||
io::stdin()
|
io::stdin()
|
||||||
.read_to_string(&mut buf)
|
.read_to_string(&mut buf)
|
||||||
.map_err(|e| format!("failed to read stdin: {e}"))?;
|
.map_err(|e| format!("failed to read stdin: {e}"))?;
|
||||||
let options = build_extraction_options(cli);
|
let options = build_extraction_options(resolved);
|
||||||
return extract_with_options(&buf, None, &options)
|
return extract_with_options(&buf, None, &options)
|
||||||
.map(|r| FetchOutput::Local(Box::new(r)))
|
.map(|r| FetchOutput::Local(Box::new(r)))
|
||||||
.map_err(|e| format!("extraction error: {e}"));
|
.map_err(|e| format!("extraction error: {e}"));
|
||||||
|
|
@ -639,7 +638,7 @@ async fn fetch_and_extract(cli: &Cli) -> Result<FetchOutput, String> {
|
||||||
if let Some(ref path) = cli.file {
|
if let Some(ref path) = cli.file {
|
||||||
let html =
|
let html =
|
||||||
std::fs::read_to_string(path).map_err(|e| format!("failed to read {path}: {e}"))?;
|
std::fs::read_to_string(path).map_err(|e| format!("failed to read {path}: {e}"))?;
|
||||||
let options = build_extraction_options(cli);
|
let options = build_extraction_options(resolved);
|
||||||
return extract_with_options(&html, None, &options)
|
return extract_with_options(&html, None, &options)
|
||||||
.map(|r| FetchOutput::Local(Box::new(r)))
|
.map(|r| FetchOutput::Local(Box::new(r)))
|
||||||
.map_err(|e| format!("extraction error: {e}"));
|
.map_err(|e| format!("extraction error: {e}"));
|
||||||
|
|
@ -658,8 +657,8 @@ async fn fetch_and_extract(cli: &Cli) -> Result<FetchOutput, String> {
|
||||||
if cli.cloud {
|
if cli.cloud {
|
||||||
let c =
|
let c =
|
||||||
cloud_client.ok_or("--cloud requires NOXA_API_KEY (set via env or --api-key)")?;
|
cloud_client.ok_or("--cloud requires NOXA_API_KEY (set via env or --api-key)")?;
|
||||||
let options = build_extraction_options(cli);
|
let options = build_extraction_options(resolved);
|
||||||
let format_str = match cli.format {
|
let format_str = match resolved.format {
|
||||||
OutputFormat::Markdown => "markdown",
|
OutputFormat::Markdown => "markdown",
|
||||||
OutputFormat::Json => "json",
|
OutputFormat::Json => "json",
|
||||||
OutputFormat::Text => "text",
|
OutputFormat::Text => "text",
|
||||||
|
|
@ -679,9 +678,9 @@ async fn fetch_and_extract(cli: &Cli) -> Result<FetchOutput, String> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Normal path: try local first
|
// Normal path: try local first
|
||||||
let client =
|
let client = FetchClient::new(build_fetch_config(cli, resolved))
|
||||||
FetchClient::new(build_fetch_config(cli)).map_err(|e| format!("client error: {e}"))?;
|
.map_err(|e| format!("client error: {e}"))?;
|
||||||
let options = build_extraction_options(cli);
|
let options = build_extraction_options(resolved);
|
||||||
let result = client
|
let result = client
|
||||||
.fetch_and_extract_with_options(url, &options)
|
.fetch_and_extract_with_options(url, &options)
|
||||||
.await
|
.await
|
||||||
|
|
@ -692,7 +691,7 @@ async fn fetch_and_extract(cli: &Cli) -> Result<FetchOutput, String> {
|
||||||
if !matches!(reason, EmptyReason::None) {
|
if !matches!(reason, EmptyReason::None) {
|
||||||
if let Some(ref c) = cloud_client {
|
if let Some(ref c) = cloud_client {
|
||||||
eprintln!("\x1b[36minfo:\x1b[0m falling back to cloud API...");
|
eprintln!("\x1b[36minfo:\x1b[0m falling back to cloud API...");
|
||||||
let format_str = match cli.format {
|
let format_str = match resolved.format {
|
||||||
OutputFormat::Markdown => "markdown",
|
OutputFormat::Markdown => "markdown",
|
||||||
OutputFormat::Json => "json",
|
OutputFormat::Json => "json",
|
||||||
OutputFormat::Text => "text",
|
OutputFormat::Text => "text",
|
||||||
|
|
@ -723,7 +722,7 @@ async fn fetch_and_extract(cli: &Cli) -> Result<FetchOutput, String> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Fetch raw HTML from a URL (no extraction). Used for --raw-html and brand extraction.
|
/// Fetch raw HTML from a URL (no extraction). Used for --raw-html and brand extraction.
|
||||||
async fn fetch_html(cli: &Cli) -> Result<FetchResult, String> {
|
async fn fetch_html(cli: &Cli, resolved: &config::ResolvedConfig) -> Result<FetchResult, String> {
|
||||||
if cli.stdin {
|
if cli.stdin {
|
||||||
let mut buf = String::new();
|
let mut buf = String::new();
|
||||||
io::stdin()
|
io::stdin()
|
||||||
|
|
@ -756,8 +755,8 @@ async fn fetch_html(cli: &Cli) -> Result<FetchResult, String> {
|
||||||
.ok_or("no input provided -- pass a URL, --file, or --stdin")?;
|
.ok_or("no input provided -- pass a URL, --file, or --stdin")?;
|
||||||
let url = normalize_url(raw_url);
|
let url = normalize_url(raw_url);
|
||||||
|
|
||||||
let client =
|
let client = FetchClient::new(build_fetch_config(cli, resolved))
|
||||||
FetchClient::new(build_fetch_config(cli)).map_err(|e| format!("client error: {e}"))?;
|
.map_err(|e| format!("client error: {e}"))?;
|
||||||
client
|
client
|
||||||
.fetch(&url)
|
.fetch(&url)
|
||||||
.await
|
.await
|
||||||
|
|
@ -1171,7 +1170,7 @@ fn format_progress(page: &PageResult, index: usize, max_pages: usize) -> String
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn run_crawl(cli: &Cli) -> Result<(), String> {
|
async fn run_crawl(cli: &Cli, resolved: &config::ResolvedConfig) -> Result<(), String> {
|
||||||
let url = cli
|
let url = cli
|
||||||
.urls
|
.urls
|
||||||
.first()
|
.first()
|
||||||
|
|
@ -1183,16 +1182,8 @@ async fn run_crawl(cli: &Cli) -> Result<(), String> {
|
||||||
return Err("--crawl cannot be used with --file or --stdin".into());
|
return Err("--crawl cannot be used with --file or --stdin".into());
|
||||||
}
|
}
|
||||||
|
|
||||||
let include_patterns: Vec<String> = cli
|
let include_patterns = resolved.include_paths.clone();
|
||||||
.include_paths
|
let exclude_patterns = resolved.exclude_paths.clone();
|
||||||
.as_deref()
|
|
||||||
.map(|s| s.split(',').map(|p| p.trim().to_string()).collect())
|
|
||||||
.unwrap_or_default();
|
|
||||||
let exclude_patterns: Vec<String> = cli
|
|
||||||
.exclude_paths
|
|
||||||
.as_deref()
|
|
||||||
.map(|s| s.split(',').map(|p| p.trim().to_string()).collect())
|
|
||||||
.unwrap_or_default();
|
|
||||||
|
|
||||||
// Set up streaming progress channel
|
// Set up streaming progress channel
|
||||||
let (progress_tx, mut progress_rx) = tokio::sync::broadcast::channel::<PageResult>(100);
|
let (progress_tx, mut progress_rx) = tokio::sync::broadcast::channel::<PageResult>(100);
|
||||||
|
|
@ -1212,13 +1203,13 @@ async fn run_crawl(cli: &Cli) -> Result<(), String> {
|
||||||
}
|
}
|
||||||
|
|
||||||
let config = CrawlConfig {
|
let config = CrawlConfig {
|
||||||
fetch: build_fetch_config(cli),
|
fetch: build_fetch_config(cli, resolved),
|
||||||
max_depth: cli.depth,
|
max_depth: resolved.depth,
|
||||||
max_pages: cli.max_pages,
|
max_pages: resolved.max_pages,
|
||||||
concurrency: cli.concurrency,
|
concurrency: resolved.concurrency,
|
||||||
delay: std::time::Duration::from_millis(cli.delay),
|
delay: std::time::Duration::from_millis(resolved.delay),
|
||||||
path_prefix: cli.path_prefix.clone(),
|
path_prefix: resolved.path_prefix.clone(),
|
||||||
use_sitemap: cli.sitemap,
|
use_sitemap: resolved.use_sitemap,
|
||||||
include_patterns,
|
include_patterns,
|
||||||
exclude_patterns,
|
exclude_patterns,
|
||||||
progress_tx: Some(progress_tx),
|
progress_tx: Some(progress_tx),
|
||||||
|
|
@ -1237,7 +1228,7 @@ async fn run_crawl(cli: &Cli) -> Result<(), String> {
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
let max_pages = cli.max_pages;
|
let max_pages = resolved.max_pages;
|
||||||
let completed_offset = resume_state.as_ref().map_or(0, |s| s.completed_pages);
|
let completed_offset = resume_state.as_ref().map_or(0, |s| s.completed_pages);
|
||||||
|
|
||||||
// Spawn background task to print streaming progress to stderr
|
// Spawn background task to print streaming progress to stderr
|
||||||
|
|
@ -1266,8 +1257,8 @@ async fn run_crawl(cli: &Cli) -> Result<(), String> {
|
||||||
&result.visited,
|
&result.visited,
|
||||||
&result.remaining_frontier,
|
&result.remaining_frontier,
|
||||||
completed_offset + result.pages.len(),
|
completed_offset + result.pages.len(),
|
||||||
cli.max_pages,
|
resolved.max_pages,
|
||||||
cli.depth,
|
resolved.depth,
|
||||||
)?;
|
)?;
|
||||||
eprintln!(
|
eprintln!(
|
||||||
"Crawl state saved to {} ({} pages completed). Resume with --crawl-state {}",
|
"Crawl state saved to {} ({} pages completed). Resume with --crawl-state {}",
|
||||||
|
|
@ -1299,15 +1290,15 @@ async fn run_crawl(cli: &Cli) -> Result<(), String> {
|
||||||
let mut saved = 0usize;
|
let mut saved = 0usize;
|
||||||
for page in &result.pages {
|
for page in &result.pages {
|
||||||
if let Some(ref extraction) = page.extraction {
|
if let Some(ref extraction) = page.extraction {
|
||||||
let filename = url_to_filename(&page.url, &cli.format);
|
let filename = url_to_filename(&page.url, &resolved.format);
|
||||||
let content = format_output(extraction, &cli.format, cli.metadata);
|
let content = format_output(extraction, &resolved.format, resolved.metadata);
|
||||||
write_to_file(dir, &filename, &content)?;
|
write_to_file(dir, &filename, &content)?;
|
||||||
saved += 1;
|
saved += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
eprintln!("Saved {saved} files to {}", dir.display());
|
eprintln!("Saved {saved} files to {}", dir.display());
|
||||||
} else {
|
} else {
|
||||||
print_crawl_output(&result, &cli.format, cli.metadata);
|
print_crawl_output(&result, &resolved.format, resolved.metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
eprintln!(
|
eprintln!(
|
||||||
|
|
@ -1343,7 +1334,7 @@ async fn run_crawl(cli: &Cli) -> Result<(), String> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn run_map(cli: &Cli) -> Result<(), String> {
|
async fn run_map(cli: &Cli, resolved: &config::ResolvedConfig) -> Result<(), String> {
|
||||||
let url = cli
|
let url = cli
|
||||||
.urls
|
.urls
|
||||||
.first()
|
.first()
|
||||||
|
|
@ -1351,8 +1342,8 @@ async fn run_map(cli: &Cli) -> Result<(), String> {
|
||||||
.map(|u| normalize_url(u))?;
|
.map(|u| normalize_url(u))?;
|
||||||
let url = url.as_str();
|
let url = url.as_str();
|
||||||
|
|
||||||
let client =
|
let client = FetchClient::new(build_fetch_config(cli, resolved))
|
||||||
FetchClient::new(build_fetch_config(cli)).map_err(|e| format!("client error: {e}"))?;
|
.map_err(|e| format!("client error: {e}"))?;
|
||||||
|
|
||||||
let entries = noxa_fetch::sitemap::discover(&client, url)
|
let entries = noxa_fetch::sitemap::discover(&client, url)
|
||||||
.await
|
.await
|
||||||
|
|
@ -1364,19 +1355,24 @@ async fn run_map(cli: &Cli) -> Result<(), String> {
|
||||||
eprintln!("discovered {} URLs", entries.len());
|
eprintln!("discovered {} URLs", entries.len());
|
||||||
}
|
}
|
||||||
|
|
||||||
print_map_output(&entries, &cli.format);
|
print_map_output(&entries, &resolved.format);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn run_batch(cli: &Cli, entries: &[(String, Option<String>)]) -> Result<(), String> {
|
async fn run_batch(
|
||||||
|
cli: &Cli,
|
||||||
|
resolved: &config::ResolvedConfig,
|
||||||
|
entries: &[(String, Option<String>)],
|
||||||
|
) -> Result<(), String> {
|
||||||
let client = Arc::new(
|
let client = Arc::new(
|
||||||
FetchClient::new(build_fetch_config(cli)).map_err(|e| format!("client error: {e}"))?,
|
FetchClient::new(build_fetch_config(cli, resolved))
|
||||||
|
.map_err(|e| format!("client error: {e}"))?,
|
||||||
);
|
);
|
||||||
|
|
||||||
let urls: Vec<&str> = entries.iter().map(|(u, _)| u.as_str()).collect();
|
let urls: Vec<&str> = entries.iter().map(|(u, _)| u.as_str()).collect();
|
||||||
let options = build_extraction_options(cli);
|
let options = build_extraction_options(resolved);
|
||||||
let results = client
|
let results = client
|
||||||
.fetch_and_extract_batch_with_options(&urls, cli.concurrency, &options)
|
.fetch_and_extract_batch_with_options(&urls, resolved.concurrency, &options)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
let ok = results.iter().filter(|r| r.result.is_ok()).count();
|
let ok = results.iter().filter(|r| r.result.is_ok()).count();
|
||||||
|
|
@ -1407,15 +1403,15 @@ async fn run_batch(cli: &Cli, entries: &[(String, Option<String>)]) -> Result<()
|
||||||
let filename = custom_names
|
let filename = custom_names
|
||||||
.get(r.url.as_str())
|
.get(r.url.as_str())
|
||||||
.map(|s| s.to_string())
|
.map(|s| s.to_string())
|
||||||
.unwrap_or_else(|| url_to_filename(&r.url, &cli.format));
|
.unwrap_or_else(|| url_to_filename(&r.url, &resolved.format));
|
||||||
let content = format_output(extraction, &cli.format, cli.metadata);
|
let content = format_output(extraction, &resolved.format, resolved.metadata);
|
||||||
write_to_file(dir, &filename, &content)?;
|
write_to_file(dir, &filename, &content)?;
|
||||||
saved += 1;
|
saved += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
eprintln!("Saved {saved} files to {}", dir.display());
|
eprintln!("Saved {saved} files to {}", dir.display());
|
||||||
} else {
|
} else {
|
||||||
print_batch_output(&results, &cli.format, cli.metadata);
|
print_batch_output(&results, &resolved.format, resolved.metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
eprintln!(
|
eprintln!(
|
||||||
|
|
@ -1519,15 +1515,20 @@ fn fire_webhook(url: &str, payload: &serde_json::Value) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn run_watch(cli: &Cli, urls: &[String]) -> Result<(), String> {
|
async fn run_watch(
|
||||||
|
cli: &Cli,
|
||||||
|
resolved: &config::ResolvedConfig,
|
||||||
|
urls: &[String],
|
||||||
|
) -> Result<(), String> {
|
||||||
if urls.is_empty() {
|
if urls.is_empty() {
|
||||||
return Err("--watch requires at least one URL".into());
|
return Err("--watch requires at least one URL".into());
|
||||||
}
|
}
|
||||||
|
|
||||||
let client = Arc::new(
|
let client = Arc::new(
|
||||||
FetchClient::new(build_fetch_config(cli)).map_err(|e| format!("client error: {e}"))?,
|
FetchClient::new(build_fetch_config(cli, resolved))
|
||||||
|
.map_err(|e| format!("client error: {e}"))?,
|
||||||
);
|
);
|
||||||
let options = build_extraction_options(cli);
|
let options = build_extraction_options(resolved);
|
||||||
|
|
||||||
// Ctrl+C handler
|
// Ctrl+C handler
|
||||||
let cancelled = Arc::new(AtomicBool::new(false));
|
let cancelled = Arc::new(AtomicBool::new(false));
|
||||||
|
|
@ -1539,16 +1540,17 @@ async fn run_watch(cli: &Cli, urls: &[String]) -> Result<(), String> {
|
||||||
|
|
||||||
// Single-URL mode: preserve original behavior exactly
|
// Single-URL mode: preserve original behavior exactly
|
||||||
if urls.len() == 1 {
|
if urls.len() == 1 {
|
||||||
return run_watch_single(cli, &client, &options, &urls[0], &cancelled).await;
|
return run_watch_single(cli, resolved, &client, &options, &urls[0], &cancelled).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Multi-URL mode: batch fetch, diff each, report aggregate
|
// Multi-URL mode: batch fetch, diff each, report aggregate
|
||||||
run_watch_multi(cli, &client, &options, urls, &cancelled).await
|
run_watch_multi(cli, resolved, &client, &options, urls, &cancelled).await
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Original single-URL watch loop -- backward compatible.
|
/// Original single-URL watch loop -- backward compatible.
|
||||||
async fn run_watch_single(
|
async fn run_watch_single(
|
||||||
cli: &Cli,
|
cli: &Cli,
|
||||||
|
resolved: &config::ResolvedConfig,
|
||||||
client: &Arc<FetchClient>,
|
client: &Arc<FetchClient>,
|
||||||
options: &ExtractionOptions,
|
options: &ExtractionOptions,
|
||||||
url: &str,
|
url: &str,
|
||||||
|
|
@ -1585,7 +1587,7 @@ async fn run_watch_single(
|
||||||
if diff.status == ChangeStatus::Same {
|
if diff.status == ChangeStatus::Same {
|
||||||
eprintln!("[watch] No changes ({})", timestamp());
|
eprintln!("[watch] No changes ({})", timestamp());
|
||||||
} else {
|
} else {
|
||||||
print_diff_output(&diff, &cli.format);
|
print_diff_output(&diff, &resolved.format);
|
||||||
eprintln!("[watch] Changes detected! ({})", timestamp());
|
eprintln!("[watch] Changes detected! ({})", timestamp());
|
||||||
|
|
||||||
if let Some(ref cmd) = cli.on_change {
|
if let Some(ref cmd) = cli.on_change {
|
||||||
|
|
@ -1632,6 +1634,7 @@ async fn run_watch_single(
|
||||||
/// Multi-URL watch loop -- batch fetch all URLs, diff each, report aggregate.
|
/// Multi-URL watch loop -- batch fetch all URLs, diff each, report aggregate.
|
||||||
async fn run_watch_multi(
|
async fn run_watch_multi(
|
||||||
cli: &Cli,
|
cli: &Cli,
|
||||||
|
resolved: &config::ResolvedConfig,
|
||||||
client: &Arc<FetchClient>,
|
client: &Arc<FetchClient>,
|
||||||
options: &ExtractionOptions,
|
options: &ExtractionOptions,
|
||||||
urls: &[String],
|
urls: &[String],
|
||||||
|
|
@ -1641,7 +1644,7 @@ async fn run_watch_multi(
|
||||||
|
|
||||||
// Initial pass: fetch all URLs in parallel
|
// Initial pass: fetch all URLs in parallel
|
||||||
let initial_results = client
|
let initial_results = client
|
||||||
.fetch_and_extract_batch_with_options(&url_refs, cli.concurrency, options)
|
.fetch_and_extract_batch_with_options(&url_refs, resolved.concurrency, options)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
let mut snapshots = std::collections::HashMap::new();
|
let mut snapshots = std::collections::HashMap::new();
|
||||||
|
|
@ -1681,7 +1684,7 @@ async fn run_watch_multi(
|
||||||
check_number += 1;
|
check_number += 1;
|
||||||
|
|
||||||
let current_results = client
|
let current_results = client
|
||||||
.fetch_and_extract_batch_with_options(&url_refs, cli.concurrency, options)
|
.fetch_and_extract_batch_with_options(&url_refs, resolved.concurrency, options)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
let mut changed: Vec<serde_json::Value> = Vec::new();
|
let mut changed: Vec<serde_json::Value> = Vec::new();
|
||||||
|
|
@ -1785,7 +1788,11 @@ async fn run_watch_multi(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn run_diff(cli: &Cli, snapshot_path: &str) -> Result<(), String> {
|
async fn run_diff(
|
||||||
|
cli: &Cli,
|
||||||
|
resolved: &config::ResolvedConfig,
|
||||||
|
snapshot_path: &str,
|
||||||
|
) -> Result<(), String> {
|
||||||
// Load previous snapshot
|
// Load previous snapshot
|
||||||
let snapshot_json = std::fs::read_to_string(snapshot_path)
|
let snapshot_json = std::fs::read_to_string(snapshot_path)
|
||||||
.map_err(|e| format!("failed to read snapshot {snapshot_path}: {e}"))?;
|
.map_err(|e| format!("failed to read snapshot {snapshot_path}: {e}"))?;
|
||||||
|
|
@ -1793,16 +1800,16 @@ async fn run_diff(cli: &Cli, snapshot_path: &str) -> Result<(), String> {
|
||||||
.map_err(|e| format!("failed to parse snapshot JSON: {e}"))?;
|
.map_err(|e| format!("failed to parse snapshot JSON: {e}"))?;
|
||||||
|
|
||||||
// Extract current version (handles PDF detection for URLs)
|
// Extract current version (handles PDF detection for URLs)
|
||||||
let new_result = fetch_and_extract(cli).await?.into_extraction()?;
|
let new_result = fetch_and_extract(cli, resolved).await?.into_extraction()?;
|
||||||
|
|
||||||
let diff = noxa_core::diff::diff(&old, &new_result);
|
let diff = noxa_core::diff::diff(&old, &new_result);
|
||||||
print_diff_output(&diff, &cli.format);
|
print_diff_output(&diff, &resolved.format);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn run_brand(cli: &Cli) -> Result<(), String> {
|
async fn run_brand(cli: &Cli, resolved: &config::ResolvedConfig) -> Result<(), String> {
|
||||||
let result = fetch_html(cli).await?;
|
let result = fetch_html(cli, resolved).await?;
|
||||||
let enriched = enrich_html_with_stylesheets(&result.html, &result.url).await;
|
let enriched = enrich_html_with_stylesheets(&result.html, &result.url).await;
|
||||||
let brand = noxa_core::brand::extract_brand(
|
let brand = noxa_core::brand::extract_brand(
|
||||||
&enriched,
|
&enriched,
|
||||||
|
|
@ -1816,12 +1823,15 @@ async fn run_brand(cli: &Cli) -> Result<(), String> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Build an LLM provider based on CLI flags, or fall back to the default chain.
|
/// Build an LLM provider based on CLI flags, or fall back to the default chain.
|
||||||
async fn build_llm_provider(cli: &Cli) -> Result<Box<dyn LlmProvider>, String> {
|
async fn build_llm_provider(
|
||||||
if let Some(ref name) = cli.llm_provider {
|
cli: &Cli,
|
||||||
|
resolved: &config::ResolvedConfig,
|
||||||
|
) -> Result<Box<dyn LlmProvider>, String> {
|
||||||
|
if let Some(ref name) = resolved.llm_provider {
|
||||||
match name.as_str() {
|
match name.as_str() {
|
||||||
"gemini" => {
|
"gemini" => {
|
||||||
let provider = noxa_llm::providers::gemini_cli::GeminiCliProvider::new(
|
let provider = noxa_llm::providers::gemini_cli::GeminiCliProvider::new(
|
||||||
cli.llm_model.clone(),
|
resolved.llm_model.clone(),
|
||||||
);
|
);
|
||||||
if !provider.is_available().await {
|
if !provider.is_available().await {
|
||||||
return Err(
|
return Err(
|
||||||
|
|
@ -1833,7 +1843,7 @@ async fn build_llm_provider(cli: &Cli) -> Result<Box<dyn LlmProvider>, String> {
|
||||||
"ollama" => {
|
"ollama" => {
|
||||||
let provider = noxa_llm::providers::ollama::OllamaProvider::new(
|
let provider = noxa_llm::providers::ollama::OllamaProvider::new(
|
||||||
cli.llm_base_url.clone(),
|
cli.llm_base_url.clone(),
|
||||||
cli.llm_model.clone(),
|
resolved.llm_model.clone(),
|
||||||
);
|
);
|
||||||
if !provider.is_available().await {
|
if !provider.is_available().await {
|
||||||
return Err("ollama is not running or unreachable".into());
|
return Err("ollama is not running or unreachable".into());
|
||||||
|
|
@ -1844,7 +1854,7 @@ async fn build_llm_provider(cli: &Cli) -> Result<Box<dyn LlmProvider>, String> {
|
||||||
let provider = noxa_llm::providers::openai::OpenAiProvider::new(
|
let provider = noxa_llm::providers::openai::OpenAiProvider::new(
|
||||||
None,
|
None,
|
||||||
cli.llm_base_url.clone(),
|
cli.llm_base_url.clone(),
|
||||||
cli.llm_model.clone(),
|
resolved.llm_model.clone(),
|
||||||
)
|
)
|
||||||
.ok_or("OPENAI_API_KEY not set")?;
|
.ok_or("OPENAI_API_KEY not set")?;
|
||||||
Ok(Box::new(provider))
|
Ok(Box::new(provider))
|
||||||
|
|
@ -1852,7 +1862,7 @@ async fn build_llm_provider(cli: &Cli) -> Result<Box<dyn LlmProvider>, String> {
|
||||||
"anthropic" => {
|
"anthropic" => {
|
||||||
let provider = noxa_llm::providers::anthropic::AnthropicProvider::new(
|
let provider = noxa_llm::providers::anthropic::AnthropicProvider::new(
|
||||||
None,
|
None,
|
||||||
cli.llm_model.clone(),
|
resolved.llm_model.clone(),
|
||||||
)
|
)
|
||||||
.ok_or("ANTHROPIC_API_KEY not set")?;
|
.ok_or("ANTHROPIC_API_KEY not set")?;
|
||||||
Ok(Box::new(provider))
|
Ok(Box::new(provider))
|
||||||
|
|
@ -1873,12 +1883,12 @@ async fn build_llm_provider(cli: &Cli) -> Result<Box<dyn LlmProvider>, String> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn run_llm(cli: &Cli) -> Result<(), String> {
|
async fn run_llm(cli: &Cli, resolved: &config::ResolvedConfig) -> Result<(), String> {
|
||||||
// Extract content from source first (handles PDF detection for URLs)
|
// Extract content from source first (handles PDF detection for URLs)
|
||||||
let result = fetch_and_extract(cli).await?.into_extraction()?;
|
let result = fetch_and_extract(cli, resolved).await?.into_extraction()?;
|
||||||
|
|
||||||
let provider = build_llm_provider(cli).await?;
|
let provider = build_llm_provider(cli, resolved).await?;
|
||||||
let model = cli.llm_model.as_deref();
|
let model = resolved.llm_model.as_deref();
|
||||||
|
|
||||||
if let Some(ref schema_input) = cli.extract_json {
|
if let Some(ref schema_input) = cli.extract_json {
|
||||||
// Support @file syntax for loading schema from file
|
// Support @file syntax for loading schema from file
|
||||||
|
|
@ -1937,12 +1947,16 @@ async fn run_llm(cli: &Cli) -> Result<(), String> {
|
||||||
|
|
||||||
/// Batch LLM extraction: fetch each URL, run LLM on extracted content, save/print results.
|
/// Batch LLM extraction: fetch each URL, run LLM on extracted content, save/print results.
|
||||||
/// URLs are processed sequentially to respect LLM provider rate limits.
|
/// URLs are processed sequentially to respect LLM provider rate limits.
|
||||||
async fn run_batch_llm(cli: &Cli, entries: &[(String, Option<String>)]) -> Result<(), String> {
|
async fn run_batch_llm(
|
||||||
let client =
|
cli: &Cli,
|
||||||
FetchClient::new(build_fetch_config(cli)).map_err(|e| format!("client error: {e}"))?;
|
resolved: &config::ResolvedConfig,
|
||||||
let options = build_extraction_options(cli);
|
entries: &[(String, Option<String>)],
|
||||||
let provider = build_llm_provider(cli).await?;
|
) -> Result<(), String> {
|
||||||
let model = cli.llm_model.as_deref();
|
let client = FetchClient::new(build_fetch_config(cli, resolved))
|
||||||
|
.map_err(|e| format!("client error: {e}"))?;
|
||||||
|
let options = build_extraction_options(resolved);
|
||||||
|
let provider = build_llm_provider(cli, resolved).await?;
|
||||||
|
let model = resolved.llm_model.as_deref();
|
||||||
|
|
||||||
// Pre-parse schema once if --extract-json is used
|
// Pre-parse schema once if --extract-json is used
|
||||||
let schema = if let Some(ref schema_input) = cli.extract_json {
|
let schema = if let Some(ref schema_input) = cli.extract_json {
|
||||||
|
|
@ -2231,12 +2245,19 @@ async fn run_research(cli: &Cli, query: &str) -> Result<(), String> {
|
||||||
async fn main() {
|
async fn main() {
|
||||||
dotenvy::dotenv().ok();
|
dotenvy::dotenv().ok();
|
||||||
|
|
||||||
let cli = Cli::parse();
|
// Use low-level API to get both typed Cli and ArgMatches for ValueSource detection.
|
||||||
init_logging(cli.verbose);
|
let matches = Cli::command().get_matches();
|
||||||
|
let cli = Cli::from_arg_matches(&matches).unwrap_or_else(|e| e.exit());
|
||||||
|
|
||||||
|
// Load config BEFORE init_logging so verbose from config takes effect.
|
||||||
|
let cfg = config::NoxaConfig::load(cli.config.as_deref());
|
||||||
|
let resolved = config::resolve(&cli, &matches, &cfg);
|
||||||
|
|
||||||
|
init_logging(resolved.verbose);
|
||||||
|
|
||||||
// --map: sitemap discovery mode
|
// --map: sitemap discovery mode
|
||||||
if cli.map {
|
if cli.map {
|
||||||
if let Err(e) = run_map(&cli).await {
|
if let Err(e) = run_map(&cli, &resolved).await {
|
||||||
eprintln!("error: {e}");
|
eprintln!("error: {e}");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
@ -2245,7 +2266,7 @@ async fn main() {
|
||||||
|
|
||||||
// --crawl: recursive crawl mode
|
// --crawl: recursive crawl mode
|
||||||
if cli.crawl {
|
if cli.crawl {
|
||||||
if let Err(e) = run_crawl(&cli).await {
|
if let Err(e) = run_crawl(&cli, &resolved).await {
|
||||||
eprintln!("error: {e}");
|
eprintln!("error: {e}");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
@ -2261,7 +2282,7 @@ async fn main() {
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
if let Err(e) = run_watch(&cli, &watch_urls).await {
|
if let Err(e) = run_watch(&cli, &resolved, &watch_urls).await {
|
||||||
eprintln!("error: {e}");
|
eprintln!("error: {e}");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
@ -2270,7 +2291,7 @@ async fn main() {
|
||||||
|
|
||||||
// --diff-with: change tracking mode
|
// --diff-with: change tracking mode
|
||||||
if let Some(ref snapshot_path) = cli.diff_with {
|
if let Some(ref snapshot_path) = cli.diff_with {
|
||||||
if let Err(e) = run_diff(&cli, snapshot_path).await {
|
if let Err(e) = run_diff(&cli, &resolved, snapshot_path).await {
|
||||||
eprintln!("error: {e}");
|
eprintln!("error: {e}");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
@ -2279,7 +2300,7 @@ async fn main() {
|
||||||
|
|
||||||
// --brand: brand identity extraction mode
|
// --brand: brand identity extraction mode
|
||||||
if cli.brand {
|
if cli.brand {
|
||||||
if let Err(e) = run_brand(&cli).await {
|
if let Err(e) = run_brand(&cli, &resolved).await {
|
||||||
eprintln!("error: {e}");
|
eprintln!("error: {e}");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
@ -2308,11 +2329,11 @@ async fn main() {
|
||||||
// When multiple URLs are provided, run batch LLM extraction over all of them.
|
// When multiple URLs are provided, run batch LLM extraction over all of them.
|
||||||
if has_llm_flags(&cli) {
|
if has_llm_flags(&cli) {
|
||||||
if entries.len() > 1 {
|
if entries.len() > 1 {
|
||||||
if let Err(e) = run_batch_llm(&cli, &entries).await {
|
if let Err(e) = run_batch_llm(&cli, &resolved, &entries).await {
|
||||||
eprintln!("error: {e}");
|
eprintln!("error: {e}");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
} else if let Err(e) = run_llm(&cli).await {
|
} else if let Err(e) = run_llm(&cli, &resolved).await {
|
||||||
eprintln!("error: {e}");
|
eprintln!("error: {e}");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
@ -2321,7 +2342,7 @@ async fn main() {
|
||||||
|
|
||||||
// Multi-URL batch mode
|
// Multi-URL batch mode
|
||||||
if entries.len() > 1 {
|
if entries.len() > 1 {
|
||||||
if let Err(e) = run_batch(&cli, &entries).await {
|
if let Err(e) = run_batch(&cli, &resolved, &entries).await {
|
||||||
eprintln!("error: {e}");
|
eprintln!("error: {e}");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
@ -2330,7 +2351,7 @@ async fn main() {
|
||||||
|
|
||||||
// --raw-html: skip extraction, dump the fetched HTML
|
// --raw-html: skip extraction, dump the fetched HTML
|
||||||
if cli.raw_html && cli.include.is_none() && cli.exclude.is_none() {
|
if cli.raw_html && cli.include.is_none() && cli.exclude.is_none() {
|
||||||
match fetch_html(&cli).await {
|
match fetch_html(&cli, &resolved).await {
|
||||||
Ok(r) => println!("{}", r.html),
|
Ok(r) => println!("{}", r.html),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
eprintln!("error: {e}");
|
eprintln!("error: {e}");
|
||||||
|
|
@ -2341,7 +2362,7 @@ async fn main() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Single-page extraction (handles both HTML and PDF via content-type detection)
|
// Single-page extraction (handles both HTML and PDF via content-type detection)
|
||||||
match fetch_and_extract(&cli).await {
|
match fetch_and_extract(&cli, &resolved).await {
|
||||||
Ok(FetchOutput::Local(result)) => {
|
Ok(FetchOutput::Local(result)) => {
|
||||||
if let Some(ref dir) = cli.output_dir {
|
if let Some(ref dir) = cli.output_dir {
|
||||||
let url = cli
|
let url = cli
|
||||||
|
|
@ -2350,18 +2371,19 @@ async fn main() {
|
||||||
.map(|u| normalize_url(u))
|
.map(|u| normalize_url(u))
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
let custom_name = entries.first().and_then(|(_, name)| name.clone());
|
let custom_name = entries.first().and_then(|(_, name)| name.clone());
|
||||||
let filename = custom_name.unwrap_or_else(|| url_to_filename(&url, &cli.format));
|
let filename =
|
||||||
let content = format_output(&result, &cli.format, cli.metadata);
|
custom_name.unwrap_or_else(|| url_to_filename(&url, &resolved.format));
|
||||||
|
let content = format_output(&result, &resolved.format, resolved.metadata);
|
||||||
if let Err(e) = write_to_file(dir, &filename, &content) {
|
if let Err(e) = write_to_file(dir, &filename, &content) {
|
||||||
eprintln!("error: {e}");
|
eprintln!("error: {e}");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
print_output(&result, &cli.format, cli.metadata);
|
print_output(&result, &resolved.format, resolved.metadata);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(FetchOutput::Cloud(resp)) => {
|
Ok(FetchOutput::Cloud(resp)) => {
|
||||||
print_cloud_output(&resp, &cli.format);
|
print_cloud_output(&resp, &resolved.format);
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
eprintln!("{e}");
|
eprintln!("{e}");
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue