diff --git a/CHANGELOG.md b/CHANGELOG.md index 27e0966..8e8aecb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,16 @@ All notable changes to webclaw are documented here. Format follows [Keep a Changelog](https://keepachangelog.com/). +## [0.3.6] — 2026-04-02 + +### Added +- **Structured data in markdown/LLM output**: `__NEXT_DATA__`, SvelteKit, and JSON-LD data now appears as a `## Structured Data` section with a JSON code block at the end of `-f markdown` and `-f llm` output. Works with `--only-main-content` and all other flags. + +### Fixed +- **Homebrew CI**: formula now updates all 4 platform checksums after Docker build completes, preventing SHA mismatch on Linux installs (#12). + +--- + ## [0.3.5] — 2026-04-02 ### Added diff --git a/Cargo.lock b/Cargo.lock index 37eebb2..740cbe9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3055,7 +3055,7 @@ dependencies = [ [[package]] name = "webclaw-cli" -version = "0.3.5" +version = "0.3.6" dependencies = [ "clap", "dotenvy", @@ -3075,7 +3075,7 @@ dependencies = [ [[package]] name = "webclaw-core" -version = "0.3.5" +version = "0.3.6" dependencies = [ "ego-tree", "once_cell", @@ -3093,7 +3093,7 @@ dependencies = [ [[package]] name = "webclaw-fetch" -version = "0.3.5" +version = "0.3.6" dependencies = [ "bytes", "calamine", @@ -3115,7 +3115,7 @@ dependencies = [ [[package]] name = "webclaw-llm" -version = "0.3.5" +version = "0.3.6" dependencies = [ "async-trait", "reqwest", @@ -3128,7 +3128,7 @@ dependencies = [ [[package]] name = "webclaw-mcp" -version = "0.3.5" +version = "0.3.6" dependencies = [ "dotenvy", "reqwest", @@ -3148,7 +3148,7 @@ dependencies = [ [[package]] name = "webclaw-pdf" -version = "0.3.5" +version = "0.3.6" dependencies = [ "pdf-extract", "thiserror", diff --git a/Cargo.toml b/Cargo.toml index a4f1dd4..f45fb12 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ resolver = "2" members = ["crates/*"] [workspace.package] -version = "0.3.5" +version = "0.3.6" edition = "2024" license = "AGPL-3.0" repository = "https://github.com/0xMassi/webclaw" diff --git a/crates/webclaw-cli/src/main.rs b/crates/webclaw-cli/src/main.rs index 24609e3..3e5ab9f 100644 --- a/crates/webclaw-cli/src/main.rs +++ b/crates/webclaw-cli/src/main.rs @@ -535,6 +535,13 @@ fn format_output(result: &ExtractionResult, format: &OutputFormat, show_metadata out.push_str(&format_frontmatter(&result.metadata)); } out.push_str(&result.content.markdown); + if !result.structured_data.is_empty() { + out.push_str("\n\n## Structured Data\n\n```json\n"); + out.push_str( + &serde_json::to_string_pretty(&result.structured_data).unwrap_or_default(), + ); + out.push_str("\n```"); + } out } OutputFormat::Json => serde_json::to_string_pretty(result).expect("serialization failed"), @@ -838,6 +845,12 @@ fn print_output(result: &ExtractionResult, format: &OutputFormat, show_metadata: print!("{}", format_frontmatter(&result.metadata)); } println!("{}", result.content.markdown); + if !result.structured_data.is_empty() { + println!( + "\n## Structured Data\n\n```json\n{}\n```", + serde_json::to_string_pretty(&result.structured_data).unwrap_or_default() + ); + } } OutputFormat::Json => { // serde_json::to_string_pretty won't fail on our types diff --git a/crates/webclaw-core/src/llm/mod.rs b/crates/webclaw-core/src/llm/mod.rs index d34f925..126558f 100644 --- a/crates/webclaw-core/src/llm/mod.rs +++ b/crates/webclaw-core/src/llm/mod.rs @@ -45,6 +45,13 @@ pub fn to_llm_text(result: &ExtractionResult, url: Option<&str>) -> String { } } + // -- 4. Structured data (NEXT_DATA, SvelteKit, JSON-LD) -- + if !result.structured_data.is_empty() { + out.push_str("\n\n## Structured Data\n\n```json\n"); + out.push_str(&serde_json::to_string_pretty(&result.structured_data).unwrap_or_default()); + out.push_str("\n```"); + } + out.trim().to_string() }