feat: structured data in markdown/LLM output + v0.3.6

`__NEXT_DATA__`, SvelteKit, and JSON-LD data now appear as a
"## Structured Data" section in `-f markdown` and `-f llm` output.
Works with --only-main-content and all extraction flags.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Valerio 2026-04-02 19:16:56 +02:00
parent b219fc3648
commit 344eea74d9
5 changed files with 37 additions and 7 deletions

View file

@ -3,6 +3,16 @@
All notable changes to webclaw are documented here. All notable changes to webclaw are documented here.
Format follows [Keep a Changelog](https://keepachangelog.com/). Format follows [Keep a Changelog](https://keepachangelog.com/).
## [0.3.6] — 2026-04-02
### Added
- **Structured data in markdown/LLM output**: `__NEXT_DATA__`, SvelteKit, and JSON-LD data is now appended to `-f markdown` and `-f llm` output as a trailing `## Structured Data` section containing a fenced JSON code block. The section is omitted when no structured data is found. Works with `--only-main-content` and all other flags.
### Fixed
- **Homebrew CI**: the formula now updates all 4 platform checksums after the Docker build completes, preventing SHA mismatches on Linux installs (#12).
---
## [0.3.5] — 2026-04-02 ## [0.3.5] — 2026-04-02
### Added ### Added

12
Cargo.lock generated
View file

@ -3055,7 +3055,7 @@ dependencies = [
[[package]] [[package]]
name = "webclaw-cli" name = "webclaw-cli"
version = "0.3.5" version = "0.3.6"
dependencies = [ dependencies = [
"clap", "clap",
"dotenvy", "dotenvy",
@ -3075,7 +3075,7 @@ dependencies = [
[[package]] [[package]]
name = "webclaw-core" name = "webclaw-core"
version = "0.3.5" version = "0.3.6"
dependencies = [ dependencies = [
"ego-tree", "ego-tree",
"once_cell", "once_cell",
@ -3093,7 +3093,7 @@ dependencies = [
[[package]] [[package]]
name = "webclaw-fetch" name = "webclaw-fetch"
version = "0.3.5" version = "0.3.6"
dependencies = [ dependencies = [
"bytes", "bytes",
"calamine", "calamine",
@ -3115,7 +3115,7 @@ dependencies = [
[[package]] [[package]]
name = "webclaw-llm" name = "webclaw-llm"
version = "0.3.5" version = "0.3.6"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"reqwest", "reqwest",
@ -3128,7 +3128,7 @@ dependencies = [
[[package]] [[package]]
name = "webclaw-mcp" name = "webclaw-mcp"
version = "0.3.5" version = "0.3.6"
dependencies = [ dependencies = [
"dotenvy", "dotenvy",
"reqwest", "reqwest",
@ -3148,7 +3148,7 @@ dependencies = [
[[package]] [[package]]
name = "webclaw-pdf" name = "webclaw-pdf"
version = "0.3.5" version = "0.3.6"
dependencies = [ dependencies = [
"pdf-extract", "pdf-extract",
"thiserror", "thiserror",

View file

@ -3,7 +3,7 @@ resolver = "2"
members = ["crates/*"] members = ["crates/*"]
[workspace.package] [workspace.package]
version = "0.3.5" version = "0.3.6"
edition = "2024" edition = "2024"
license = "AGPL-3.0" license = "AGPL-3.0"
repository = "https://github.com/0xMassi/webclaw" repository = "https://github.com/0xMassi/webclaw"

View file

@ -535,6 +535,13 @@ fn format_output(result: &ExtractionResult, format: &OutputFormat, show_metadata
out.push_str(&format_frontmatter(&result.metadata)); out.push_str(&format_frontmatter(&result.metadata));
} }
out.push_str(&result.content.markdown); out.push_str(&result.content.markdown);
if !result.structured_data.is_empty() {
out.push_str("\n\n## Structured Data\n\n```json\n");
out.push_str(
&serde_json::to_string_pretty(&result.structured_data).unwrap_or_default(),
);
out.push_str("\n```");
}
out out
} }
OutputFormat::Json => serde_json::to_string_pretty(result).expect("serialization failed"), OutputFormat::Json => serde_json::to_string_pretty(result).expect("serialization failed"),
@ -838,6 +845,12 @@ fn print_output(result: &ExtractionResult, format: &OutputFormat, show_metadata:
print!("{}", format_frontmatter(&result.metadata)); print!("{}", format_frontmatter(&result.metadata));
} }
println!("{}", result.content.markdown); println!("{}", result.content.markdown);
if !result.structured_data.is_empty() {
println!(
"\n## Structured Data\n\n```json\n{}\n```",
serde_json::to_string_pretty(&result.structured_data).unwrap_or_default()
);
}
} }
OutputFormat::Json => { OutputFormat::Json => {
// serde_json::to_string_pretty won't fail on our types // serde_json::to_string_pretty won't fail on our types

View file

@ -45,6 +45,13 @@ pub fn to_llm_text(result: &ExtractionResult, url: Option<&str>) -> String {
} }
} }
// -- 4. Structured data (NEXT_DATA, SvelteKit, JSON-LD) --
if !result.structured_data.is_empty() {
out.push_str("\n\n## Structured Data\n\n```json\n");
out.push_str(&serde_json::to_string_pretty(&result.structured_data).unwrap_or_default());
out.push_str("\n```");
}
out.trim().to_string() out.trim().to_string()
} }