mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-04-25 00:06:21 +02:00
feat: structured data in markdown/LLM output + v0.3.6
__NEXT_DATA__, SvelteKit, and JSON-LD now appear as a ## Structured Data section in -f markdown and -f llm output. Works with --only-main-content and all extraction flags. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b219fc3648
commit
344eea74d9
5 changed files with 37 additions and 7 deletions
10
CHANGELOG.md
10
CHANGELOG.md
|
|
@ -3,6 +3,16 @@
|
|||
All notable changes to webclaw are documented here.
|
||||
Format follows [Keep a Changelog](https://keepachangelog.com/).
|
||||
|
||||
## [0.3.6] — 2026-04-02
|
||||
|
||||
### Added
|
||||
- **Structured data in markdown/LLM output**: `__NEXT_DATA__`, SvelteKit, and JSON-LD data now appears as a `## Structured Data` section with a JSON code block at the end of `-f markdown` and `-f llm` output. Works with `--only-main-content` and all other flags.
|
||||
|
||||
### Fixed
|
||||
- **Homebrew CI**: formula now updates all 4 platform checksums after Docker build completes, preventing SHA mismatch on Linux installs (#12).
|
||||
|
||||
---
|
||||
|
||||
## [0.3.5] — 2026-04-02
|
||||
|
||||
### Added
|
||||
|
|
|
|||
12
Cargo.lock
generated
12
Cargo.lock
generated
|
|
@ -3055,7 +3055,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-cli"
|
||||
version = "0.3.5"
|
||||
version = "0.3.6"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"dotenvy",
|
||||
|
|
@ -3075,7 +3075,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-core"
|
||||
version = "0.3.5"
|
||||
version = "0.3.6"
|
||||
dependencies = [
|
||||
"ego-tree",
|
||||
"once_cell",
|
||||
|
|
@ -3093,7 +3093,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-fetch"
|
||||
version = "0.3.5"
|
||||
version = "0.3.6"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"calamine",
|
||||
|
|
@ -3115,7 +3115,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-llm"
|
||||
version = "0.3.5"
|
||||
version = "0.3.6"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"reqwest",
|
||||
|
|
@ -3128,7 +3128,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-mcp"
|
||||
version = "0.3.5"
|
||||
version = "0.3.6"
|
||||
dependencies = [
|
||||
"dotenvy",
|
||||
"reqwest",
|
||||
|
|
@ -3148,7 +3148,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-pdf"
|
||||
version = "0.3.5"
|
||||
version = "0.3.6"
|
||||
dependencies = [
|
||||
"pdf-extract",
|
||||
"thiserror",
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ resolver = "2"
|
|||
members = ["crates/*"]
|
||||
|
||||
[workspace.package]
|
||||
version = "0.3.5"
|
||||
version = "0.3.6"
|
||||
edition = "2024"
|
||||
license = "AGPL-3.0"
|
||||
repository = "https://github.com/0xMassi/webclaw"
|
||||
|
|
|
|||
|
|
@ -535,6 +535,13 @@ fn format_output(result: &ExtractionResult, format: &OutputFormat, show_metadata
|
|||
out.push_str(&format_frontmatter(&result.metadata));
|
||||
}
|
||||
out.push_str(&result.content.markdown);
|
||||
if !result.structured_data.is_empty() {
|
||||
out.push_str("\n\n## Structured Data\n\n```json\n");
|
||||
out.push_str(
|
||||
&serde_json::to_string_pretty(&result.structured_data).unwrap_or_default(),
|
||||
);
|
||||
out.push_str("\n```");
|
||||
}
|
||||
out
|
||||
}
|
||||
OutputFormat::Json => serde_json::to_string_pretty(result).expect("serialization failed"),
|
||||
|
|
@ -838,6 +845,12 @@ fn print_output(result: &ExtractionResult, format: &OutputFormat, show_metadata:
|
|||
print!("{}", format_frontmatter(&result.metadata));
|
||||
}
|
||||
println!("{}", result.content.markdown);
|
||||
if !result.structured_data.is_empty() {
|
||||
println!(
|
||||
"\n## Structured Data\n\n```json\n{}\n```",
|
||||
serde_json::to_string_pretty(&result.structured_data).unwrap_or_default()
|
||||
);
|
||||
}
|
||||
}
|
||||
OutputFormat::Json => {
|
||||
// serde_json::to_string_pretty won't fail on our types
|
||||
|
|
|
|||
|
|
@ -45,6 +45,13 @@ pub fn to_llm_text(result: &ExtractionResult, url: Option<&str>) -> String {
|
|||
}
|
||||
}
|
||||
|
||||
// -- 4. Structured data (NEXT_DATA, SvelteKit, JSON-LD) --
|
||||
if !result.structured_data.is_empty() {
|
||||
out.push_str("\n\n## Structured Data\n\n```json\n");
|
||||
out.push_str(&serde_json::to_string_pretty(&result.structured_data).unwrap_or_default());
|
||||
out.push_str("\n```");
|
||||
}
|
||||
|
||||
out.trim().to_string()
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue