mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-04-25 00:06:21 +02:00
feat: extract __NEXT_DATA__ into structured_data
Next.js pages embed server-rendered data in <script id="__NEXT_DATA__">. Now extracted as structured JSON (pageProps) in the structured_data field. Tested on 45 sites — 13 return rich structured data including prices, product info, and page state not visible in the DOM. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
4e81c3430d
commit
8d29382b25
5 changed files with 72 additions and 10 deletions
12
Cargo.lock
generated
12
Cargo.lock
generated
|
|
@ -3055,7 +3055,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-cli"
|
||||
version = "0.3.4"
|
||||
version = "0.3.5"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"dotenvy",
|
||||
|
|
@ -3075,7 +3075,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-core"
|
||||
version = "0.3.4"
|
||||
version = "0.3.5"
|
||||
dependencies = [
|
||||
"ego-tree",
|
||||
"once_cell",
|
||||
|
|
@ -3093,7 +3093,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-fetch"
|
||||
version = "0.3.4"
|
||||
version = "0.3.5"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"calamine",
|
||||
|
|
@ -3115,7 +3115,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-llm"
|
||||
version = "0.3.4"
|
||||
version = "0.3.5"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"reqwest",
|
||||
|
|
@ -3128,7 +3128,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-mcp"
|
||||
version = "0.3.4"
|
||||
version = "0.3.5"
|
||||
dependencies = [
|
||||
"dotenvy",
|
||||
"reqwest",
|
||||
|
|
@ -3148,7 +3148,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "webclaw-pdf"
|
||||
version = "0.3.4"
|
||||
version = "0.3.5"
|
||||
dependencies = [
|
||||
"pdf-extract",
|
||||
"thiserror",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue