/// Extract JSON-LD structured data from HTML. /// /// Parses ` let content_start = abs_start + tag_end_offset + 1; let remaining = &html[content_start..]; let Some(close_offset) = remaining.to_lowercase().find("") else { search_from = content_start; continue; }; let json_str = remaining[..close_offset].trim(); search_from = content_start + close_offset + 9; if json_str.is_empty() { continue; } // Parse — some sites have arrays at top level match serde_json::from_str::(json_str) { Ok(Value::Array(arr)) => results.extend(arr), Ok(val) => results.push(val), Err(_) => {} } } results } #[cfg(test)] mod tests { use super::*; #[test] fn extracts_single_json_ld() { let html = r#" "#; let results = extract_json_ld(html); assert_eq!(results.len(), 1); assert_eq!(results[0]["@type"], "Product"); assert_eq!(results[0]["name"], "Test"); } #[test] fn extracts_multiple_json_ld_blocks() { let html = r#" "#; let results = extract_json_ld(html); assert_eq!(results.len(), 2); assert_eq!(results[0]["@type"], "WebSite"); assert_eq!(results[1]["@type"], "Product"); } #[test] fn handles_array_json_ld() { let html = r#" "#; let results = extract_json_ld(html); assert_eq!(results.len(), 2); } #[test] fn skips_invalid_json() { let html = r#" "#; let results = extract_json_ld(html); assert_eq!(results.len(), 1); assert_eq!(results[0]["name"], "Valid"); } #[test] fn ignores_regular_script_tags() { let html = r#" "#; let results = extract_json_ld(html); assert_eq!(results.len(), 1); } #[test] fn handles_no_json_ld() { let html = "

No structured data here

"; let results = extract_json_ld(html); assert!(results.is_empty()); } #[test] fn case_insensitive_type() { let html = r#" "#; let results = extract_json_ld(html); assert_eq!(results.len(), 1); } #[test] fn handles_whitespace_in_json() { let html = r#" "#; let results = extract_json_ld(html); assert_eq!(results.len(), 1); assert_eq!(results[0]["name"], "Test"); } #[test] fn empty_script_tag_skipped() { let html = r#" "#; let results = extract_json_ld(html); assert_eq!(results.len(), 1); } }