/// Shared noise detection for web content extraction. /// /// Identifies elements that don't contribute to main content: /// navigation, sidebars, footers, ads, cookie banners, modals, etc. /// Used by both the extractor (candidate filtering) and the markdown /// converter (output-time stripping). use scraper::ElementRef; const NOISE_TAGS: &[&str] = &[ "script", "style", "noscript", "iframe", "svg", "nav", "aside", "footer", "header", "video", "audio", "canvas", // NOTE: