From 217bfe088bc177057939e26e0aecb9690be89322 Mon Sep 17 00:00:00 2001 From: Valerio Date: Thu, 4 Jun 2026 16:16:08 +0200 Subject: [PATCH] feat(reddit): parse old.reddit.com HTML instead of the dead .json API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reddit blocked unauthenticated `.json` access, so the previous extractor returned block pages or timed out on every thread. Switch to parsing old.reddit.com's server-rendered HTML, which needs no API key or JS. Fetch layer: - Rewrite every Reddit host to old.reddit.com before fetching; drop all `.json` URL handling and the JSON response parser. Extraction (webclaw-core::reddit): - New HTML parser producing a typed post + nested comment tree. - Comments nest structurally (.comment > .child > .sitetable > .comment); old.reddit omits a usable depth attribute, so the tree is walked recursively. Bodies live in .entry > form > .usertext-body > .md. - Post metadata: title, author, subreddit, score, comment count (data-comments-count), self-vs-link (self class / self.* domain), flair, self-text body. - Comment scores read the .score.unvoted title (the displayed value, not the ±1 vote-state siblings); hidden scores are None, not 0. - Deleted comments are kept in place so their replies aren't orphaned; "load more comments" stubs are skipped. Markdown output: - Reply nesting via blockquote depth (avoids 4-space indentation turning text and code fences into broken indented-code blocks). - Links keep their target as [text](url); root-relative reddit links resolve against old.reddit.com. Nested lists indent correctly. - A recognised but unparseable /comments/ page returns no content rather than falling through to generic extraction of Reddit chrome. Tests: regression suite runs against real old.reddit.com fixtures (testdata/reddit/), the ground truth that surfaced the parsing and markdown bugs synthetic HTML had hidden. Fixtures are excluded from the published crate. 
--- crates/webclaw-core/Cargo.toml | 4 + crates/webclaw-core/src/lib.rs | 19 + crates/webclaw-core/src/reddit.rs | 968 ++++++++++++++++++ .../reddit/askreddit_deep_morechildren.html | 596 +++++++++++ .../testdata/reddit/ebpf_6comments.html | 82 ++ .../testdata/reddit/elixir_60comments.html | 312 ++++++ .../testdata/reddit/pandas_34comments.html | 227 ++++ .../reddit/rust_selfpost_36comments.html | 234 +++++ crates/webclaw-fetch/src/client.rs | 65 +- crates/webclaw-fetch/src/extractors/reddit.rs | 208 +--- crates/webclaw-fetch/src/reddit.rs | 198 +--- 11 files changed, 2522 insertions(+), 391 deletions(-) create mode 100644 crates/webclaw-core/src/reddit.rs create mode 100644 crates/webclaw-core/testdata/reddit/askreddit_deep_morechildren.html create mode 100644 crates/webclaw-core/testdata/reddit/ebpf_6comments.html create mode 100644 crates/webclaw-core/testdata/reddit/elixir_60comments.html create mode 100644 crates/webclaw-core/testdata/reddit/pandas_34comments.html create mode 100644 crates/webclaw-core/testdata/reddit/rust_selfpost_36comments.html diff --git a/crates/webclaw-core/Cargo.toml b/crates/webclaw-core/Cargo.toml index 19b2e08..5c2743a 100644 --- a/crates/webclaw-core/Cargo.toml +++ b/crates/webclaw-core/Cargo.toml @@ -4,6 +4,10 @@ description = "Pure HTML content extraction engine for LLMs" version.workspace = true edition.workspace = true license.workspace = true +# Reddit regression fixtures are real old.reddit.com pages read at test time; +# they're large and only needed to run the test suite from the repo, so keep +# them out of the published crate. 
+exclude = ["testdata/reddit/*.html"] [features] default = ["quickjs"] diff --git a/crates/webclaw-core/src/lib.rs b/crates/webclaw-core/src/lib.rs index 1ddd1f0..8cdfbbb 100644 --- a/crates/webclaw-core/src/lib.rs +++ b/crates/webclaw-core/src/lib.rs @@ -17,6 +17,7 @@ pub mod markdown; pub mod metadata; #[allow(dead_code)] pub(crate) mod noise; +pub mod reddit; pub mod structured_data; pub mod types; pub mod youtube; @@ -94,6 +95,24 @@ fn extract_with_options_inner( return Err(ExtractError::NoContent); } + // Reddit fast path: parse old.reddit.com HTML directly. + // The fetch layer rewrites all Reddit hosts to old.reddit.com before + // calling extract, so we always get stable server-rendered HTML here. + if let Some(u) = url + && reddit::is_reddit_url(u) + { + if let Some(result) = reddit::try_extract(html, u) { + return Ok(result); + } + // A recognised comment thread that we couldn't parse (Reddit markup + // change, or a block/challenge page) — don't fall through to generic + // extraction, which would emit Reddit nav/sidebar chrome. Listings + // and profiles (no `/comments/`) intentionally fall through below. + if u.contains("/comments/") { + return Err(ExtractError::NoContent); + } + } + // YouTube fast path: if the URL is a YouTube video page, try extracting // structured metadata from ytInitialPlayerResponse before DOM scoring. // This gives LLMs a clean, structured view of video metadata. diff --git a/crates/webclaw-core/src/reddit.rs b/crates/webclaw-core/src/reddit.rs new file mode 100644 index 0000000..ab1d394 --- /dev/null +++ b/crates/webclaw-core/src/reddit.rs @@ -0,0 +1,968 @@ +//! Reddit thread extractor — parses old.reddit.com HTML directly. +//! +//! old.reddit.com serves fully server-rendered HTML with stable class names +//! and data attributes. No JS, no API key, no `.json` trick needed. 
+ +use scraper::{ElementRef, Html, Selector}; +use serde::Serialize; + +use crate::{Content, DomainData, DomainType, ExtractionResult, Metadata}; + +// ─── Public types ────────────────────────────────────────────────────────────── + +#[derive(Serialize)] +pub struct RedditPost { + pub id: Option, + pub title: String, + pub author: String, + pub subreddit: Option, + pub score: i64, + pub body: Option, + pub num_comments: usize, + pub permalink: String, + pub url: Option, + pub is_self: bool, + pub flair: Option, + pub created_utc: Option, +} + +#[derive(Serialize)] +pub struct RedditComment { + pub id: Option, + pub author: String, + pub body: String, + /// `None` when Reddit hides the score (fresh comments). Distinct from + /// `Some(0)`, which is a real net-zero score. + pub score: Option, + pub depth: usize, + pub is_op: bool, + pub created_utc: Option, + pub replies: Vec, +} + +#[derive(Serialize)] +pub struct RedditThread { + #[serde(rename = "url")] + pub source_url: String, + pub post: Option, + pub comments: Vec, +} + +// ─── Public API ──────────────────────────────────────────────────────────────── + +pub fn is_reddit_url(url: &str) -> bool { + matches!( + host_of(url), + "reddit.com" | "www.reddit.com" | "old.reddit.com" | "np.reddit.com" | "new.reddit.com" + ) +} + +/// Try to parse a Reddit thread from old.reddit.com HTML. +/// Returns `None` if the page doesn't have recognisable Reddit structure. +pub fn try_extract_thread(html: &str, url: &str) -> Option { + if !url.contains("/comments/") { + return None; + } + let doc = Html::parse_document(html); + let post = parse_post(&doc); + let op = post.as_ref().map(|p| p.author.as_str()).unwrap_or(""); + let comments = parse_comments(&doc, op); + + if post.is_none() && comments.is_empty() { + return None; + } + + Some(RedditThread { + source_url: url.to_string(), + post, + comments, + }) +} + +/// Entry point for `webclaw-core`'s extraction fast path. 
+pub fn try_extract(html: &str, url: &str) -> Option { + let thread = try_extract_thread(html, url)?; + Some(to_extraction_result(&thread)) +} + +// ─── ExtractionResult builder ────────────────────────────────────────────────── + +fn to_extraction_result(thread: &RedditThread) -> ExtractionResult { + let md = to_markdown(thread); + let plain = plain_text(&md); + let wc = md.split_whitespace().count(); + + let (title, author, site_name) = thread + .post + .as_ref() + .map(|p| { + ( + Some(p.title.clone()), + Some(p.author.clone()), + p.subreddit.clone(), + ) + }) + .unwrap_or_default(); + + ExtractionResult { + metadata: Metadata { + title, + description: None, + author, + published_date: None, + language: Some("en".to_string()), + url: Some(thread.source_url.clone()), + site_name, + image: None, + favicon: None, + word_count: wc, + }, + content: Content { + markdown: md, + plain_text: plain, + links: vec![], + images: vec![], + code_blocks: vec![], + raw_html: None, + }, + domain_data: Some(DomainData { + domain_type: DomainType::Social, + }), + structured_data: vec![], + } +} + +// ─── Markdown rendering ──────────────────────────────────────────────────────── + +pub fn to_markdown(thread: &RedditThread) -> String { + let mut out = String::new(); + + if let Some(p) = &thread.post { + out.push_str(&format!("# {}\n\n", p.title)); + + let pts = pt_label(Some(p.score)); + let cmt = match p.num_comments { + 0 => String::new(), + 1 => " · 1 comment".to_string(), + n => format!(" · {n} comments"), + }; + let sub = p.subreddit.as_deref().unwrap_or("?"); + out.push_str(&format!("**u/{}** · r/{sub} · {pts}{cmt}\n\n", p.author)); + + if let Some(ref body) = p.body + && !body.is_empty() + { + out.push_str(body); + out.push_str("\n\n"); + } + if let Some(ref link) = p.url + && !p.is_self + { + out.push_str(&format!("[Link]({link})\n\n")); + } + out.push_str("---\n\n"); + } + + if !thread.comments.is_empty() { + out.push_str("## Comments\n\n"); + for c in &thread.comments { + 
render_comment(c, &mut out); + } + } + + collapse_blank_lines(out.trim_end()) +} + +/// Render one comment + its replies. Nesting is expressed with blockquote +/// depth (`> ` per level) rather than leading spaces: space-indentation of +/// 4+ would turn ordinary text and ``` fences into CommonMark indented code +/// blocks, corrupting any comment at depth ≥ 2. +fn render_comment(c: &RedditComment, out: &mut String) { + let q = "> ".repeat(c.depth); + let blank = ">".repeat(c.depth); + let author = if c.is_op { + format!("**u/{} [OP]**", c.author) + } else { + format!("**u/{}**", c.author) + }; + out.push_str(&format!("{q}{author} · {}\n", pt_label(c.score))); + for line in c.body.lines() { + if line.is_empty() { + out.push_str(&blank); + out.push('\n'); + } else { + out.push_str(&q); + out.push_str(line); + out.push('\n'); + } + } + out.push('\n'); + for reply in &c.replies { + render_comment(reply, out); + } +} + +fn pt_label(n: Option) -> String { + match n { + None => "score hidden".to_string(), + Some(1) => "1 pt".to_string(), + Some(-1) => "-1 pt".to_string(), + Some(n) => format!("{n} pts"), + } +} + +/// Collapse runs of 3+ newlines down to a blank-line separator so the +/// blockquote prefixes and `
` spacing don't leave large gaps.
+fn collapse_blank_lines(s: &str) -> String {
+    // `run` counts consecutive '\n' characters. Only the first two of any
+    // run are copied, so at most one empty line survives between blocks.
+    let mut run = 0usize;
+    let mut collapsed = String::with_capacity(s.len());
+    for c in s.chars() {
+        match c {
+            '\n' => {
+                run += 1;
+                if run > 2 {
+                    continue;
+                }
+            }
+            _ => run = 0,
+        }
+        collapsed.push(c);
+    }
+    collapsed
+}
+
+/// Derive a plain-text rendering from generated markdown, line by line.
+///
+/// For each line: leading whitespace is trimmed, one leading blockquote
+/// marker (`> ` or `>`) is removed, leading `#` heading markers are dropped,
+/// and the emphasis/code markers `**`, `~~`, `*` and `` ` `` are deleted.
+/// Lines are re-joined with `\n`, so a trailing newline in `md` is not kept.
+fn plain_text(md: &str) -> String {
+    md.lines()
+        .map(|l| {
+            // Strip a single leading blockquote / heading marker, then drop
+            // emphasis markers. Greedy char-class stripping (the old approach)
+            // ate legitimate content like ">"-prefixed quotes.
+            let l = l.trim_start();
+            let l = l
+                .strip_prefix("> ")
+                .or_else(|| l.strip_prefix('>'))
+                .unwrap_or(l);
+            let l = l.trim_start_matches('#').trim_start();
+            l.replace("**", "")
+                .replace("~~", "")
+                .replace(['*', '`'], "")
+        })
+        .collect::<Vec<_>>()
+        .join("\n")
+}
+
+// ─── HTML parsing ──────────────────────────────────────────────────────────────
+
+/// Parse the submission itself: the first `.thing.link` inside `#siteTable`.
+///
+/// Metadata is read from the element's `data-*` attributes; title, flair,
+/// self-text and timestamp come from descendant elements.
+///
+/// Returns `None` when that element is absent or has no non-empty
+/// `.title a.title` text. A missing or unparseable score / comment count
+/// becomes 0, and a missing or empty author becomes `"[deleted]"`.
+fn parse_post(doc: &Html) -> Option<RedditPost> {
+    let sel = Selector::parse("#siteTable .thing.link").ok()?;
+    let thing = doc.select(&sel).next()?;
+    let v = thing.value();
+
+    // `data-fullname` carries a `t3_` type prefix; strip it exactly once.
+    let id = v
+        .attr("data-fullname")
+        .map(|s| s.strip_prefix("t3_").unwrap_or(s).to_string());
+    // An empty attribute is treated like a missing one, matching how
+    // comment authors are handled.
+    let author = v
+        .attr("data-author")
+        .filter(|a| !a.is_empty())
+        .unwrap_or("[deleted]")
+        .to_string();
+    let subreddit = v.attr("data-subreddit").map(str::to_string);
+    let score: i64 = v
+        .attr("data-score")
+        .and_then(|s| s.parse().ok())
+        .unwrap_or(0);
+    let num_comments: usize = v
+        .attr("data-comments-count")
+        .and_then(|s| s.parse().ok())
+        .unwrap_or(0);
+    let permalink_path = v.attr("data-permalink").unwrap_or("");
+    let permalink = format!("https://old.reddit.com{permalink_path}");
+    // Self-posts carry the `self` class and a `self.` domain; their
+    // data-url points back at the permalink rather than an external site.
+    let is_self = v.has_class("self", scraper::CaseSensitivity::AsciiCaseInsensitive)
+        || v.attr("data-domain")
+            .is_some_and(|d| d.starts_with("self."));
+    let link_url = v.attr("data-url").map(str::to_string);
+    let url = if is_self { None } else { link_url };
+
+    // Title: required — a post without one is treated as unparseable.
+    let sel_title = Selector::parse(".title a.title").ok()?;
+    let title = thing
+        .select(&sel_title)
+        .next()
+        .map(|el| el.text().collect::<String>().trim().to_string())
+        .filter(|s| !s.is_empty())?;
+
+    // Flair: optional; empty labels are dropped.
+    let flair = Selector::parse(".linkflairlabel")
+        .ok()
+        .and_then(|s| thing.select(&s).next())
+        .map(|el| el.text().collect::<String>().trim().to_string())
+        .filter(|s| !s.is_empty());
+
+    // Self-text body: thing > .entry > .expando > .usertext-body [> .md]
+    let body = direct_child(thing, "entry")
+        .and_then(|entry| find_class(entry, "expando"))
+        .and_then(|expando| find_class(expando, "usertext-body"))
+        .and_then(|ut| find_class(ut, "md"))
+        .map(md_to_markdown)
+        .filter(|s| !s.is_empty());
+
+    // Datetime: the `datetime` attribute of the first `<time>` in the post.
+    let created_utc = Selector::parse("time[datetime]")
+        .ok()
+        .and_then(|s| thing.select(&s).next())
+        .and_then(|t| t.value().attr("datetime"))
+        .map(str::to_string);
+
+    Some(RedditPost {
+        id,
+        title,
+        author,
+        subreddit,
+        score,
+        body,
+        num_comments,
+        permalink,
+        url,
+        is_self,
+        flair,
+        created_utc,
+    })
+}
+
+// ─── Comment parsing ───────────────────────────────────────────────────────────
+//
+// old.reddit.com nests comments structurally, not via a depth attribute:
+//
+//   .commentarea
+//     .sitetable.nestedlisting
+//       .comment.thing                          ← root comment
+//         .entry → form → .usertext-body → .md  ← its own body
+//         .child
+//           .sitetable.listing
+//             .comment.thing                    ← reply (recurse)
+//
+// `data-depth`/`data-replies` are absent or always "0" in the logged-out
+// HTML, so we walk the tree by recursing into each comment's `.child`.
+
+/// Locate the root comment listing and parse the whole comment tree.
+///
+/// `op` is the submission author's name, used to flag OP comments. Returns
+/// an empty vector when no listing is found.
+fn parse_comments(doc: &Html, op: &str) -> Vec<RedditComment> {
+    // Root listing is `.sitetable.nestedlisting` inside `.commentarea`
+    // (note: `commentarea` is a class on old.reddit, not an id). Fall back
+    // to the first `.nestedlisting` anywhere for comment-permalink pages.
+    let listing = Selector::parse(".commentarea .sitetable.nestedlisting")
+        .ok()
+        .and_then(|s| doc.select(&s).next())
+        .or_else(|| {
+            Selector::parse(".sitetable.nestedlisting")
+                .ok()
+                .and_then(|s| doc.select(&s).next())
+        });
+
+    // Root comments sit at depth 0.
+    listing
+        .map(|l| walk_comment_level(l, op, 0))
+        .unwrap_or_default()
+}
+
+/// Parse the direct-child `.comment.thing` elements of a comment listing.
+///
+/// Only immediate children of `listing` are considered; nested replies are
+/// reached through `parse_one_comment`, which is called with the same `op`
+/// and `depth` for every comment on this level. Children that
+/// `parse_one_comment` rejects are omitted from the result.
+fn walk_comment_level(listing: ElementRef, op: &str, depth: usize) -> Vec<RedditComment> {
+    listing
+        .children()
+        .filter_map(ElementRef::wrap)
+        .filter(|c| {
+            let val = c.value();
+            val.has_class("comment", scraper::CaseSensitivity::AsciiCaseInsensitive)
+                && val.has_class("thing", scraper::CaseSensitivity::AsciiCaseInsensitive)
+        })
+        .filter_map(|c| parse_one_comment(c, op, depth))
+        .collect()
+}
+
+/// Parse one `.comment.thing` element together with its reply subtree.
+///
+/// `op` is the submission author (for the `is_op` flag) and `depth` is this
+/// comment's nesting level; replies are parsed at `depth + 1`.
+///
+/// Returns `None` only for "load more comments" placeholders. Deleted
+/// comments are still returned, with author `"[deleted]"` and/or body
+/// `"[removed]"`, so their replies stay attached to the tree.
+fn parse_one_comment(c: ElementRef, op: &str, depth: usize) -> Option<RedditComment> {
+    let v = c.value();
+
+    // "load more comments" placeholders are `.thing` with type=morechildren.
+    // They carry a t1_ fullname but no real content — skip them.
+    if v.attr("data-type") == Some("morechildren")
+        || v.has_class(
+            "morechildren",
+            scraper::CaseSensitivity::AsciiCaseInsensitive,
+        )
+    {
+        return None;
+    }
+
+    let is_deleted = v.has_class("deleted", scraper::CaseSensitivity::AsciiCaseInsensitive);
+    // Strip the `t1_` type prefix exactly once.
+    let id = v
+        .attr("data-fullname")
+        .map(|s| s.strip_prefix("t1_").unwrap_or(s).to_string());
+    let author = v
+        .attr("data-author")
+        .filter(|a| !a.is_empty())
+        .unwrap_or("[deleted]")
+        .to_string();
+
+    // Own body lives in `.entry > form > .usertext-body > .md`. `.child`
+    // (nested replies) is a sibling of `.entry`, so descending within
+    // `.entry` never crosses into a reply's body.
+    let entry = direct_child(c, "entry");
+    let body = entry
+        .and_then(|e| find_class(e, "usertext-body"))
+        .and_then(|ut| find_class(ut, "md"))
+        .map(md_to_markdown)
+        .filter(|s| !s.is_empty())
+        .unwrap_or_else(|| {
+            if is_deleted {
+                "[removed]".into()
+            } else {
+                String::new()
+            }
+        });
+
+    // Displayed score is `.score.unvoted`, whose `title` holds the exact
+    // integer (the sibling likes/dislikes spans are ±1). Hidden-score
+    // comments have no `.score.unvoted` span, so `comment_score` returns
+    // None — kept distinct from a genuine 0.
+    let score = entry.and_then(comment_score);
+
+    // Timestamp: `datetime` attribute of the first `<time>` inside `.entry`.
+    let created_utc = entry.and_then(|e| {
+        let sel = Selector::parse("time[datetime]").ok()?;
+        e.select(&sel)
+            .next()?
+            .value()
+            .attr("datetime")
+            .map(str::to_string)
+    });
+
+    // A deleted comment never counts as OP, even if the submission's author
+    // is itself the "[deleted]" placeholder.
+    let is_op = !is_deleted && author != "[deleted]" && author == op;
+
+    // Replies: `.comment > .child > .sitetable > .comment`.
+    let replies = direct_child(c, "child")
+        .and_then(|child| direct_child(child, "sitetable"))
+        .map(|st| walk_comment_level(st, op, depth + 1))
+        .unwrap_or_default();
+
+    Some(RedditComment {
+        id,
+        author,
+        body,
+        score,
+        depth,
+        is_op,
+        created_utc,
+        replies,
+    })
+}
+
+/// Read a comment's score from the `.score.unvoted` span inside `.entry`.
+/// Prefers the `title` attribute (exact integer); falls back to the text.
+/// Returns `None` when Reddit hides the score (no `.score.unvoted` span),
+/// or when neither the title nor the text parses as an integer.
+fn comment_score(entry: ElementRef) -> Option<i64> {
+    let sel = Selector::parse("span.score.unvoted").ok()?;
+    let span = entry.select(&sel).next()?;
+    span.value()
+        .attr("title")
+        .and_then(|t| t.trim().parse().ok())
+        .or_else(|| parse_score(&span.text().collect::<String>()))
+}
+
+// ─── DOM helpers ───────────────────────────────────────────────────────────────
+
+/// First direct child element whose class list includes `class`.
+///
+/// Only immediate children are inspected (no descent); the class comparison
+/// is ASCII case-insensitive. Returns `None` when no child matches.
+fn direct_child<'a>(el: ElementRef<'a>, class: &str) -> Option<ElementRef<'a>> {
+    el.children().filter_map(ElementRef::wrap).find(|c| {
+        c.value()
+            .has_class(class, scraper::CaseSensitivity::AsciiCaseInsensitive)
+    })
+}
+
+/// First descendant (any depth) whose class list includes `class`.
+///
+/// The search is depth-first in document order: each child is tested, then
+/// its own subtree is searched before moving on to the next sibling. `el`
+/// itself is never a candidate. The class comparison is ASCII
+/// case-insensitive.
+fn find_class<'a>(el: ElementRef<'a>, class: &str) -> Option<ElementRef<'a>> {
+    el.children().filter_map(ElementRef::wrap).find_map(|c| {
+        if c.value()
+            .has_class(class, scraper::CaseSensitivity::AsciiCaseInsensitive)
+        {
+            Some(c)
+        } else {
+            find_class(c, class)
+        }
+    })
+}
+
+/// Parse the leading integer of a score label such as `"10 points"`.
+///
+/// Only the first whitespace-separated word is considered. A Unicode minus
+/// sign (U+2212) is normalised to ASCII `-` before parsing. Returns `None`
+/// for empty input or when that first word is not an integer.
+fn parse_score(text: &str) -> Option<i64> {
+    text.split_whitespace()
+        .next()
+        .map(|w| w.replace('−', "-"))
+        .and_then(|w| w.parse().ok())
+}
+
+// ─── .md div → markdown ────────────────────────────────────────────────────────
+
+/// Render the children of a `.md` container as markdown and return the
+/// result with leading and trailing whitespace trimmed.
+fn md_to_markdown(el: ElementRef) -> String {
+    let mut rendered = String::new();
+    render_children(el, &mut rendered);
+    let trimmed = rendered.trim();
+    trimmed.to_string()
+}
+
+/// Append the markdown for every child node of `el` to `out`: text nodes
+/// are copied verbatim, elements go through `render_node`, and any other
+/// node kind is ignored.
+fn render_children(el: ElementRef, out: &mut String) {
+    use scraper::node::Node;
+    for node in el.children() {
+        if let Node::Text(text) = node.value() {
+            out.push_str(text.as_ref());
+        } else if let Some(child_el) = ElementRef::wrap(node) {
+            // `wrap` yields `Some` only for element nodes.
+            render_node(child_el, out);
+        }
+    }
+}
+
+/// Render a single element from a post/comment body as markdown into `out`.
+///
+/// Non-empty block elements (`p`, `div`, `pre`, `blockquote`, headings) and
+/// `hr` are followed by a blank line; inline elements are appended in place.
+/// Elements without a dedicated arm are transparent: their children are
+/// rendered directly.
+fn render_node(el: ElementRef, out: &mut String) {
+    match el.value().name() {
+        "p" | "div" => {
+            let mut inner = String::new();
+            render_children(el, &mut inner);
+            let t = inner.trim();
+            if !t.is_empty() {
+                out.push_str(t);
+                out.push_str("\n\n");
+            }
+        }
+        "br" => out.push('\n'),
+        "strong" | "b" => push_wrapped(el, "**", out),
+        "em" | "i" => push_wrapped(el, "*", out),
+        "del" | "s" | "strike" => push_wrapped(el, "~~", out),
+        "code" => {
+            let t: String = el.text().collect();
+            out.push('`');
+            out.push_str(t.trim());
+            out.push('`');
+        }
+        "pre" => {
+            // Fenced rather than indented so the block survives being
+            // prefixed with blockquote markers for nested comments.
+            let t: String = el.text().collect();
+            out.push_str("```\n");
+            out.push_str(t.trim_end_matches('\n'));
+            out.push_str("\n```\n\n");
+        }
+        "a" => {
+            let text: String = el.text().collect();
+            let text = text.trim();
+            if !text.is_empty() {
+                // Preserve the destination as a markdown link. Resolve
+                // root-relative reddit hrefs (/r/, /user/, /wiki/, ...) and
+                // drop non-navigational ones (javascript:, #fragment, mailto:).
+                let href = el.value().attr("href").unwrap_or("");
+                if href.starts_with("http://") || href.starts_with("https://") {
+                    out.push_str(&format!("[{text}]({href})"));
+                } else if let Some(rest) = href.strip_prefix("//") {
+                    // Protocol-relative URL: it names its own host, so it
+                    // must not be resolved against old.reddit.com. Checked
+                    // before the single-slash case, which it also matches.
+                    out.push_str(&format!("[{text}](https://{rest})"));
+                } else if href.starts_with('/') {
+                    out.push_str(&format!("[{text}](https://old.reddit.com{href})"));
+                } else {
+                    out.push_str(text);
+                }
+            }
+        }
+        "blockquote" => {
+            let mut inner = String::new();
+            render_children(el, &mut inner);
+            let trimmed = inner.trim();
+            for line in trimmed.lines() {
+                // Empty lines get a bare `>` so the quote stays contiguous
+                // without introducing trailing whitespace.
+                out.push('>');
+                if !line.is_empty() {
+                    out.push(' ');
+                    out.push_str(line);
+                }
+                out.push('\n');
+            }
+            out.push('\n');
+        }
+        "ul" => render_list(el, false, 0, out),
+        "ol" => render_list(el, true, 0, out),
+        "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => {
+            // The heading level is the digit in the tag name; fall back to
+            // 2 if it cannot be read.
+            let level = el
+                .value()
+                .name()
+                .chars()
+                .nth(1)
+                .and_then(|c| c.to_digit(10))
+                .unwrap_or(2) as usize;
+            let t: String = el.text().collect();
+            let t = t.trim();
+            if !t.is_empty() {
+                out.push_str(&"#".repeat(level));
+                out.push(' ');
+                out.push_str(t);
+                out.push_str("\n\n");
+            }
+        }
+        "hr" => out.push_str("---\n\n"),
+        "sup" => {
+            // Superscript is emitted as plain text, without any marker.
+            let t: String = el.text().collect();
+            out.push_str(t.trim());
+        }
+        // Unknown / generic containers: recurse
+        _ => render_children(el, out),
+    }
+}
+
+/// Append the element's trimmed text wrapped in `marker` on both sides;
+/// appends nothing when the text is empty.
+fn push_wrapped(el: ElementRef, marker: &str, out: &mut String) {
+    let text: String = el.text().collect();
+    let text = text.trim();
+    if !text.is_empty() {
+        out.push_str(marker);
+        out.push_str(text);
+        out.push_str(marker);
+    }
+}
+
+/// Render a `
    `/`
      `, indenting nested lists by two spaces per level so +/// child items keep their own line instead of being glued to the parent. +fn render_list(list: ElementRef, ordered: bool, indent: usize, out: &mut String) { + use scraper::node::Node; + let pad = " ".repeat(indent); + let mut n = 0; + for li in list + .children() + .filter_map(ElementRef::wrap) + .filter(|c| c.value().name() == "li") + { + n += 1; + // Inline content of this
    1. , excluding nested lists (rendered after). + let mut inline = String::new(); + for child in li.children() { + match child.value() { + Node::Text(t) => inline.push_str(t.as_ref()), + Node::Element(e) if e.name() == "ul" || e.name() == "ol" => {} + Node::Element(_) => { + if let Some(c) = ElementRef::wrap(child) { + render_node(c, &mut inline); + } + } + _ => {} + } + } + let marker = if ordered { + format!("{n}. ") + } else { + "- ".to_string() + }; + out.push_str(&format!("{pad}{marker}{}\n", inline.trim())); + + for child in li.children().filter_map(ElementRef::wrap) { + match child.value().name() { + "ul" => render_list(child, false, indent + 1, out), + "ol" => render_list(child, true, indent + 1, out), + _ => {} + } + } + } + if indent == 0 { + out.push('\n'); + } +} + +// ─── URL helpers ─────────────────────────────────────────────────────────────── + +fn host_of(url: &str) -> &str { + url.split("://") + .nth(1) + .unwrap_or(url) + .split(['/', '?', '#']) + .next() + .unwrap_or("") +} + +// ─── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn is_reddit_url_recognises_variants() { + assert!(is_reddit_url( + "https://www.reddit.com/r/rust/comments/abc/x/" + )); + assert!(is_reddit_url( + "https://old.reddit.com/r/rust/comments/abc/x/" + )); + assert!(is_reddit_url("https://reddit.com/r/rust/comments/abc/x/")); + assert!(!is_reddit_url("https://example.com")); + } + + #[test] + fn try_extract_thread_returns_none_for_listing_url() { + let html = ""; + assert!(try_extract_thread(html, "https://old.reddit.com/r/rust/").is_none()); + } + + #[test] + fn md_to_markdown_basic() { + let html = + Html::parse_fragment(r#"

      Hello world!

      "#); + let sel = Selector::parse(".md").unwrap(); + let el = html.select(&sel).next().unwrap(); + let md = md_to_markdown(el); + assert!(md.contains("**world**")); + assert!(md.contains("Hello")); + } + + #[test] + fn md_to_markdown_blockquote_and_code() { + let html = Html::parse_fragment( + r#"

      Quoted

      fn main() {}
      "#, + ); + let sel = Selector::parse(".md").unwrap(); + let el = html.select(&sel).next().unwrap(); + let md = md_to_markdown(el); + assert!(md.contains("> Quoted")); + assert!(md.contains("```")); + assert!(md.contains("fn main()")); + } + + #[test] + fn md_to_markdown_link_preserves_href() { + let abs = Html::parse_fragment( + r#"

      see this

      "#, + ); + let sel = Selector::parse(".md").unwrap(); + let el = abs.select(&sel).next().unwrap(); + assert!(md_to_markdown(el).contains("[this](https://example.com/x)")); + + // Root-relative reddit links resolve against old.reddit.com. + let rel = Html::parse_fragment( + r#""#, + ); + let el = rel.select(&sel).next().unwrap(); + assert!(md_to_markdown(el).contains("[faq](https://old.reddit.com/r/rust/wiki/faq)")); + + // javascript: / fragment hrefs degrade to bare text. + let js = Html::parse_fragment( + r#""#, + ); + let el = js.select(&sel).next().unwrap(); + let out = md_to_markdown(el); + assert!(out.contains('x') && !out.contains("javascript")); + } + + // ── Regression tests against REAL old.reddit.com HTML ────────────────── + // + // These fixtures are genuine pages fetched from old.reddit.com (see + // testdata/reddit/). They are the ground truth — synthetic HTML is too + // easy to write to match wrong assumptions, which is exactly how the + // first version of this parser shipped silently broken. + + fn fixture(name: &str) -> String { + std::fs::read_to_string(format!("testdata/reddit/{name}")).unwrap() + } + + fn total_comments(cs: &[RedditComment]) -> usize { + cs.len() + cs.iter().map(|c| total_comments(&c.replies)).sum::() + } + + fn collect<'a>(cs: &'a [RedditComment], out: &mut Vec<&'a RedditComment>) { + for c in cs { + out.push(c); + collect(&c.replies, out); + } + } + + #[test] + fn real_link_post_metadata() { + // pandas: external-link post (blog.geekuni.com), 34 comments. 
+ let html = fixture("pandas_34comments.html"); + let t = try_extract_thread( + &html, + "https://old.reddit.com/r/programming/comments/abc123/t/", + ) + .expect("should parse"); + let p = t.post.expect("post"); + assert_eq!(p.author, "Horror-Willingness74"); + assert_eq!(p.subreddit.as_deref(), Some("programming")); + assert_eq!(p.score, 43); + assert_eq!(p.num_comments, 34, "data-comments-count"); + assert!(!p.is_self, "external blog link, not a self post"); + assert_eq!( + p.url.as_deref(), + Some("https://blog.geekuni.com/2026/06/why-learn-pandas.html") + ); + assert!(p.title.contains("Pandas")); + } + + #[test] + fn real_self_post_metadata() { + // A self-post (text) on r/rust: `self.rust` domain, self-text body, + // no external url. + let html = fixture("rust_selfpost_36comments.html"); + let t = try_extract_thread(&html, "https://old.reddit.com/r/rust/comments/abc123/t/") + .expect("should parse"); + let p = t.post.expect("post"); + assert!(p.is_self, "self.rust domain → self post"); + assert_eq!(p.url, None, "self posts carry no external url"); + assert_eq!(p.subreddit.as_deref(), Some("rust")); + assert!( + p.body + .as_deref() + .unwrap_or("") + .contains("IT project manager"), + "self-text body should be extracted: {:?}", + p.body + ); + } + + #[test] + fn real_comment_bodies_and_scores() { + // The original bug: every comment body came back empty because + // .usertext-body sits inside a
      , not directly under .entry. + let html = fixture("ebpf_6comments.html"); + let t = try_extract_thread( + &html, + "https://old.reddit.com/r/programming/comments/abc123/t/", + ) + .expect("should parse"); + // 6 comments total: 5 top-level + 1 nested reply (admalledd under ejrh). + assert_eq!(t.comments.len(), 5, "5 top-level comments"); + assert_eq!(total_comments(&t.comments), 6, "6 comments incl. nested"); + let teerre = t + .comments + .iter() + .find(|c| c.author == "teerre") + .expect("teerre"); + assert!( + teerre.body.contains("Very cool blog"), + "body must be populated, got {:?}", + teerre.body + ); + // Score comes from .score.unvoted title (the real value), not the + // ±1 likes/dislikes siblings. + assert_eq!( + teerre.score, + Some(10), + "unvoted score, not dislikes(9)/likes(11)" + ); + assert!( + t.comments.iter().all(|c| !c.body.is_empty()), + "no comment body should be empty" + ); + } + + #[test] + fn real_nested_comment_tree() { + // pandas has structurally-nested replies (.child > .sitetable > + // .comment). data-depth/data-replies are absent in logged-out HTML. + let html = fixture("pandas_34comments.html"); + let t = try_extract_thread( + &html, + "https://old.reddit.com/r/programming/comments/abc123/t/", + ) + .expect("should parse"); + // 34 rendered comments with content + 1 [deleted] node that old.reddit + // still shows because it has live replies = 35 nodes in the tree. + assert_eq!( + total_comments(&t.comments), + 35, + "all comments incl. 
nested + deleted" + ); + let nested = t.comments.iter().any(|c| !c.replies.is_empty()); + assert!(nested, "at least one comment must have replies"); + let max_depth = { + fn d(cs: &[RedditComment]) -> usize { + cs.iter().map(|c| 1 + d(&c.replies)).max().unwrap_or(0) + } + d(&t.comments) + }; + assert!(max_depth >= 2, "tree should be more than one level deep"); + let a_reply = t.comments.iter().find_map(|c| c.replies.first()); + assert_eq!(a_reply.map(|r| r.depth), Some(1)); + } + + #[test] + fn real_morechildren_stubs_skipped() { + // AskReddit deep thread: 259 .thing[data-fullname=t1_] markers, but + // some are "load more comments" stubs (data-type=morechildren) with + // no author/body. They must not appear as ghost comments. + let html = fixture("askreddit_deep_morechildren.html"); + let t = try_extract_thread( + &html, + "https://old.reddit.com/r/AskReddit/comments/abc123/t/", + ) + .expect("should parse"); + fn check(cs: &[RedditComment]) { + for c in cs { + let ghost = c.body.is_empty() && c.author == "[deleted]" && c.id.is_some(); + assert!(!ghost, "morechildren stub leaked as comment: {:?}", c.id); + check(&c.replies); + } + } + check(&t.comments); + } + + #[test] + fn real_hidden_score_is_none_not_zero() { + // AskReddit has fresh comments with `.score-hidden` (no .score.unvoted + // span). These must be None, distinct from a genuine 0-score comment. + let html = fixture("askreddit_deep_morechildren.html"); + let t = try_extract_thread( + &html, + "https://old.reddit.com/r/AskReddit/comments/abc123/t/", + ) + .expect("should parse"); + let mut all = Vec::new(); + collect(&t.comments, &mut all); + assert!( + all.iter().any(|c| c.score.is_none()), + "some fresh comments have hidden scores → None" + ); + } + + #[test] + fn real_deleted_comment_preserves_subtree() { + // pandas has a [deleted] comment that still has visible replies. The + // structural walk must keep it so its children aren't orphaned. 
+ let html = fixture("pandas_34comments.html"); + let t = try_extract_thread( + &html, + "https://old.reddit.com/r/programming/comments/abc123/t/", + ) + .expect("should parse"); + let mut all = Vec::new(); + collect(&t.comments, &mut all); + let deleted: Vec<_> = all.iter().filter(|c| c.author == "[deleted]").collect(); + assert!(!deleted.is_empty(), "should keep deleted comments"); + assert!( + deleted.iter().any(|c| !c.replies.is_empty()), + "a deleted comment with replies must retain its subtree" + ); + assert!(deleted.iter().all(|c| !c.is_op)); + } + + #[test] + fn real_markdown_is_commonmark_clean() { + // Guards the markdown bugs the verification workflow found: no + // whitespace-only "blank" lines, and ``` fences never indented 4+ + // spaces (which would turn them into literal indented code blocks). + let html = fixture("elixir_60comments.html"); + let result = try_extract( + &html, + "https://old.reddit.com/r/programming/comments/abc123/t/", + ) + .expect("should extract"); + let md = &result.content.markdown; + assert!(md.starts_with("# ")); + assert!(md.contains("## Comments")); + for line in md.lines() { + assert!( + !(line.starts_with(' ') && line.trim().is_empty()), + "whitespace-only line: {line:?}" + ); + let trimmed = line.trim_start_matches(['>', ' ']); + if trimmed.starts_with("```") { + let indent = line.len() - line.trim_start_matches(' ').len(); + assert!(indent < 4, "code fence indented {indent} spaces: {line:?}"); + } + } + assert!(result.metadata.word_count > 20); + } +} diff --git a/crates/webclaw-core/testdata/reddit/askreddit_deep_morechildren.html b/crates/webclaw-core/testdata/reddit/askreddit_deep_morechildren.html new file mode 100644 index 0000000..decf833 --- /dev/null +++ b/crates/webclaw-core/testdata/reddit/askreddit_deep_morechildren.html @@ -0,0 +1,596 @@ +What is going to happen when the lower class will not be able to afford anything anymore? : AskReddit

      use the following search parameters to narrow your results:

      subreddit:subreddit
      find submissions in "subreddit"
      author:username
      find submissions by "username"
      site:example.com
      find submissions from "example.com"
      url:text
      search for "text" in url
      selftext:text
      search for "text" in self post contents
      self:yes (or self:no)
      include (or exclude) self posts
      nsfw:yes (or nsfw:no)
      include (or exclude) results marked as NSFW

      e.g. subreddit:aww site:imgur.com dog

      see the search faq for details.

      advanced search: by author, subreddit...

      this post was submitted on
      553 points (88% upvoted)

      AskReddit

      [ SERIOUS ]
      + +
      Rules:
      + +
        +
      1. You must post a clear and direct question in the title. The title may contain two, short, necessary context sentences. +No text is allowed in the textbox. Your thoughts/responses to the question can go in the comments section. more >>

      2. +
      3. Any post asking for advice should be generic and not specific to your situation alone. more >>

      4. +
      5. AskReddit is for open-ended discussion questions. more >>

      6. +
      7. Posting, or seeking, any identifying personal information, real or fake, will result in a ban without a prior warning. more >>

      8. +
      9. AskReddit is not your soapbox, personal army, or advertising platform. more >>

      10. +
      11. [Serious] tagged posts are off-limits to jokes or irrelevant replies. more >>

      12. +
      13. Soliciting money, goods, services, or favours is not allowed. more >>

      14. +
      15. Mods reserve the right to remove content or restrict users' posting privileges as necessary if it is deemed detrimental to the subreddit or to the experience of others. more >>

      16. +
      17. Comment replies consisting solely of images will be removed. more >>

      18. +
      19. Do not post harmful misinformation. more >>

      20. +
      21. Spam, machine-generated content, and karma farming are not permitted. more >>

      22. +
      23. All content must be written in English so that it is widely understood by the user base of the sub. more >>

      24. +
      + +
      If you think your post has disappeared, see spam or an inappropriate post, please do not hesitate to contact the mods, we're happy to help.
      + +
      + +

      Tags to use:

      + +
      +

      [Serious]

      +
      + +

      Use a [Serious] post tag to designate your post as a serious, on-topic-only thread.

      + +

      + +

      Filter posts by subject:

      + +

      Mod posts +Serious posts +Megathread +Breaking news +Unfilter

      + +

      + +

      Please use spoiler tags to hide spoilers. >!insert spoiler here!<

      + +

      + +

      Other subreddits you might like:

      + + + + + + + + + + + + + + + + + + + + + + + +
      RelatedSubreddits
      Advice and AssistanceAsk Others
      AskReddit OffshootsGeneral Discussion
      Requests & AssistanceHelp Me Identify This
      Reddit/MetaFind Subreddits
      + +

      + +

      Ever read the reddiquette? Take a peek!

      + +

      +
      +
      a community for
      ×
      top 200 commentsshow 500

      [–]ACE_Wrap 886 points887 points  (147 children)

      As long as a few basic needs - like food and water - are secured, nothing much. Once that stops though, all options are on the table.

      +
      +

      [–]stop_deleting_plz 467 points468 points  (76 children)

      Well the billionaires need a trillion gallons of water to cool their shiny new surveillance centers, so it might be more likely than you think!

      +
      +

      [–]idiocy_incarnate [score hidden]  (26 children)

      with a little bit of foresight they would build the data centers on the coast, use seawater for cooling, and the heat generated by the data center to both desalinate the seawater and recover salt and minerals from it.

      + +

      Fat chance of that though, it requires a longer term view that is not compatible with quarterly profit reports.

      +
      +

      [–]ShortWoman [score hidden]  (6 children)

      Real estate on the coast is too expensive and we wouldn't want the wealthy to have that too close by.

      +
      +

      [–]tsunamikidd62 [score hidden]  (0 children)

      Gosh that would be just terrrrrrible if a data center were to be built right next to rich neighborhoods….

      +
      +

      [–]gorpie97 [score hidden]  (1 child)

      China is building them in the ocean, just like we build oil drilling platforms.

      +
      +

      [–]Temelios [score hidden]  (0 children)

      You act like the San Diego, Los Angeles, San Francisco, Astoria, and Seattle areas are the only coastal real estate. The Pacific coastline is massive and has tons of cheap property that the idea is pretty feasible so long as you look in places like Eureka or Coos Bay.

      +
      +

      [–]Dr_Pants7 [score hidden]  (4 children)

      Plus that requires putting resources into science. We can’t do that, science is just a liberal hoax.

      +
      +

      [–]DennisTheBald [score hidden]  (0 children)

      Some civ that thought in terms of several generations rather than a couple terms might. But a stable genius wouldn't care that much about getting water to the serfs

      +
      +

      [–]Proper_Individual578 [score hidden]  (1 child)

      +

      nd the heat generated by the data center to both desalinate the seawater and recover salt and minerals from it.

      +
      + +

      Do these AI chips run hot enough for that, or does desalination work at lower temps than I think it does? Most PC hardware doesn't like temps anywhere near hot enough to boil water

      +
      +

      [–]PipChaos [score hidden]  (0 children)

      The coast is where they built their mansions.

      +
      +

      [–]Dreaunicorn [score hidden]  (0 children)

      I’ve thought about this too but salt is a problem for steel and electronics in general.

      +
      +

      [–]Sportsfan369 33 points34 points  (18 children)

      Enough people don’t talk about the water scarcity that’s upcoming. Water will become more of a commodity. If you have access to free and clean water now, I’d suggest start filling up bottles.

      +
      +

      [–]awkwardmamasloth 12 points13 points  (16 children)

      +

      If you have access to free and clean water now, I’d suggest start filling up bottles.

      +
      + +

      Yea but you cant drink that unless its sanitized and stored properly. I k ow theres a process for treating it but idk what that is.

      +
      +

      [–]amateurbreditor [score hidden]  (9 children)

      in an emergency take 2 large kettles and put a clean cloth at a 45 degree angle. The water goes in the one and the slant goes into the other. The water boils from a fire under it or whatever you have and the steam hits the cloth and forms water and drops into the cloth. Perfect sterile water every time.

      +
      +

      [–]BigUptokes [score hidden]  (6 children)

      The cloth method is good for filtering particulate but I wouldn't call it perfectly sterile, especially in an emergency situation.

      +
      +

      [–]InNominePasta [score hidden]  (0 children)

      Invest in a bunch of Sawyer filters. Then you just need fresh water, regardless of how clean it is. Streams, puddles, whatever

      +
      +

      [–]Masterofnone9 [score hidden]  (2 children)

      Great "The Water Wars".

      +
      +

      [–]kloiberin_time 98 points99 points  (44 children)

      Add gas to that list. How many people are reliant on cars to get food? Or goes beyond just driving to the grocery store or walmart. What happens when the price of bread is 30 bucks a loaf because there's no gas to ship it.

      + +

      Walmart, Costco, Kroger, and Aldi dominate groceries. Sure, there are some franchise Price Choppers or whatever, but most of America buys their food at massive chains. What happens when Walmart can't ship it to Suburban and rural America?

      +
      +

      [–]traveldogmom13 43 points44 points  (3 children)

      I believe there are some instances in history where this had happened before. It didn’t end well

      +
      +

      [–]tulsym 17 points18 points  (1 child)

      Let them eat cake

      +
      +

      [–]fencer1119 [score hidden]  (0 children)

      Little red cookbook

      +
      +

      [–]NoAngel815 17 points18 points  (1 child)

      All food is reliant on gas/diesel, farmers won't be able to plow, plant, or harvest their crops because all farm machines run on diesel, as well as the trucks that all food is shipped on.

      +
      +

      [–]Pretend-Marsupial258 [score hidden]  (0 children)

      Synthetic fertilizer is completely dependent on fossil fuels as well.

      +
      +

      [–]LetterheadNo7323 [score hidden]  (1 child)

      I don’t understand why our shit politicians are so shortsighted. Can’t go to war over sunlight. Well, I know it’s about money so I guess I can’t understand how they could possibly be so greedy and craven.

      +
      +

      [–]whatamidoing71 2 points3 points  (0 children)

      Walmart will always be able to, but will they? (Gotta hold on to those sweet sweet billions…)

      +
      +

      [–]thetechguyv 13 points14 points  (10 children)

      Personal cars that you own are 100% going to become a luxury item in the next 20 years.

      + +

      Robo taxis on subscription are the future. 

      +
      +

      [–]3-2-1-backup 21 points22 points  (0 children)

      Pipe down Elon, nobody believes your horeshit anymore.

      +
      +

      [–]f8Negative 30 points31 points  (5 children)

      Not in rural anywhere. Unfeasible pipedream.

      +
      +

      [–]FormBitter4234 5 points6 points  (0 children)

      Everything has been moving to the subscription model and there are already monthly (non-musk) car rental services in large cities for people who only need a car on occasion so I can totally see this happening especially as vehicles get more expensive.

      +
      +

      [–]mikepi1999 2 points3 points  (0 children)

      This is a fact. The insurance companies are going to force the issue.

      +
      +

      [–]YouArentReallyThere 1 point2 points  (3 children)

      We live in a world of cars. Economies are dependent upon it and everything else that goes with it. Things won’t get so expensive or disrupted to where that gets affected too much.

      +
      +

      [–]Unhappy-Homework-812 1 point2 points  (1 child)

      If it comes down to apocalyptic times without cars we build communes and grow food. Pretty basic survival if they really gotta ask. 

      +
      +

      [–]askthepeanutgallery [score hidden]  (0 children)

      How many of us can even keep houseplants alive?

      +
      +

      [–]Jaereth [score hidden]  (0 children)

      Yup. Auto, Medical, Banking, Construction, Big Tech, etc.

      + +

      These are the "great houses" of the US now and the respective oligarchs aren't going to let their money machines get disrupted.

      +
      +

      [–]Ki-to-Life-5054 1 point2 points  (0 children)

      If working people all have to move to cities, corporations will have to let people work remotely so that office space can be repurposed as housing. People will think they are doing ok. Then, our govt will sell off farmland to the Chinese and for data centers. Depopulating rural areas will make someone money. It will be the beginning of those hellscapes we see in scifi movies.

      +
      +

      [–]jammythesandwich 12 points13 points  (0 children)

      This and a black economy will sprout wings alongside rises in civil unrest and organised crime

      + +

      It’s not going to be pretty thats for sure

      +
      +

      [–]Dear-me113 8 points9 points  (5 children)

      Bread and circuses?

      +
      +

      [–]sambeau 33 points34 points  (2 children)

      Ordinary people are priced out of the circuses. Ticketmaster saw to that.

      + +

      Meanwhile Netflix et al are doing the same to watching the circuses on a screen.

      + +

      The oligarchy aren’t thinking it through. They do not want a hungry bored populace.

      + +

      The Romans understood this; the French royalty did not.

      +
      +

      [–]paigeguy 4 points5 points  (0 children)

      Scooter races and sex parties.

      + +

      "Beggars in Spain" - Nancy Kress

      +
      +

      [–]Ricky_the_Wizard 8 points9 points  (1 child)

      Maslow's strikes again!

      +
      +

      [–]Marmaduke_Nelly 6 points7 points  (0 children)

      Why do you think all these billionaires are building bunkers?

      +
      +

      [–]Polarbrine 4 points5 points  (1 child)

      "all options are on the table" is a very polite way to say guillotines

      +
      +

      [–]tractorpatty 2 points3 points  (0 children)

      Desperation = control if done correctly as mentioned above.

      +
      +

      [–]Fungiblefaith 1 point2 points  (0 children)

      4 missed meals from eating the rich.

      +
      +

      [–]Maketjgreatagain [score hidden]  (0 children)

      We will see Klarna and affirm available at McDonald’s and yet not see anything wrong with it.

      +
      +

      [–]jeexbit [score hidden]  (1 child)

      I think loss of tv/internet could do it too.

      +
      +

      [–]Only_Employer5690 312 points313 points  (15 children)

      It usually builds slowly: higher debt, shared housing, delayed independence, and more reliance on support systems. Over time, it can also drive political pressure for policy changes.

      +
      +

      [–]OneEarthtoShare 119 points120 points  (6 children)

      I"ve been seeing all of these things happening

      +
      +

      [–]qrseek 75 points76 points  (2 children)

      For like, 20 years so far

      +
      +

      [–]karenw 43 points44 points  (1 child)

      At least. I'm 55 and have been watching the decline since Reagan.

      +
      +

      [–]ShortWoman [score hidden]  (0 children)

      And I've been watching in parallel as the Republicans turned less interested in negotiation that wasn't "do it our way" and the Democrats kept falling into their traps.

      +
      +

      [–]DeterrenceTheory [score hidden]  (1 child)

      Around me, there are more and more local zoning changes that are allowing residential property owners to tear down single family homes and build a number of tiny units on the property. The zoning changes were made in the name of creating affordable housing, but what ends up happening is the tiny units get priced each at nearly the same level as the original larger house.

      +
      +

      [–]MrMotorcycle94 6 points7 points  (0 children)

      Any day now then?

      +
      +

      [–]popsicle_of_meat [score hidden]  (1 child)

      And if the change is slow enough, the people don't revolt. They just accept it. The US will become a 3rd world country with some billionaires scattered around in secure sites.

      +
      +

      [–]ArticleInteresting13 [score hidden]  (0 children)

      we are basically seeing this happen right now. It feels like everyone I know has roommates or is stuck living with their parents.

      +
      +

      [–]IamDDT 1 point2 points  (0 children)

      I remember visiting Mary King's close in Edinburgh. There is a lot poor people will put up with, unfortunately.

      +
      +

      [–]DefinitelyRussian [score hidden]  (0 children)

      some countries are already like that, so nothing new

      +
      +

      [–]_Christopher_Crypto 246 points247 points  (13 children)

      What we saw in 2008 was many threw in the towel, stole what they could and punted. Seriously I watched otherwise good people strip the inside of their house bare, cabinets, lights, doors, appliances, and leave the shell for the repo. Moved it all to a cheaper residence and built that with the good stuff. Point is, many will just say F’it, quit their job and stop trying. Then the top feels the pain and things get interesting.

      +
      +

      [–]_Christopher_Crypto 118 points119 points  (7 children)

      Saw one case where a former owner refused to leave their repossessed house. The new owner was forced to pay them a lump sum of cash or risk the former destroying the residence. By this time the bank was out of the picture, squatter’s rights kept law enforcement from doing anything prior to a notification period.

      +
      +

      [–]Potential_Figure4061 32 points33 points  (4 children)

      thats why people are usually thrown out of repos before it goes up for sale

      +
      +

      [–]_Christopher_Crypto 11 points12 points  (2 children)

      Banks have better things to worry about. Houses were being resold within hrs of being repossessed. That and repo’s at that time were not one offs. Banks were dealing with hundreds/thousands at a time.

      +
      +

      [–]PowermanFriendship 5 points6 points  (0 children)

      Back then there could have been some underlying chain-of-ownership problems that allowed the squatting to continue. I was friends with a married couple who lived in a $1M home in 2009 and stopped paying the mortgage when one of them got laid off, because the bank no longer had enough original documentation on hand to prove who actually owned the house. Their lawyer was able to drag the process out for a good 5 years IIRC, before their situation improved and then they just moved into a nice apartment. I don't believe their credit was even affected in the end and I seem to recall this being a tactic that quite a number of people were able to employ to avoid eviction. (Yes, this was in Florida.)

      +
      +

      [–]Nearbyatom 14 points15 points  (1 child)

      The problem is the top is so insulated they won't feel the pain.

      +
      +

      [–]Unhappy-Homework-812 4 points5 points  (0 children)

      You’d be surprised. All their wealth is in stocks. 

      +
      +

      [–]theslimbox [score hidden]  (0 children)

      People are so much smarter than 2008 now though.

      + +

      In 2008, i was making good money because every Pawnshop had so much inventory they were selling it for half of what I could sell it on ebay for. People were selling items to Pawnshops for next to nothing to get a little money. Most people now know they can get more by selling items themselves.

      + +

      There are still people with no financial sense out there, but harder times are making people smarter.

      +
      +

      [–]Admirable-Strike-311 111 points112 points  (13 children)

      There are historical examples where the lower class basically sells themselves into servitude to either the government or the rich.

      +
      +

      [–]AshamedOfMyTypos 89 points90 points  (3 children)

      Top 10% of households account for 50% of spending. We’re there, fam. That’s what working a service job is just with the added seasoning of bootstrap capitalism.

      +
      +

      [–]roodammy44 34 points35 points  (2 children)

      It’s your fault you’re poor, but also if you inherited $100m you will never have to work a day and you will continue to get richer forever.

      +
      +

      [–]Wurm42 20 points21 points  (2 children)

      Second this. We're headed toward an indenture system.

      + +

      You work for one of a handful of mega-corporations. You live in a company apartment, shop at a company store, and you can only leave if you can pay off your debt to the company...which will never happen.

      +
      +

      [–]Unhappy-Homework-812 4 points5 points  (1 child)

      Only way out of that is to find a way to generate money yourself and live frugally. Find a NEED in the world and fill it so people do not need to rely on giant corporations. That is the ONLY way out or back. 

      +
      +

      [–]SlinkyAdmiral 6 points7 points  (0 children)

      Or crime.

      +
      +

      [–]putin_my_ass 14 points15 points  (4 children)

      There are historical examples where the lower class removes the parasite classes. Capitally.

      +
      +

      [–]GoatSage777 137 points138 points  (20 children)

      A lot of redditor fantasy in these comments.

      + +

      The truth is that whatever happens won't be cinematic like some dystopian young adult movie nor like anything hundreds of years ago, especially in major first world countries.

      +
      +

      [–]Rapidzigs 63 points64 points  (5 children)

      Yup most likely things will get bad enough that people start yelling about. That will cause a stop gap short term relief measure by the government which will quiet everyone down for a bit. Rinse and repeat.

      +
      +

      [–]Konzacrafter 14 points15 points  (1 child)

      Everyone looks to France as the example for “get shit done” protesting, but this is exactly what plays out there.

      +
      +

      [–]Lbailey32 [score hidden]  (0 children)

      You have a great point, and I know France is incredibly different than the United States buuuut they do have strong worker protections, maternity leave, nationalized healthcare, required PTO, good public transportation and those are just the things I can think of off the top of my head that I WISH the US had.

      +
      +

      [–]Gsusruls [score hidden]  (3 children)

      reddit has deeply romanticized notions of revolution.

      + +

      Revolution is ugly. Revolution kills. Revolution ends everything you have. It changes the landscape, the whole landscape. And in all likelihood, you might not be here on the other side. Generally, revolution isn't for you and your benefit; it's for your grandkids.

      +
      +

      [–]DirtyRoller 10 points11 points  (0 children)

      The lower classes will turn on each other, the middle classes will also feel the repercussions. The upper class will watch from their ivory towers.

      +
      +

      [–]Unhappy-Homework-812 3 points4 points  (1 child)

      Families will move back in together, possibly multiple families. Start small communes; grow food. There’s already many in the US and all other parts of the world that are 800% worse than the US. people better learn survival and fast 

      +
      +

      [–]Zvenigora 4 points5 points  (0 children)

      There is not enough land to make that work with modern population levels. Subsistence farming is vastly less productive than the industrial farming that supports modern society.

      +
      +

      [–]AmericanScream [score hidden]  (0 children)

      If history is any indication, the worse the economy gets, inevitably someone starts a war and that becomes a more important focus, or at least the scapegoat for why so many people are without so much.

      +
      +

      [–]Dramatic_Movies 127 points128 points  (12 children)

      The scary part is that people don’t collapse all at once — they slowly stop participating. 😞

      +
      +

      [–]Shadbolt 32 points33 points  (10 children)

      yeah my cousin stopped buying groceries first then just stayed home

      +
      +

      [–]Nope_______ 16 points17 points  (5 children)

      He just doesn't eat?

      +
      +

      [–]False_Perspective854 34 points35 points  (4 children)

      What do you think happens when you have no money?

      +
      +

      [–]Nope_______ 14 points15 points  (2 children)

      So he's dead then? Idk people usually get some benefits or start stealing or panhandling to survive rather than starve to death in their home

      +
      +

      [–]ExpertExpert [score hidden]  (1 child)

      +

      benefits

      +
      + +

      sir, this is america

      +
      +

      [–]Sea_Particular9266 21 points22 points  (0 children)

      You become an incel and post relentlessly on reddit

      +
      +

      [–]Unhappy-Homework-812 6 points7 points  (3 children)

      I did that like 6 months ago. I buy a loaf of bread, ham, grow lettuce and tomato. maybe buy 2 boxes of noodles, milk and bananas. Set for the week 

      +
      +

      [–]Loki-L 24 points25 points  (0 children)

      They go into debt and the debt gets sold on as an investment and then they can't pay and the people who invested in the debt also can't pay anymore and the government bails out billionaires with money future generations will have to pay in taxes.

      +
      +

      [–]wastingtoomuchthyme 49 points50 points  (4 children)

      There's examples of this around the world.

      + +

      You'll have beautiful neighborhoods and favellas and a lot of crime. There will be a lot of homeless people and many people living in micro/cage apartments like they do in Hong Kong. People will scrape by with service jobs or micro factories like in India.

      + +

      As long as they are fed... When populations start going hungry is when you get revolution... Which often makes things worse.

      +
      +

      [–]Unhappy-Homework-812 6 points7 points  (2 children)

      Yep no one ever talks about Japan and their coffin homes. It’s already been happening around the world for 45 years. 

      +
      +

      [–]RUKiddingMeReddit [score hidden]  (0 children)

      That has more to due with urban density than poverty.

      +
      +

      [–]ElHeim 141 points142 points  (36 children)

      Look at what happened in the past .

      + +

      Just make sure you're not part of the 1% (or look like it) when that happens

      +
      +

      [–]Pirate_Princess_87 26 points27 points  (4 children)

      Depends. There are examples from the past where there was a violent revolution to overthrow the ruling class. There have also been examples from history where the peasants just quietly starved to death.

      +
      +

      [–]iamtehryan 7 points8 points  (1 child)

      One of these is definitely better than the other.

      +
      +

      [–]09232022 11 points12 points  (0 children)

      The only one that follows within reddit TOS though is peasants starving to death. That tracks. 

      +
      +

      [–]porgy_tirebiter 122 points123 points  (15 children)

      There are countries all over the world that have long had a large poor underclass, and no violent revolution at all. It’s not 1% mega rich and 99% desperately poor, but still there are huge, sprawling slums and townships and favelas. That’s what it’ll be in the US, and everyone will accept it like they do in Brazil and India and the Philippines and South Africa. There will be no uprising against the haves.

      +
      +

      [–]petitecrivain 11 points12 points  (1 child)

      Brazil eventually kicked out their military regime. They have a huge labor movement and saw a leftist wave in the 2000s and again more recently. The Philippines has a lot of issues but back when it was significantly worse in the 1980s they put their foot down and ousted Marcos. 

      +
      +

      [–]porgy_tirebiter 4 points5 points  (0 children)

      Not exactly violent revolution eating the rich like OP suggested though. The shockingly huge and awful slums are still there. I hope things will improve. All I’m saying is people will endure pretty terrible suffering without a French Revolution uprising.

      +
      +

      [–]Pirate_Princess_87 23 points24 points  (2 children)

      No uprising as long as the popes still get their streaming services. Modern day circuses to stop the revolution.

      +
      +

      [–]strictnaturereserve 10 points11 points  (1 child)

      I don't know why your blaming the catholics for all this! /j

      +
      +

      [–]Helen_A_Handbasket [score hidden]  (0 children)

      The Catholic church is pretty fucking rich, dude. They could do a lot more to help people rather than hoarding their shit like a mythical dragon.

      +
      +

      [–]roodammy44 5 points6 points  (2 children)

      Depends on the place. Russia, China, Europe, South America and Africa all had huge revolutions when their populations got desperate enough. I’d say it’s more likely that there will be a revolution than there won’t.

      + +

      Maybe modern USA will accept the conditions, but I don’t think modern France would.

      +
      +

      [–]porgy_tirebiter 1 point2 points  (0 children)

      Americans are not the French. They have Freedom™ after all.

      +
      +

      [–]vacuitee 6 points7 points  (0 children)

      I drove past a home on the main drag of a small town in the rural midwest yesterday. Most of the front of the home was either bare drywall, or plastic tarps. There were four cars outside. There are already plenty of people in the USA living in levels of poverty that most Americans can't really comprehend. That family at least had a house, I guess. So I imagine we will see a lot more of that.

      + +

      Edit: Oh yeah there is a woman down the road from me that doesn't live with power. Given everyone out here is on well water, that means no water, either. Her house looks fine, so it makes me wonder how many people are living like this that most aren't aware of.

      +
      +

      [–]frosteeze 10 points11 points  (0 children)

      People point to the French as exemplars of protesting and rioting. But let’s be real. If a group of people has to riot and protest that frequently it doesn’t matter how intense it is. Because it meant…nothing changed, they’re still discontent and powerless. People are discontent about raising pension age as an example, it gets reverted back just a bit from to make them happy, then repeat to claim new grounds. From the original 62 years of age to proposed 65, then to the legislated 63.

      + +

      The French elite’s propaganda is just different, that’s all.

      +
      +

      [–]Oilfan94 4 points5 points  (0 children)

      Or….try to become the 0.0001%……because those fyckers have been apocalypse planning for decades.

      +
      +

      [–]CactaurSnapper 17 points18 points  (1 child)

      In the Russian example farmers were eventually considered rich. Which led to famine of course.

      +
      +

      [–]King0fthewasteland 5 points6 points  (3 children)

      It won't. If it was going to happend is would have a long time ago. People will just sit and take it

      +
      +

      [–]bluecheetos 8 points9 points  (0 children)

      Because people just want happiness and happiness is on a sliding scale. Poor people can find happiness in getting a coupon for a free pizza....Jeff Bezos finds happiness in owning the moon.

      +
      +

      [–]Newbie4Hire 2 points3 points  (0 children)

      It's because the threshold of tolerance has never been exceeded in a short enough time frame. They can take 10 feet if they take 1 inch at a time. But if they take 2 feet at once, people revolt. So long as they maintain bread and circuses they can do it this way. Obviously if they stop bread or circuses all bets are off.

      +
      +

      [–]TheAmorphous 3 points4 points  (0 children)

      Look at how much the Russians are willing to put up with. Americans are proving to be every bit as docile.

      +
      +

      [–]TheRexRider 15 points16 points  (0 children)

      I have a feeling no one is going to miss Kevin O'Leary or Jeff Bezos.

      +
      +

      [–]summonsays 14 points15 points  (1 child)

      AI / robot automation and then they'll let everyone starve. And we'll see if people continue to peacefully accept death or not. 

      + +

      I'm honestly shocked Luigi was such a one-off.

      +
      +

      [–]Masterweedo 12 points13 points  (1 child)

      Judging by the protests in 2020, and the ones more recently, I fully expect and protests to become mass casualty events where law enforcement or even the military massacres the protestors. The government and oligarchs appear to be preparing for it.

      +
      +

      [–]OgreMk5 11 points12 points  (0 children)

      What will happen has been the ultra-wealthy's plans all along. Re-institute debt slavery.

      + +

      Everyone works for them all the time and all the money comes back to them all the time.

      + +

      Amazon is almost a company store, since you can get almost anything you need from them. You aren't required to if you work there, but it's "so convenient"...

      + +

      Musk has already propose company housing for his "Starbase". Sure, you pay rent to the company you work for. And what happens when you turn in your resignation letter? You're instantly homeless. They lock the door remotely, change the code, and you'll have to sue to get your stuff... which you can't afford. Musk will literally use the money you paid him to pay lawyers to keep you from getting your stuff.

      + +

      Slavery... in all but name.

      +
      +

      [–]ProfessionalReach418 32 points33 points  (0 children)

      Holy larp in the comments.

      + +

      Realistically people stop working/buying things until the top budges.

      +
      +

      [–]TheMazoo 17 points18 points  (0 children)

      Yahtzee! Isn't that kind of the goal? Price people out to gain leverage over their habits they're forced to adopt when they're backed into a corner.

      +
      +

      [–]Gordon_frumann 8 points9 points  (2 children)

      Once catastrophic food shortages star happening, that's when the guillotines get pulled out.

      +
      +

      [–]CasUalNtT 14 points15 points  (5 children)

      They will have to form an anarcho-syndicalist commune.

      +
      +

      [–]tauberculosis 6 points7 points  (0 children)

      Listen. Strange women lying in ponds distributing swords is no basis for a system of government

      +
      +

      [–]Rapidzigs 6 points7 points  (1 child)

      Help help I'm being repressed

      +
      +

      [–]jason4747 3 points4 points  (0 children)

      What if each of us becomes a sort of "executive officer of the weak?"

      + +

      For the week?

      +
      +

      [–]fuzzybad 1 point2 points  (1 child)

      Dennis! There's some lovely filth down here!

      +
      +

      [–]thehappyonionpeel 11 points12 points  (1 child)

      Ah that's why they are building their mega off shore bunkers

      +
      +

      [–]distinctgore 5 points6 points  (0 children)

      All good, just fart into the air vent

      +
      +

      [–]Psychological_Pen765 5 points6 points  (1 child)

      Republicans will host UFC fights on top of the White house ballroom for the masses to forget their troubles

      +
      +

      [–]DJBudGreen 6 points7 points  (0 children)

      You say you want a revolution? The billionaire class can't hire enough protection to prevent being dragged out into the streets once the lower class can't afford food.

      + +

      That's why they'll always keep some type of junk food cheap and the nutritional quality low. If you have a full belly, it's less relevant what it's filled with as long as you aren't hungry.

      + +

      And as long as our devices still do flashy things to keep us distracted from the hoovering of all the wealth to the very few, they will keep control over the lower class.

      + +

      Make the food and distractions unaffordable and we'll snap like a twig.

      +
      +

      [–]HaiKarate 17 points18 points  (1 child)

      The crime rate goes up and whole sections of your city become unliveable for decent folks just trying to get by.

      +
      +

      [–]Serentyr 4 points5 points  (0 children)

      Historically, rebellion, conflict, strife, destruction of property.

      + +

      In an age of fledging AI, autonomous factories and personnel… the leverage that the masses had beyond simple numerical advantage is far weaker.

      + +

      I think it’s harder to predict and rely on history, where the playing field was significantly more level and the power structures were functionally reliant on the cooperation (through whatever means acquired) of the work force.

      +
      +

      [–]RosyRainbows 20 points21 points  (7 children)

      Lot of wealthy people and politicians loose their heads. History has a way of repeating

      +
      +

      [–]blooperonthestoop 13 points14 points  (1 child)

      lose

      +
      +

      [–]qrseek 4 points5 points  (0 children)

      Their heads will also be loose

      +
      +

      [–]Yzelski 3 points4 points  (0 children)

      When demand is lower than supply, prices fall. Basic economics.

      +
      +

      [–]Dale_Cooper_II 2 points3 points  (0 children)

      Read Ready Player 1.

      + +

      Thats where we're heading!

      +
      +

      [–]SwissChzMcGeez 3 points4 points  (0 children)

      Watch the movie Elysium.

      +
      +

      [–]Nearbyatom 3 points4 points  (1 child)

      Crime goes up. When people get desperate, they have nothing to lose.

      +
      +

      [–]tjlazer79 1 point2 points  (0 children)

      Yep. I agree. You reach a point where so many people are desperate, they have nothing to lose, and they will get violent and revolt.

      +
      +

      [–]Kiyohara 3 points4 points  (0 children)

      In general across all of history, Humans tend to be fairly compliant and passive towards the heads of their society. We're a tribal species after all, and the majority have ingrained desires to help each other and work together.

      + +

      However, in nearly every single situation where the populace gets hit by some event that makes it nearly difficult or impossible to get feed themselves, house themselves, or fight off a plague they have either taken the choice to move else where (become refugees) or over throw their leaders and start over (so rebellion).

      + +

      But that bar for "nearly difficult" is pretty high and the choice between becoming a refugee and a rebel is really based on realistic it is to perform each action. In societies where movement is controlled or restricted, they tend to revolt. In societies where the ruling state has a perceived insurmountable power, they flee. And it basically has to get tot he point where the average person's other choice is "whelp, guess I'll just die then."

      + +

      Right now food is there and there are some options for those most in need. We're not at the stage where the choices are between fighting, fleeing, or dying. We're at the stage where we tighten our belts, cut expenses, and still have hope someone is going to turn it around. We're hardly at the famine stage here. We're not even at the "hungry" stage nation wide. Food is plentiful, if expensive. And if people have to choose between food, medicine, housing, and luxuries, we can still drop the luxuries entirely and possibly still cut back on something else too.

      + +

      And our leaders and wealthy know this (sort of). It's why they campaigned on food prices and the economy. Eventually they will be forced to take action and do something about it. But that time isn't now. It won't be until people are down to choosing between food and shelter (forget about medicine and luxuries).

      + +

      There's a reason why the phrase "bread and circuses" was invented: give the people just enough to eat and enough entertainment to be distracted and they won't revolt. That's too risky to attempt when you still have options. And we right now have TV and cheap entertainment. Practically every single person carries a phone that can play the entire collection of Earth's Entertainment if you look for it.

      + +

      But once we get to that final stage where we don't have options other than to fight or flee, well we'll see which direction most people take. Looking at the US past, we've had migrations numerous times and several wars and revolutions (even if some were short lived) and armed conflict is not a stranger to these lands.

      +
      +

      [–]PM_me_ur_navel_girl [score hidden]  (0 children)

      A business owner, a foreigner, and a blue-collar worker are sat round a table. In the middle is a plate with ten cookies.

      + +

      The business owner takes nine cookies for himself, then says to the blue-collar worker "Watch out for that foreigner, he's got his eye on your last cookie!"

      +
      +

      [–]WhiteSkyfire 6 points7 points  (1 child)

      The return of the workhouse, some politicians are actively working on it

      +
      +

      [–]And-Messy 6 points7 points  (0 children)

      If enough people decided to go for a walk at the same time to the same place, I feel like somehow a miracle would happen.

      +
      +

      [–]agreetodisagree2023 8 points9 points  (14 children)

      50% of the US has an income at or below $51,500. They are there already. We keep them balancing on the edge of despair and then they vote in the rich who steal the last few dollars from them to remove all their power. The system IS working.

      + +

      (Edited for $)

      +
      +

      [–]SarahQuinn113 9 points10 points  (0 children)

      We eat the rich.

      +
      +

      [–]Eyfordsucks 2 points3 points  (0 children)

      They start finding other ways to survive or they give up and die.

      +
      +

      [–]OpeningExtension2 2 points3 points  (0 children)

      Then there's nothing left to lose. Time for a Revolution.

      +
      +

      [–]janrockzzzxx 2 points3 points  (0 children)

      Revolution

      +
      +

      [–]karmais4suckers 2 points3 points  (2 children)

      Some people might go crazy, and others might lose their heads

      +
      +

      [–]questiontomorrow 2 points3 points  (0 children)

      Either revolution or oppression

      +
      +

      [–]FrostySoul3 2 points3 points  (0 children)

      We will all die. Upper class, middle class, lower class. It no longer matters. It’s all of us vs 100 families with robot armies that don’t say no to sending us packing in a metaphorical sense.

      +
      +

      [–]YourLocalOnionNinja 2 points3 points  (0 children)

      Death

      +
      +

      [–]Odd-Adhesiveness-656 2 points3 points  (0 children)

      "Soylent Green is PEOPLE"

      +
      +

      [–]phixitup 2 points3 points  (0 children)

      Anarchy, and it’s getting closer by the day.

      +
      +

      [–]ShittalkyCaps [score hidden]  (1 child)

      I don't see that happening. The powers that be will allow people to barely keep their head above water and sell them hope for a better future. Reason is, when people have nothing to lose, things would get real for real.

      +
      +

      [–]Great-Ad-4270[🍰] 25 points26 points  (3 children)

      They stop being lazy enough to revolt hopefully 

      +
      +

      [–]thepeanutone 2 points3 points  (0 children)

      What does a revolt (revolution? revolting?) look like? People keep saying that like I would know where to put the barricades, what I'm barricading, and what my end goal is.

      + +

      This country runs on where the money goes. Short of staging a robbery of billionaires - how do we interrupt the flow of money?

      + +

      The quiet Revolution is maybe just not buying into all the corporate shit and making the world a better place, all at the same time. What if the whole country just stopped buying fragrance products? Like, no more Downy Unstoppables, no more car clip air fresheners, no more perfume or body spray. What would happen? Shit, we stopped paying for Disney for what? a week? and look what happened.

      + +

      What if we all stopped paying for streaming services and, I don't know, watched movies from the library for entertainment instead? Or we could get REALLY crazy and read books instead?

      + +

      What if we all decided "This is where pain point number 1 hits?" And when you're still hurting, activate pain point #2.

      + +

      We are so spread out in America that the traditional picture of revolution is hard to recreate. We aren't really interested in going hungry or homeless, especially when it feels like we would be the only ones not going to work, so the general strike idea is a tough one to implement effectively.

      + +

      Corporations are widespread and easy for everyone to access/deny.

      + +

      I can't tell if I've had too much coffee or if this is actually a good idea...

      +
      +

      [–]intheshade6 7 points8 points  (1 child)

      What are you doing about it? Or are you saying it’s not your problem?

      +
      +

      [–]Nope_______ 8 points9 points  (0 children)

      He's waiting for someone else to do it. Too lazy lmao

      +
      +

      [–]trustmeep 1 point2 points  (0 children)

      Have they tried saying think you?

      +
      +

      [–]AmbitiousReaction168 1 point2 points  (0 children)

      Riots and mass repression, followed by fascism.

      +
      +

      [–]SecretTreeHouse42 1 point2 points  (0 children)

      I had this thought while grocery shopping the other day. So many items that I used to get, now skipped, because the prices are an insult.

      +
      +

      [–]Coravel 1 point2 points  (0 children)

      You see the slums in ready player one?

      +
      +

      [–]RobsOffDaGrid 1 point2 points  (1 child)

      When it comes to profit when sales go down to a point where a product isn’t selling the product either ceases to be or the price has to come down. +It’s because people keep buying a product as the price increases and keep buying it.

      +
      +

      [–]Chance-Ad7783 1 point2 points  (0 children)

      If history is any guide, if the situation does not change, radicalism and violence. I wish to avoid this because when you have a revolution, it is often the greatest sociopath who comes out on top. But that is just a nonprofessional view.

      +
      +

      [–]haythem007 1 point2 points  (0 children)

      If that happen, you would likely see major pushback protests, policy changes and pressure for things like higher wages or rent controls. Historically, when inequality gets too extreme, societies either adjust or they end up in serious instability, so it usually forces some kind of correction before it reaches a total breaking point.

      +
      +

      [–]LaFilleDuMoulinier 1 point2 points  (0 children)

      We will party like it’s 1789.

      +
      +

      [–]Netmantis 1 point2 points  (0 children)

      No one has thought that far.

      + +

      The upper classes continue their short-sighted "line went up for past week, line go up forever" planning, not understanding that the often large short term gains tend to be balanced out with losses soon afterwards when the consequences of the actions come around. So making the lower classes unable to afford what you sell never enters their mind until sales drop and they are scrambling to figure out why no one wants what they offer.

      +
      +

      [–]MoroseBizarro 1 point2 points  (0 children)

      Poverty breeds crime. The plebs will battle each other for survival while the bourgeoisie hides behind their walls hoping the violence stays away. Militarizing the police will help for a time but eventually the people will win. Same as it's ever been.

      +
      +

      [–]FrankCastillo95 1 point2 points  (0 children)

      Demand will collapse and prices will correspondingly drop or low performing locations will shutter. Nowadays with so many expenses being on things other than necessities in the developed world, most folks have little idea just how far away anything serious really is. They simply won't care about much that happens.

      +
      +

      [–]Original_Remote_6838 1 point2 points  (0 children)

      Realistically? The wealthy will benefit from the situation as the poor get poorer. People will lose their homes and those with enough money will scoop them up. Nothing changed in 2008 and it won’t change now.

      +
      +

      [–]Reasonable-Guess-451 1 point2 points  (0 children)

      That’s my question. If they take all our money after they’ve pissed off the other countries, who’s going to buy their crap?

      +
      +

      [–]AddictedtoBoom 1 point2 points  (0 children)

      Historically revolution and massive social upheaval lasting multiple generations.

      +
      +

      [–]rocktropolis 1 point2 points  (0 children)

      Best case, French style revolution. More likely scenario, East German / Soviet Russia style authoritarianism, breadlines, society.

      +
      +

      [–]0rganicMach1ne 1 point2 points  (0 children)

      The less than 1% responsible for it will still blame everyone else but themselves and sadly some poor idiots will still believe and worship them.

      +
      +

      [–]Tabbygail 1 point2 points  (2 children)

      Once you hit a critical mass of maybe ~15% of people that literally can't afford to live, can't buy food or water, there will be a revolt of some kind. Whether it's successful, or just makes things worse, who can say.

      + +

      Until then we'll get by. We'll buy bikes because cars are too expensive, live 4 to a room, eat rice and beans for every meal. We'll learn to live with injury and sickness because the doctor is too expensive. We'll snag pigeons from their nests for thanksgiving dinner. 

      +
      +

      [–]CaptainPrower 1 point2 points  (0 children)

      The billionaires are planning on going full Elysium by then.

      +
      +

      [–]DerpDerpingtonIV 1 point2 points  (0 children)

      Unrest...conflict....violence...the cull.

      +
      +

      [–]Tanerian 1 point2 points  (0 children)

      Crime ---> propaganda to make the remaining middle class view the lower class as the enemy.

      +
      +

      [–]iamstephen1128 [score hidden]  (0 children)

      The Bell Riots

      +
      +

      [–]painstream [score hidden]  (1 child)

      Increased homelessness, getting criminalized for that homelessness, then thrown in prisons, which are privatized and run as a business, selling labor.

      +
      +

      [–]MatCauthonsHat [score hidden]  (0 children)

      Soylent green?

      +
      +

      [–]DividedStatesofFeces [score hidden]  (0 children)

      Revolution...

      +
      +

      [–]Lighthouse_on_Mars [score hidden]  (0 children)

      Normally, a revolution.

      + +

      However, the world has never been this technologicaly advanced before. And the military and police have weapons and options that FAR outstripes anything the masses have.

      + +

      So while we have the numbers, it would still be fairly easy to keep us in line. Especially as we have been effectively trained to not fight back...

      + +

      Look at the route and Protest France has, what's currently going on in Albania. The US has nothing similar because we are all too busy working to stay alive, and hoping others will put themselves at risk.

      +
      +

      [–]razorwiregoatlick877 [score hidden]  (0 children)

      In the past that is when people brought out the guillotine.

      +
      +

      [–]Extreme_Health_9827 [score hidden]  (0 children)

      I live on 2 hot dogs and 2 cups of coffee a day thanks for asking.

      +
      +

      [–]Creative_Squirrel [score hidden]  (0 children)

      Workhouses 2.0

      +
      +

      [–]jokemon [score hidden]  (0 children)

      they dont really care, we live in a global society, they can simply go to their home in france and take advantage of their economy.

      +
      +

      [–]kdebones [score hidden]  (0 children)

      Realistically, right proper anarchy.

      +
      +

      [–]Dubious_Titan [score hidden]  (0 children)

      The rich will make the poor fight the poorer for the privilege of licking their boots. And it will 100% work.

      +
      +

      [–]KittensAndGravy [score hidden]  (0 children)

      Sadly it seems the sparrows are ok with picking through the well fed horses shit covered oats.

      +
      +

      [–]Prestigious_Safe3565 [score hidden]  (0 children)

      You spelled “middle“ wrong 😑

      +
      +

      π Rendered by PID 52 on reddit-service-r2-loggedout-7768c89db9-ltrqx at 2026-06-04 14:23:47.573526+00:00 running 9e1a20d country code: IT.

      diff --git a/crates/webclaw-core/testdata/reddit/ebpf_6comments.html b/crates/webclaw-core/testdata/reddit/ebpf_6comments.html new file mode 100644 index 0000000..acc2d78 --- /dev/null +++ b/crates/webclaw-core/testdata/reddit/ebpf_6comments.html @@ -0,0 +1,82 @@ +A tale about fixing eBPF spinlock issues in the Linux kernel : programming
      this post was submitted on
      87 points (95% upvoted)

      programming

      /r/programming is a reddit for discussion and news about computer programming.

      + +
      + +

      Rules

      + +

      Refer to the rules page for more info.

      + +
        +
      1. No LLM-Written Content

      2. +
      3. AI-related posts must comply with the AI Policy

      4. +
      5. No Political Posts or Personal/Social Drama/Gossip

      6. +
      7. No Non-Programming/Generic LLM/Diffusion Content

      8. +
      9. No Product Promotion/"I Made This" Project Demo Posts

      10. +
      11. No Content Aggregators

      12. +
      13. No Surveys Or Job Postings

      14. +
      15. No Support Questions or AskReddit-Type Questions

      16. +
      17. No Meta Posts

      18. +
      19. No Images, Memes, Or Other Low Effort Posts

      20. +
      21. No Blogspam

      22. +
      23. No Extreme Beginner Content

      24. +
      25. Comments: No Bots

      26. +
      27. Comments: No Incivility

      28. +
      + +
      + +

      Info

      + + + +
      + +

      Related reddits

      + + + +

      Specific languages

      +
      +
      a community for
      ×
      all 6 comments

      [–]teerre 9 points10 points  (0 children)

      Very cool blog! Great job explaining it. It was easy to follow

      +
      +

      [–]ejrh 4 points5 points  (1 child)

      I guess this is for user space programming, but I was always taught that nothing "big and/or complicated" should happen in an interrupt. Instead, you should do no more than set a flag and rely on the normal non-interrupt code to check it and call the appropriate big and complicated function.

      + +

      The usual example was that anything requiringmalloc orfree was too big and complicated. Running an eBPF program certainly seems big and complicated enough. But I guess kernel programmers are made of sterner stuff and they just have to provide for this? I have a feeling that the eBPF hooks for performance events wouldn't be practical if they used the traditional approach.

      +
      +

      [–]admalledd 1 point2 points  (0 children)

      You are generally correct, the idea falls apart though that these were NMIs from performance sampling type tools, so they have to do some work in the interrupt. As the author stated, eBPF devs shouldn't need to care about this specific case since it's kind of the whole point of eBPF tracing existing. So the kernel devs have to be extra defensive, and seems a spot or two were missed, oops!

      +
      +

      [–]joolzg67_b 2 points3 points  (0 children)

      I worked on a port of nucleus RTOS late 80s, was asked to get it running ASAP, had it running in one day.

      + +

      Got a call a few months later saying "random crashes" are happening.

      + +

      Went in and found the interrupt now being used for the RTOS was a NMI interrupt, added a flag to check if interrupts were disabled and if so ignored the NMI.

      + +

      Voila fixed.

      +
      +

      [–]DowntownCap6204 2 points3 points  (0 children)

      love when a bug goes from “profiler freezes the box” to a tiny eBPF repro and a 250ms spinlock timeout

      +
      +

      [–]drcforbin 4 points5 points  (0 children)

      TIL I love low level debugging porn. "Yay! Job’s done. Or is it?"

      +
      +

      π Rendered by PID 609583 on reddit-service-r2-loggedout-7768c89db9-hndpc at 2026-06-04 14:20:01.909097+00:00 running 9e1a20d country code: IT.

      diff --git a/crates/webclaw-core/testdata/reddit/elixir_60comments.html b/crates/webclaw-core/testdata/reddit/elixir_60comments.html new file mode 100644 index 0000000..6d1f8fa --- /dev/null +++ b/crates/webclaw-core/testdata/reddit/elixir_60comments.html @@ -0,0 +1,312 @@ +Elixir v1.20 released: now a gradually typed language : programming
      this post was submitted on
      214 points (96% upvoted)

      programming

      /r/programming is a reddit for discussion and news about computer programming.

      + +
      + +

      Rules

      + +

      Refer to the rules page for more info.

      + +
        +
      1. No LLM-Written Content

      2. +
      3. AI-related posts must comply with the AI Policy

      4. +
      5. No Political Posts or Personal/Social Drama/Gossip

      6. +
      7. No Non-Programming/Generic LLM/Diffusion Content

      8. +
      9. No Product Promotion/"I Made This" Project Demo Posts

      10. +
      11. No Content Aggregators

      12. +
      13. No Surveys Or Job Postings

      14. +
      15. No Support Questions or AskReddit-Type Questions

      16. +
      17. No Meta Posts

      18. +
      19. No Images, Memes, Or Other Low Effort Posts

      20. +
      21. No Blogspam

      22. +
      23. No Extreme Beginner Content

      24. +
      25. Comments: No Bots

      26. +
      27. Comments: No Incivility

      28. +
      + +
      + +

      Info

      + + + +
      + +

      Related reddits

      + + + +

      Specific languages

      +
      +
      a community for
      ×
      all 60 comments

      [–]markehammons 169 points170 points  (46 children)

      Why does this keep happening for dynamically typed languages? It feels to me like a vindication of static typing when all these dynamically typed languages keep bolting static types on after the fact.

      +
      +

      [–]syklemil 79 points80 points  (2 children)

      I think one part of an explanation could be that in the way-back-when, the alternative to dynamic typing was manifest, restrictive, even weak typing. E.g. languages like C where you

      + +
        +
      • have to specify all the types,
      • +
      • but lack the power of generics and interfaces that have become common in this millennium,
      • +
      • and where the types may not even be trustworthy, as with C's implicit conversions ("promotions"), or plenty of languages that have implicit nulls resulting in NPEs and segfaults.
      • +
      + +

      The gradual type systems we see added to various dynamic languages don't seem to have those same flaws. (Plenty of the older languages have been working on those flaws as well.)

      + +

      Elixir is old enough that it could've been typed from the start (roughly the same age as Typescript), but if we take its beginnings as something like "Erlang in Ruby's clothing" then it's no wonder that it wasn't.

      +
      +

      [–]spider-mario 0 points1 point  (1 child)

      +

      Elixir is old enough that it could've been typed from the start

      +
      + +

      Young enough?

      +
      +

      [–]syklemil 0 points1 point  (0 children)

      I'd rather phrase it as

      + +
      +

      Elixir's age indicates that it could've been typed from the start

      +
      + +

      than get into a pissing match over "old" vs "young"

      +
      +

      [–]kerakk19 186 points187 points  (14 children)

      Because dynamically typed languages sucks. Always will, always have.

      + +

      It's convenient for scripting and so, but as soon as you need to have something stable (actual project), dynamic languages fall flat

      +
      +

      [–]QuickQuirk 36 points37 points  (9 children)

      Awesome for quick scripting and apps that are less than a thousand lines.

      + +

      I really dislike it for anything larger. Good type systems eliminate certain classes of bugs.

      +
      +

      [–]SoInsightful 18 points19 points  (8 children)

      +

      Awesome for quick scripting

      +
      + +

      I've never even understood this. I'll create a small quick script in TypeScript over JavaScript every day.

      +
      +

      [–]stumblinbear 7 points8 points  (6 children)

      Shit, I'll write a quick script in Rust if It's going to be in use for more than few months

      +
      +

      [–]kabocha_ 2 points3 points  (5 children)

      Tbh my barrier is basically just "is it large enough that I want to save it as a file" nowadays.

      + +

      Quick bash scripts still win if it's mostly just calling existing programs, but otherwise my muscle memory + Rusts ease of spinning up a new project and adding dependencies wins.

      + +

      I think I only launch Python as a quick terminal calculator anymore, lol

      +
      +

      [–]tukanoid 2 points3 points  (1 child)

      And if you switch to nushell, you wont even need python for that😂 (cuz thats the only thing i was using it for as well, outside of couple projects at work i seldomly interact with)

      +
      +

      [–]kabocha_ 0 points1 point  (0 children)

      One of these days I'll pick a different shell and switch to it.

      + +

      I always keep thinking "but what about all the other systems I have to log into, my muscle memory will break for seconds before I remember to install it on that system too!" 😂

      +
      +

      [–]syklemil 1 point2 points  (2 children)

      As far as the terminal calculator goes, I'd been using units with a couple of flags; these days I just use numbat.

      +
      +

      [–]kabocha_ 0 points1 point  (1 child)

      Ooo I hadn't even thought of using a unit-aware program/language, that actually might be super useful for most of those "terminal calculator" use cases. Good tip!

      + +

      The python mostly stuck around as muscle memory, so I'll need to break that habit 😂

      +
      +

      [–]syklemil 1 point2 points  (0 children)

      I do have an alias nb=numbat that I think helped get the muscle memory adjusted.

      +
      +

      [–]QuickQuirk 1 point2 points  (0 children)

      Typescript is actually pretty good. Much better than base javascript.

      + +

      I'm still going to reach for python or erlang first.

      +
      +

      [–]-Ch4s3- 8 points9 points  (2 children)

      Every phone call you’ve ever made has touched an Ericsson switch running Erlang code. I’d say the telecom system is highly stable.

      +
      +

      [–][deleted]  (1 child)

      [deleted]

      +
      +

        [–]-Ch4s3- 2 points3 points  (0 children)

        Just because you don’t like dynamic languages doesn’t mean they suck. Distributed systems build for live code reloading while messages are in flight need to have at least some capacity for dynamic types. You can’t have nodes declaring different types at once for the same message. Dynamic types allow you to roll forward in a running system, it’s a very strong solution to that problem, and has proven robust for 40 years. Conversely trying to do this with static types would suck.

        + +

        Moreover a lot of basic web protocols are inherently stringly typed and dynamic.

        +
        +

        [–]efvie 8 points9 points  (0 children)

        Hilarious thing to say when Erlang itself is dynamically typed.

        +
        +

        [–]TypeSafeBug 8 points9 points  (2 children)

        I haven’t used this feature or modern elixir tooling yet but IMO this is a huge improvement over most dynamically typed languages. Most dynamically typed languages have pretty limited inference and require static type annotations to do useful things, and there’s a disconnect between the static types and the runtime behaviour (except in Python with Pydantic I guess).

        + +

        This looks like with guards (which have additional useful properties eg overloading, good luck with that, TypeScript!) you can pretty much have a statically analysable and typesafe program with the convenience of writing it like a dynamic one.

        +
        +

        [–]jessepence 0 points1 point  (1 child)

        [–]TypeSafeBug 0 points1 point  (0 children)

        Edit: didn’t notice the link; see rest regardless: Not really, in TS you can declare the method signature in such a way that when people go to use it, it shows up as if it’s got several overloads, but you are basically just writing a bit JS function that pulls apart arguments manually and has a bunch of internal logic inside.

        + +

        Elixir (like Erlang) uses arity of arguments + a guard clause with predicate functions to determine which version of a function to run. Which is technically not overloading like Java, C# or C++ do but is probably more flexible/useful (and more like Haskell pattern matching on function signature).

        +
        +

        [–]astonished_lasagna 49 points50 points  (9 children)

        Because it's very useful to have a language that can do both.

        +
        +

        [–]pheonixblade9 28 points29 points  (8 children)

        why? everything is typed at the end of the day, it's just chucking the validation down the road.

        +
        +

        [–]astonished_lasagna 17 points18 points  (2 children)

        Some things are harder to express in a static type system. For larger pieces of software, that tradeoff is usually worth it, but for smaller / one-off things it's not.

        +
        +

        [–]Wonderful-Habit-139 2 points3 points  (0 children)

        The tradeoff is worth it the moment you start defining functions imo. I always write types regardless of how small it is.

        + +

        I even wrote types during technical interviews in Python.

        +
        +

        [–]Saint_Nitouche 0 points1 point  (0 children)

        What things did you have in mind? API payloads?

        +
        +

        [–]syklemil 12 points13 points  (2 children)

        Part of it is just the power and effort that needs to go into some types. Dynamic typing generally wants the programmer to be able to express valid programs that would either be inexpressible or require inordinate amounts of efforts to type in static languages.

        + +

        If you've ever seen someone complain about complex generics, those are the cases where a dynamic type system would let you just write the behaviour and move on without needing a PhD in type theory.

        + +

        In practice plenty of people will just punch a hole in their type system with Any (AKA void*, Object, interface{}) when they come across a situation like that, which practically is static type systems moving towards the same middle ground, just from the opposite starting point.

        + +

        I prefer static typing myself, but it should be plenty clear to anyone that it ain't all sunshine and rainbows all the time either.

        +
        +

        [–]tukanoid 0 points1 point  (1 child)

        If you struggle with (complex) generics in types, i find it hard to believe that dynamic typing will make it easier, cuz now you have to keep all that context on your head, good luck remembering/re-figuring shit out in a week of not working on that part of code.

        + +

        Although, guess depends on the language. While the Rusts type system is not perfect, its expressive enough for me to be able to cram my logic into types 95% of the time

        +
        +

        [–]syklemil 0 points1 point  (0 children)

        +

        If you struggle with (complex) generics in types, i find it hard to believe that dynamic typing will make it easier, cuz now you have to keep all that context on your head,

        +
        + +

        That's the neat thing, you don't. Programmers who are used to untyped languages just don't reason around things the same way someone used to e.g. Rust or Haskell do. Shit, just talking about ADTs/"enums" and tuples to gophers is enough to make a lot of them confused, because they're not a part of the type system that they're used to.

        + +

        Instead they'll write tests and maybe some assertions. I suspect that in the extreme cases, that's actually the path of least resistance, as opposed to practically formally proving the behaviour through the type system.

        + +

        Programmers don't all think alike, just like how some have a more imperative mind and some a more expression-oriented mind.

        + +
        +

        Although, guess depends on the language. While the Rusts type system is not perfect, its expressive enough for me to be able to cram my logic into types 95% of the time

        +
        + +

        I also generally like Rust's type system, but that is on the more powerful side of the mainstream spectrum, and having lifetimes be part of the type system means you can describe some behaviour, like typestate, that's more likely to have wonky behaviour in GC languages.

        + +

        And I think most of us would still reach for some escape hatch before we wound up actually writing a signature like this one (via).

        +
        +

        [–]M4mb0 3 points4 points  (0 children)

        Because adding static types can be a huge effort and restrictions in the type system like lack of intersection types, dependent types and higher kinded types can prevent you from even statically expressing perfectly valid runtime code.

        + +

        I'd invite you to take a look at how complex annotations become for instance for python libs like numpy, scipy or pandas. Often functions need 10+ overloads to even statically express all their runtime capabilities.

        +
        +

        [–]ThePickleConnoisseur -1 points0 points  (0 children)

        Scripting or when you need more complex return values

        +
        +

        [–]ultrasneeze 6 points7 points  (0 children)

        Type theory has advanced a lot during this century, and it turns out you can keep the benefits of a dynamic language while taking advantage of some static typing goodies. The same has happened in the statically typed languages: type inference has advanced a lot so you end up with languages like Scala or Rust that have very powerful type systems that can be almost completely ignored when the language is used as a scripting tool.

        +
        +

        [–]Wonderful-Habit-139 5 points6 points  (1 child)

        The way tooling has improved (editors, LSPs, linters, type checkers), they provide a lot of benefits for writing correct code as well as helping you develop correct code faster before you even run it. It also makes programs easier to understand when you can see exactly what shape objects have, rather than having to rely on running programs and checking the object's types at runtime (e.g. responses from external REST APIs).

        +
        +

        [–]syklemil 1 point2 points  (0 children)

        Not just the software, but the hardware, too. If we'd chucked today's software on a machine from when various popular dynamic languages were being developed (as in, early-mid 90s), we'd be having a terrible time. Most of it probably wouldn't even run with the available memory in those machines.

        + +

        My early experiences with Java + Eclipse vs Perl + vim left me with the feeling that I could have a running, working program in Perl before Eclipse had even displayed everything I'd typed on the screen.

        +
        +

        [–]HiPhish 3 points4 points  (0 children)

        At least in the case of Elixir it's because of Erlang, and in the case of Erlang it's because statically typing Erlang is really hard. An Erlang program can change at any time because the runtime supports hot code reloading, so how do you express that in a static type system at compile time? The easy answer is you don't, you just live with dynamic typing. The hard answer is that you start a multi-year research project to get gradual typing added after the fact.

        + +

        To be fair, static typing in Erlang and Elixir is not actually as needed as in Python or JavaScript because you have pattern-matching built-in at the language level. You can see at a glance what shape your data needs to be in. Of course having that enforced by the compiler is better, but it is what it is.

        +
        +

        [–]radozok 3 points4 points  (0 children)

        There is a talk about that: https://youtu.be/Tml94je2edk

        +
        +

        [–]efvie 4 points5 points  (0 children)

        Why not? Both explicit and static typing are very useful in some cases, so being able to be explicit when you want to is great.

        +
        +

        [–]faiface 11 points12 points  (0 children)

        Of course! There are innumerable advantages to static typing: +- Preventing (many) runtime crashes +- Domain model / module documentation that doesn’t go obsolete and is exact +- Better IDE support / autocomplete

        +
        +

        [–]RogueDotSly 1 point2 points  (0 children)

        types are a blessing whether they're static gradual (fake)

        +
        +

        [–]JustBadPlaya 1 point2 points  (0 children)

        Elixir, Erlang and OTP in general are untyped/dynamically typed, and properly typing them at the time was not really possible, it took a good amount of research to make what Elixir is doing viable

        +
        +

        [–]Psychological-Rub505 5 points6 points  (0 children)

        It's because everyone who starts these kinds of new projects has the misconception that much of the existing code that looks like boilerplate is unnecessary and inefficient. Then the language gets used for something serious, the wheels fall off, and people realize that those seemingly unnecessary features were actually what kept the language sane. Voila, the dynamically typed language starts moving toward static typing.

        +
        +

        [–]Revolutionary_Ad6574 3 points4 points  (2 children)

        I'm glad I'm not the only one noticing this, I thought I was going crazy because no one is talking about it. But I don't see the problem. Dynamic typing was never a good idea, static typing has always been superior. It's normal for dynamic languages to have static envy.

        +
        +

        [–]ptoki 7 points8 points  (1 child)

        People talk about this but there is a huge crowd of half brains who think they know better and are loud about it.

        + +

        Same with nosql databases, local apps running like web (large parts of windows now), javascript ecosystem and few others.

        + +

        You come up with explanation why it is bad and the crowd will make sure you fell like you are crazy.

        + +

        BUT! Dynamic typing is not bad idea. It just should not be used for many things it is used for now.

        +
        +

        [–]Revolutionary_Ad6574 3 points4 points  (0 children)

        I agree. It has its uses. But for large, scalable projects static typing is the way to go.

        +
        +

        [–]f311a[S] 2 points3 points  (1 child)

        So what's bad about about catching runtime errors and crashes before they happen at runtime? They will happen if not found, unless it's a dead code. No one forces you to specify types.

        + +

        The examples in the blog post clearly demonstrates that, you can't divide string by a number. Even if a dynamic language can do that, it would be a bug in most of the cases.

        + +

        You can do this is JavaScript and it will silently give you a Nan, which most people won't even bother checking after division.

        +
        +

        [–]syklemil 0 points1 point  (0 children)

        +

        The examples in the blog post clearly demonstrates that, you can't divide string by a number. Even if a dynamic language can do that, it would be a bug in most of the cases.

        +
        + +

        It can also be the correct, intended behaviour, see e.g. this Perl program:

        + +
        print "divisor: ";
        +chomp( my $divisor = <> );
        +
        +print "dividend: ";
        +chomp( my $dividend = <> );
        +
        +say "result: " . $divisor / $dividend;
        +
        + +

        Perl has conversion rules for string -> number, which is something along the lines of "parse the string until you encounter a non-numeric character; 0 if the string starts with a non-numeric character". Its operators are also explicitly for either numbers (like /, +, ==) or strings (like ., eq).

        + +

        I'm not really sure about it being a bug in most cases either, the problem with implicit conversions is more that they're a PITA to locate and correct when they are bugs.

        + +

        Like when PHP, using a similar conversion rule, wound up comparing some SHAs as if they were numbers with some garbage string at the end, e.g. "0efoo…" == "0ebar…" because 0 == 0, and it doesn't have the ==/eq split that Perl does to give the programmer control over what sort of comparison will be done.

        + +

        And in cases like JS, if we get a NaN after division, we might have a 0/0 or we might have "hello"/"world", so it's a really weak error signal compared to what we get out of typechecking.

        +
        +

        [–]ajr901 15 points16 points  (7 children)

        A year ago or so I started toying around with Elixir and genuinely enjoyed it but eventually dropped it because I kept repeatedly thinking, “I really wish this was typed.”

        + +

        I’m really glad it’s heading in that direction.

        +
        +

        [–]moltonel 4 points5 points  (0 children)

        Maybe have a look at Gleam ? Same ecosystem, but statically typed from the start.

        +
        +

        [–]legoman25 -5 points-4 points  (5 children)

        Elixir was typed, before: dynamically typed. People really need to stop implying static typing is "having types" and dynamic typing is "no types"

        +
        +

        [–]bythenumbers10 1 point2 points  (0 children)

        You could always have the worst of both worlds: Static weak typing, like C++!!!

        + +

        Strong types are mandatory, sit down JS. +Gradual typing allows one-off scripts to develop into more deterministic code libraries as foundational assumptions settle into place. But while definitions are in flux, let them.

        +
        +

        [–]Atulin 0 points1 point  (0 children)

        +

        dynamically typed

        +
        + +

        Might as well have not been typed, then

        +
        +

        [–]TankorSmash 0 points1 point  (2 children)

        +

        People really need to stop implying static typing is "having types" and dynamic typing is "no types"

        +
        + +

        That's semantics. What people mean is dynamic types is not checked at 'compile' time

        +
        +

        [–]legoman25 -1 points0 points  (1 child)

        It's not semantics, its the definition of the terms.

        +
        +

        [–]QuickQuirk 3 points4 points  (3 children)

        Now we just need it in Erlang. +I still prefer erlang.

        +
        +

        [–]michalmuskala 1 point2 points  (2 children)

        We have it in erlang already for a while through eqwalizer

        +
        +

        [–]QuickQuirk 3 points4 points  (1 child)

        What they're doing here is the first step in a real type system in the elixir language itself with support in the compiler, rather than running another process on the side. I mean, there was dialyser before.

        + +

        The problem with the 'linter' style tools is that they're not mandatory. Even if you use it in your project, it breaks down when you pull in dependencies that don't.

        + +

        Since it's in the compiler, it means that over time, everything in the ecosystem will be using the type system.

        +
        +

        [–]Wonderful-Habit-139 1 point2 points  (0 children)

        Correct. Just wanted to give an upvote here, and mention that Python went through the same thing when they started having an actual typing module in the standard library, rather than relying on external tools with no direct interaction with the Python language.

        +
        +

        [–]PersonalDatabase31 4 points5 points  (0 children)

        Bare minimum

        +
        +

        [–]No-Hat-2797 -3 points-2 points  (0 children)

        gradual typing in elixir is a big deal for teams that want the flexibility of dynamic code

        +
        +

        π Rendered by PID 42 on reddit-service-r2-loggedout-7768c89db9-tbbvw at 2026-06-04 14:19:58.950569+00:00 running 9e1a20d country code: IT.

        diff --git a/crates/webclaw-core/testdata/reddit/pandas_34comments.html b/crates/webclaw-core/testdata/reddit/pandas_34comments.html new file mode 100644 index 0000000..3ce6d38 --- /dev/null +++ b/crates/webclaw-core/testdata/reddit/pandas_34comments.html @@ -0,0 +1,227 @@ +Pandas as a reason to learn Python, even if you’re not doing data science : programming
        this post was submitted on
        43 points (65% upvoted)

        programming

        /r/programming is a reddit for discussion and news about computer programming.

        + +
        + +

        Rules

        + +

        Refer to the rules page for more info.

        + +
          +
        1. No LLM-Written Content

        2. +
        3. AI-related posts must comply with the AI Policy

        4. +
        5. No Political Posts or Personal/Social Drama/Gossip

        6. +
        7. No Non-Programming/Generic LLM/Diffusion Content

        8. +
        9. No Product Promotion/"I Made This" Project Demo Posts

        10. +
        11. No Content Aggregators

        12. +
        13. No Surveys Or Job Postings

        14. +
        15. No Support Questions or AskReddit-Type Questions

        16. +
        17. No Meta Posts

        18. +
        19. No Images, Memes, Or Other Low Effort Posts

        20. +
        21. No Blogspam

        22. +
        23. No Extreme Beginner Content

        24. +
        25. Comments: No Bots

        26. +
        27. Comments: No Incivility

        28. +
        + +
        + +

        Info

        + + + +
        + +

        Related reddits

        + + + +

        Specific languages

        +
        +
        a community for
        ×
        all 34 comments

        [–]guepier 117 points118 points  (16 children)

        I disagree with this incredibly strongly. I use Python extensively, and I mostly like it, but whenever I need to do data analysis I bend over backwards to avoid Pandas. Mostly this means using R instead. Pandas is nowhere near the state of the art of data analytics. Even Python has better libraries (namely, Polars). Pandas is atrociously slow and has a terrible API. — And to head off potential responses: I have used Pandas extensively, and I am absolutely qualified to judge its merits compared to other solutions.

        + +

        So, no, I disagree with the premise: there are lots of reasons to learn Python, but Pandas is emphatically not one of them.

        +
        +

        [–]unski_ukuli 23 points24 points  (0 children)

        This. I vehemently hate how pandas like to throw stuff into the index. Polars is nice because it has no index, is fast and is logical in the api. Also, immutability is a nice addition.

        +
        +

        [–]ZirePhiinix 20 points21 points  (1 child)

        Pandas has really weird syntax that is nearly impossible to remember. There's randomly differently behaviors based on how the data is structured and I always have to Google like crazy to figure it out.

        +
        +

        [–]Breadinator 3 points4 points  (0 children)

        Thank you. The syntax is esoteric and drives me nuts sometimes. I sometimes have to guess WTH is happening by starting at the references made and working backwards on intent. Then pulling out my LLM assistant anyway and still asking it what the convoluted thing actually is.

        +
        +

        [–]florinp 24 points25 points  (0 children)

        Polars was written as a replacement for Pandas.

        +
        +

        [–]QuickQuirk 5 points6 points  (0 children)

        I've just started learning R, and am pleasantly surprised.

        + +

        I mean, I like Python, in general, but R matches my preferences more. +The things in Pandas that feel like they're bending over backwards to make work are a natural part of R itself. +And it's also more on the functional language side of things, which I appreciate.

        + +

        Helps that the tensorflow support seems pretty good these days too, for ML.

        +
        +

        [–]SV-97 17 points18 points  (9 children)

        AFAIK pandas has actually improved quite a bit with its most recent major release. I haven't checked it out yet since polars is just so good and I doubt that even this new version of pandas is as nice as polars; but I think it *is* substantially better than it used to be.

        + +

        And personally I'd take even old pandas over R any day. The dev experience with R is just atrocious.

        +
        +

        [–]huge_clock 2 points3 points  (0 children)

        Pandas keeps getting worse for the simple things you want to use it for. I used to be able to take a data frame and go df.sum() and get the sum of each numerical category. Now they same operation will concatenate every string object in the data frame.

        +
        +

        [–]guepier 12 points13 points  (7 children)

        +

        The dev experience with R is just atrocious

        +
        + +

        Yes, but the data analytics experience isn’t. R is miles ahead of Python in that space, and not just because of the libraries.

        +
        +

        [–]SV-97 6 points7 points  (6 children)

        If you only consider ecosystem size for your "data analytics experience" (and equate data analysis with mostly calculating statistics on some data): sure, for the most part that's true.

        + +

        However when taking a more holistic view (i.e. setting up a dev environment in the first place, data extraction and cleaning, actually getting data in and out of the system, data exploration, writing core analyses and debugging those, publishing, ...) this isn't really true in my opinion. In my experience you end up wasting so much time dealing with all those pain points and idiosyncrasies around R that it's altogether faster to use Python and just implement the things that don't already exist yourself (although this is of course not viable for everyone) or interop with other languages for those parts.

        + +

        And in particular when you don't do anything overly niche (as is really the case for what OP is talking about here) the python ecosystem is perfectly workable, and in some fields even miles ahead of R. For example for me a lot of data analysis involves optimization and some geometry processing / a bunch of maths. And for those python really has the strictly better ecosystem and larger community.

        +
        +

        [–][deleted]  (4 children)

        [deleted]

        +
        +

          [–]PillowFortressKing 1 point2 points  (0 children)

          Because doing iterative data analysis in a compiled language is even worse. Grabbing a Python package that's written in a high performance language gives you the best of both worlds.

          +
          +

          [–]youcangotohellgoto 4 points5 points  (2 children)

          +

          If someone is worried about speed why are they reaching for python at all?

          +
          + +

          Of course neither Pandas nor Polars are really "Python" - that's just the API to a C or Rust implementation.

          +
          +

          [–]guepier 1 point2 points  (0 children)

          I’m definitely not just considering the ecosystem size, I’m also considering ergonomics of the other aspects you mention. I agree that setting up a reproducible dev environment in R is frustrating. And “getting data in and out” of the system can be more convoluted than in Python, depending on the type of the data and storage and/or ingress/egress mechanism (e.g. JSON data, or data hosted on S3: botocore/s3fs is vastly better than anything R has to offer). But, honestly, in most cases it’s seamless.

          + +

          I disagree with the rest: data extraction, cleaning, exploration, core analysis and publishing are all things that R excels at. Troubleshooting is occasionally made harder due to the lack of any type safety, but type annotations are also much less helpful for data analysis than for most other software engineering applications. And interactive debugging (and, importantly, interactive exploration of data) works very well.

          + +

          And modern R IDE integration (be it via dedicated IDEs such as Positron or RStudio, or via plugins such as Nvim-R or ESS) provides best-in-class interactive data exploration REPLs, and these integrate very well with report generation via Quarto, which in many regards is also strictly superior to Jupyter (but if you prefer the latter, there is an R kernel for it).

          +
          +

          [–]ManySugar5156 2 points3 points  (0 children)

          same, pandas is usually the thing i avoid first. polars or r feels less annoying most of the time

          +
          +

          [–]HiPhish 26 points27 points  (2 children)

          Pandas has an atrociously un-pythonic API that makes me hate it to its core. I guess you have to use it if you are dealing with large amounts of data, but otherwise just give me regular lists and dicts. Pandas feels too much like "magic" where things just work until they don't. The documentation is pretty bad as well, it's as if you are meant to study the examples and then form a mental model of how the API works on your own. Oh, and good luck finding out what the data types are and dealing with Pandas's automatic type conversion.

          + +

          At least that was the case last time I had to use it. Maybe it has gotten better since, but I have no desire to come back.

          +
          +

          [–]squashed_fly_biscuit 7 points8 points  (1 child)

          Mainly because pandas is trying to be like R, which is a pretty weird language with strange norms written by and for scientists

          +
          +

          [–]WannaBeStatDev 2 points3 points  (0 children)

          At least R is good for science :)

          +
          +

          [–]billsil 31 points32 points  (0 children)

          I wrote a tool with straight numpy and it’s 50x faster than the pandas implemention. Pandas is severely overused and that’s before you start talking about polars, which is basically fast pandas.

          +
          +

          [–]RedEyed__ 4 points5 points  (0 children)

          pandaspolars

          +
          +

          [–]zemega 6 points7 points  (0 children)

          I would say, if you need a little operation here and there, pandas are fine. But if you are serious, use polars.

          +
          +

          [–]turbothy 2 points3 points  (0 children)

          If you don't know Pandas by now, count your lucky stars and pick up something actually useful instead.

          +
          +

          [–]lood9phee2Ri 1 point2 points  (7 children)

          I mean, I don't actually mind pandas particularly, but another thing you can do - if you want - is use sqlalchemy against a transient in-memory sqlite. Then use the same sqlalchemy stuff directly, as you would against real database. Faster than you might think (in-memory, duh).

          + +
          import sqlalchemy
          +sql_engine = sqlalchemy.create_engine('sqlite+pysqlite:///:memory:')
          +with sql_engine.connect() as sql_conn:
          +   sql_result = sql_conn.execute(sqlalchemy.text("SELECT 'Hello, World!';"))
          +   print(sql_result.all())
          +
          + +

          =>

          + +
          [('Hello, World!',)]
          +
          + +

          Anyway.

          +
          +

          [–]dannuic 0 points1 point  (0 children)

          I tend to reach for duckdb to create memory models in programs (and before that, I used sqlite). I've never needed to replace those implementations due to poor performance and it lets me get down to writing actual program logic faster. SQL, even limited implementations, are way better than trying to roll your own memory model every single time. Plus it's really easy to pickle state

          +
          +

          [–]huge_clock -3 points-2 points  (5 children)

          I would like to see a test. SQLite’s SQL implementation is incredibly limited and in-memory databases are notoriously slow and i have tested it.

          +
          +

          [–]elh0mbre 2 points3 points  (2 children)

          >  in-memory databases are notoriously slow

          + +

          wut? do you mean products that are specifically designed as in memory databases? Otherwise, "in memory" is as fast as a database gets.

          +
          +

          [–]huge_clock -2 points-1 points  (1 child)

          Single file “serverless” databases that run server side requests on the client machine. Notably SQLite but also MSAccess and DuckDB. Incredibly poor performance for business analytics. Might be fine for a small website with a limited number of users.

          +
          +

          [–]lood9phee2Ri 0 points1 point  (0 children)

          +

          Single file “serverless” databases

          +
          + +

          Are you sure you were testing in transient in-memory mode? "Single-file" kind of suggests you weren't, and are misunderstanding things - if you're hitting a file on persistent storage, of course it's slower than in-memory, even ssd/nvme is still slower than ram for now.

          + +

          https://sqlite.org/inmemorydb.html

          + +
          +

          An SQLite database is normally stored in a single ordinary disk file. However, in certain circumstances, the database might be stored in memory.

          + +

          The most common way to force an SQLite database to exist purely in memory is to open the database using the special filename ":memory:".

          +
          + +

          https://duckdb.org/docs/current/connect/overview#in-memory-database

          + +
          +

          DuckDB can operate in in-memory mode. In most clients, this can be activated by passing the special value :memory: as the database file

          +
          +
          +

          [–]Ralwus 3 points4 points  (1 child)

          Duckdb is incredibly fast. What have you tested that was slow?

          +
          +

          [–]lood9phee2Ri 0 points1 point  (0 children)

          Vaguely worth noting in context that despite it also having its own official python binding, there's also an SQLAlchemy driver for it

          + +

          https://duckdb.org/docs/current/clients/python/overview

          + +

          https://pypi.org/project/duckdb-sqlalchemy/

          + +

          =>

          + +
          sql_engine = create_engine("duckdb:///:memory:")
          +
          +
          +

          [–]elh0mbre 1 point2 points  (1 child)

          Not a reason to reach for python or pandas, IMO.

          + +

          I would reach for SQL, if it's all in one DB. If its in microservices, I'd either be looking to consolidate the data for reporting like this in a data warehouse, or stitch the data together myself in a service (given that dotnet is my daily driver, LINQ would replace pandas aptly for me) if I have a good reason for it to not come from a warehouse (low latency requirements, as one example).

          + +

          I still don't understand the fascination with microservices, nor do I understand a lot of people's aversion to learning/understanding SQL. /shrug

          +
          +

          [–]dannuic 1 point2 points  (0 children)

          SQL has an incredibly stable and sensible syntax, but still gets constant improvement under the hood (especially if you're using postgres). I have no idea why software developers are so afraid of just learning SQL to do anything with data, either.

          +
          +

          π Rendered by PID 608437 on reddit-service-r2-loggedout-7768c89db9-hndpc at 2026-06-04 14:19:58.253283+00:00 running 9e1a20d country code: IT.

          diff --git a/crates/webclaw-core/testdata/reddit/rust_selfpost_36comments.html b/crates/webclaw-core/testdata/reddit/rust_selfpost_36comments.html new file mode 100644 index 0000000..182e0de --- /dev/null +++ b/crates/webclaw-core/testdata/reddit/rust_selfpost_36comments.html @@ -0,0 +1,234 @@ +Learning Rust (for fun) because sick of AI : rust
          this post was submitted on
          89 points (84% upvoted)

          rust

          Please read The Rust Community Code of Conduct

          + +
          + +

          The Rust Programming Language

          + +

          A place for all things related to the Rust programming language—an open-source systems language that emphasizes performance, reliability, and productivity.

          + +
          + +

          Rules

          + +

          Observe our code of conduct

          + + + +

          Submissions must be on-topic

          + +
            +
          • Posts must reference Rust or relate to things using Rust. For content that does not, use a text post to explain its relevance.

          • +
          • Post titles should include useful context.

          • +
          • For Rust questions, use the stickied Q&A thread.

          • +
          • Arts-and-crafts posts are permitted on weekends.

          • +
          • No meta posts; message the mods instead.

          • +
          • Details

          • +
          + +

          Constructive criticism only

          + +
            +
          • Criticism is encouraged, though it must be constructive, useful and actionable.

          • +
          • If criticizing a project on GitHub, you may not link directly to the project's issue tracker. Please create a read-only mirror and link that instead.

          • +
          • Details

          • +
          + +

          Keep things in perspective

          + +
            +
          • A programming language is rarely worth getting worked up over.

          • +
          • No zealotry or fanaticism.

          • +
          • Be charitable in intent. Err on the side of giving others the benefit of the doubt.

          • +
          • Details

          • +
          + +

          No endless relitigation

          + +
            +
          • Avoid re-treading topics that have been long-settled or utterly exhausted.

          • +
          • Avoid bikeshedding.

          • +
          • This is not an official Rust forum, and cannot fulfill feature requests. Use the official venues for that.

          • +
          • Details

          • +
          + +

          No low-effort content

          + +
            +
          • No memes, image macros, etc.

          • +
          • Consider the existing content of the subreddit and whether your post fits in. Does it inspire thoughtful discussion?

          • +
          • Use properly formatted text to share code samples and error messages. Do not use images.

          • +
          • Submissions appearing to contain AI-generated content may be removed at moderator discretion.

          • +
          • Details

          • +
          + +
          + +

          Useful Links

          + +

          Megathreads

          + +

          Most links here will now take you to a search page listing posts with the relevant flair. The latest megathread for that flair should be the top result.

          + + + +

          Official Resources

          + + + +

          Learn Rust

          + + + +

          Discussion Platforms

          + + +
          +
          a community for
          ×
          all 36 comments

          [–]Active-Force2979 27 points28 points  (0 children)

          good luck

          +
          +

          [–]robe_and_wizard_hat 11 points12 points  (0 children)

          love to hear it. this is also how i learned rust. would recommend that you also create a tcp chat server. let the client be telnet. this will force you to deal with concurrency, ownership semantics, and IO. fwiw this is what i do when leaning a new language because it covers so many bases.

          +
          +

          [–]vascocosta 6 points7 points  (3 children)

          Understanding lifetimes was hard and something I kept delaying probably more than I should. It's the one thing I would focus more right from the beginning if I was starting now. It helps understanding a lot of errors specific to Rust much better.

          + +

          That said, a deep understanding of lifetimes will take time to sink in, so be patient.

          +
          +

          [–]bbkane_ 6 points7 points  (2 children)

          Rustlings has a few lifetime exercises and they also point to https://tfpk.github.io/lifetimekata/ for more. I'll be trying those soon myself

          +
          +

          [–]Informal-Baseball209[S] 1 point2 points  (0 children)

          Thanks a lot did not know about Rustlings, that is why I start with Book + advent of code instead of Book + Ruslings, looking forward to go through it.

          +
          +

          [–]vascocosta 0 points1 point  (0 children)

          This is a very good starting point.

          +
          +

          [–]Weaves87 4 points5 points  (0 children)

          Honestly the best way to learn it is to build something you genuinely need. There's no better motivator than needing the software to do something for you, to solve a problem for you. Find a problem that you currently have that needs fixing, create a CLI tool around it.

          + +

          I spent a lot of time initially learning Rust by working on code for little toy projects and definitely learned a lot in that process. But the real lessons didn't come until I tackled something that was providing me a lot of value. Obviously, not everyone has a project idea up their sleeve that they know will provide them with some value. So start with something dead simple.

          + +

          Rust is a considerably more demanding language than Python. There are many layers to it. Don't feel overwhelmed when you get to sections of the Rust book that talk about more advanced concepts. Sometimes you'll read about something and feel like "do I need to use this?" and oftentimes the answer is no, at least not yet. There are some features of Rust you may not even touch for a long time until you decide to do something much more complex (like concurrency, or lifetimes)

          +
          +

          [–]Cronos993 5 points6 points  (1 child)

          Same. I do have a decent bit of Go, C++ and Typescript background tho so for my first actually useful Rust project, I am thinking of creating an audio visualizer using webgpu compiled to wasm and usable in the browser

          +
          +

          [–]countsachot 0 points1 point  (0 children)

          Oh that sounds cool!

          +
          +

          [–]nick42d 2 points3 points  (0 children)

          Fellow IT project manager here! I'd simply say dig in and get started with a project to get a feel for it. Not sure I could say what's clicked though, I guess that it's important how you structure your program in Rust - but that comes with experience rather than any particular tips.

          +
          +

          [–]tikhiibhujiya 2 points3 points  (2 children)

          Since you're learning for fun rather than work, I'd actually avoid using AI too much while learning. Rust is one of the few languages where the struggle is part of the education

          +
          +

          [–]Informal-Baseball209[S] 0 points1 point  (0 children)

          Exactly, that’s my path. I google things, read the documentation, look through some repos. I want to have that joy of “it works” and then “why does it work?”

          +
          +

          [–]caothudanhgiay 2 points3 points  (0 children)

          use AI too much make my brain is doom

          +
          +

          [–]bbkane_ 1 point2 points  (0 children)

          Also learning Rust and rustlings has been a great way to exercise the stuff I'm reading in the Rust book. I highly recommend!

          + +

          I am keeping an AI terminal to the side to ask questions (what does this compilation error mean? Why can't I do it this way?) and make cheat sheets for me. That's helping me a lot too

          +
          +

          [–]Psionikus 1 point2 points  (0 children)

          Learn the mechanics of function calling on an abstract CPU. De-mystifies what lifetimes and many traits were developed to solve.

          +
          +

          [–]paulqq 1 point2 points  (0 children)

          Rust is hard. I asked myself some years ago this question. Do i want to do application or systems development? After having this clear and some initial struggle, i now write sideprojects exclusively in rust. Enjoy the journey

          +
          +

          [–]No-Region8878 1 point2 points  (0 children)

          good luck! I recently got into rust because of AI, I've made a few apps for my own use case all in rust

          +
          +

          [–]Chroiche 1 point2 points  (3 children)

          I'm very confused why you think AI can't do rust. IME it's much more useful for rust than Python because it gets blocked by the compiler more.

          +
          +

          [–]felotar 0 points1 point  (1 child)

          How will you verify if what is being build is actually fundamentally correct? Compiler won't stop illogical choices that slow your program down.

          +
          +

          [–]Chroiche 0 points1 point  (0 children)

          You won't, but you get much more guarantees than Python.

          +
          +

          [–]Informal-Baseball209[S] 0 points1 point  (0 children)

          I’m not saying that AI can’t do Rust. Hi Anthropic, nice PR with Bun completely rewritten from Zig to Rust by your AI Agents...

          + +

          I just feel that improving my Python skills for my use cases - automation and fun is pointless now. Automation is already good to go with just a few right prompts. I’ll probably still build most things in Python with help from AI, because it’s more efficient. But I’ll find “something” that I can write in Rust.

          + +

          I enjoy learning new things and using them in my work. By learning Rust and then building something with it, I can bring a bit of fun into my work, if its makes sense.

          +
          +

          [–]Appropriate-Pin2214 1 point2 points  (0 children)

          Rust is hard. I think AI helps.

          + +

          When you want to get to WASM, however, python and pytorch... Server-side... fine, but what about air-gapped?

          + +

          Not selling anything, but as a poc, here are two wasm files, in a browser, backed by Rust/ort and onnx that does redaction in your browser... https://www.rulesentry.io with GPU inference if your're setup correctly.

          + +

          Peace.

          +
          +

          [–]_w62_ 1 point2 points  (0 children)

          Try Programming Rust the 3rd edition will be available later this year while the 2nd edition worth a read.

          + +

          The first five chapters gives you a bird's eye view of the language. Then it comes ownership, traits, genetics etc. The only missing bit is async Rust.

          +
          +

          [–]tafia97300 1 point2 points  (1 child)

          i wouldn't recommend AOC to learn Rust, those problems are too small to really see Rust benefits. I strongly suggest reimplementing something you already know and that is complex enough (not in terms of performance necessarily but more in terms of architecture).

          + +

          Rust forces you to think differently, it puts the data in the center and everything gravitates around it (data can have only one owner, you need to move it around etc ...). This may seem "simple" to explain but it has profound implications and very often ends up simplifying a complex problem (do try not to reach for Arc/Rc/RefCell/Mutex).

          +
          +

          [–]Informal-Baseball209[S] 0 points1 point  (0 children)

          Arc/Rc/RefCell/Mutex — I had never heard of them until now. I read something about them, but I probably don’t want to hear about them again for at least another year. 😃

          + +

          Thanks for tips.

          +
          +

          [–]Suspicious_Word3776 0 points1 point  (0 children)

          Someone is working on the Keel language in Rust. I would learn it while it's young.

          +
          +

          [–]Plastic_Owl6706 0 points1 point  (0 children)

          Me core , well rust keeps clicking 

          +
          +

          [–]nazeehg 0 points1 point  (0 children)

          [ Removed by Reddit ]

          +
          +

          [–]Fancyness -3 points-2 points  (3 children)

          I don't get the resentment against AI. Its a gift for learning stuff. Every time i am stuck or don't understand why something does not compile, i get a perfect explanation. Its just good at syntax and syntax is a crucial part in learning a new programming language. Next building step are idiomatic solutions and at this AI is also great. So i don't get your problem. Its lame to do everything by yourself. If you have a flying car it doesn't make sense to walk. Embrace the future. Tokens = Revenue. Jensen said it himself on this conference the other day.

          +
          +

          [–]Informal-Baseball209[S] 0 points1 point  (2 children)

          Maybe the title of the thread is a little bit clickbait. I’m not against AI when it comes to efficiency, automation, or even learning.

          + +

          But as I said in the post, I feel like there is no longer much need for me to learn more Python or Bash, because the things I do with them are already handled pretty well by my AI setup. I’m even running OpenClaw on my home server with OpenAI and a few local models, and I use my agents every day.

          + +

          But it doesn’t give me the same fun from programming that I had a few years ago. To me, it feels like an AI black box. Yes, I configure it, and now it works for my niches, but mostly I just ask AI how to configure AI, run some experiments, and voilà.

          + +

          I wrote this in another comment: I like having those moments of “it works.” and then “Why does it work?” And because I’ve never worked with a low-level control language before, Rust is my choice.

          +
          +

          [–]Fancyness 0 points1 point  (1 child)

          I imagine it like using Linux and when having a problem copy pasting some commands that were suggested in online forums. Did I always want to know what every single command in its entire form means? Certainly not. It doesn’t matter as long as the machine just behaves as desired. 

          + +

          But there are Situations where you want to know exactly what is going on and where you just want to understand every single bit of what is happening so you can tweak it if desired. AI doesn’t take that away from you. It enables you to get rid of all the boilerplate that can eat you alive and to focus on the important parts of your project. AI has a very negative connotation between Software Developers because it is forced upon them by their empoyers and it drastically changes how you work: often Developers don’t create anymore but just doing code reviews of ai generated code which can be very frustrating. It’s this sense of not being in control of anything, that truly sucks. But objectively it doesn’t take anything away from the programmer, it just adds more possibilities. No one forces you to fight with bash commands. You still can do it, but you don’t need to. I think AI is empowering and I find it hillarious for being downvoted for this opinion. We live in the best times when it comes to programming. We have the best tools today that ever existed

          +
          +

          [–]Informal-Baseball209[S] 0 points1 point  (0 children)

          I totally agree. AI is super strong and helpful, as I already said. But I think you are missing my point of view about why Rust.

          + +

          I am not a developer; I just like to create things for my niches. Most of the time, it is faster with AI, and as you said, it almost does not matter how it works. But for me, learning Rust is just for the joy of learning. I do not need to ask AI what an error means, what the best way to do something is, or why, at least for now.

          + +

          For example, in my OpenClaw setup folder, I have a “Rust learning” folder and a few prompts that forbid my agent from giving me direct solutions. It can only give me hints based on my current level and progress through The Rust Book and code that I wrote. Even though I have this setup, I still prefer reading the docs on my own because I like doing it this way, and nothing pushes me to be faster or more efficient.

          + +

          I think Linus Torvalds said that now is a great time to be a developer because AI is so helpful. Having the knowledge of how to work with AI and how to write code is a huge benefit for many good developers. But the message “AI will take your job” is real. Companies are willing to pay cloud AI providers millions and push their employees to use AI no matter what, which does not help the situation.

          +
          +

          [–]labooner -1 points0 points  (1 child)

          I use Codecrafters. It’s very self driven. They tell you what you need to implement in this step, and you get it done. They have automated tests to prove your project works.

          + +

          Similar to you, I often struggle to come up with ideas on what to work on without getting bogged down in the specification, tests, etc. Codecrafters really lets you focus on the implementation.

          + +

          It’s a bit pricey, but take a look at the free courses to see what it’s like.

          +
          +

          [–]Solus161 1 point2 points  (0 children)

          Don't get why you get downvoted. I do Codecrafters too. The platform has its own limitations bit it still serve the purpose of learning by building.

          +
          +

          π Rendered by PID 694807 on reddit-service-r2-loggedout-7768c89db9-hndpc at 2026-06-04 15:24:07.705067+00:00 running 9e1a20d country code: IT.

          diff --git a/crates/webclaw-fetch/src/client.rs b/crates/webclaw-fetch/src/client.rs index f579f7c..2bfd8c5 100644 --- a/crates/webclaw-fetch/src/client.rs +++ b/crates/webclaw-fetch/src/client.rs @@ -160,9 +160,6 @@ impl Response { fn body(&self) -> &[u8] { &self.body } - fn is_success(&self) -> bool { - (200..300).contains(&self.status) - } fn text(&self) -> std::borrow::Cow<'_, str> { String::from_utf8_lossy(&self.body) @@ -299,32 +296,15 @@ impl FetchClient { /// when you need literal no-rescue behavior (e.g. inside the rescue /// logic itself to avoid recursion). pub async fn fetch_smart(&self, url: &str) -> Result { - // Reddit: the HTML page shows a verification interstitial for most - // client IPs, but appending `.json` returns the post + comment tree - // publicly. `parse_reddit_json` in downstream code knows how to read - // the result; here we just do the URL swap at the fetch layer. - if crate::reddit::is_reddit_url(url) && !url.ends_with(".json") { - let json_url = crate::reddit::json_url(url); - // Reddit's public .json API serves JSON to identifiable bot - // User-Agents and blocks browser UAs with a verification wall. - // Override our Chrome-profile UA for this specific call. - let ua = concat!( - "Webclaw/", - env!("CARGO_PKG_VERSION"), - " (+https://webclaw.io)" - ); - if let Ok(resp) = self - .fetch_with_headers(&json_url, &[("user-agent", ua)]) - .await - && resp.status == 200 - { - let first = resp.html.trim_start().as_bytes().first().copied(); - if matches!(first, Some(b'{') | Some(b'[')) { - return Ok(resp); - } - } - // If the .json fetch failed or returned HTML, fall through. - } + // Reddit: fetch old.reddit.com for stable server-rendered HTML. + // The JSON API is blocked; old.reddit.com works without JS or auth. 
+ let owned; + let url = if crate::reddit::is_reddit_url(url) { + owned = crate::reddit::to_old_reddit_url(url); + owned.as_str() + } else { + url + }; let resp = self.fetch(url).await?; @@ -496,23 +476,16 @@ impl FetchClient { let parsed_url = crate::url_security::validate_public_http_url(url).await?; let url = parsed_url.as_str(); - // Reddit fallback: use their JSON API to get post + full comment tree. - if crate::reddit::is_reddit_url(url) { - let json_url = crate::reddit::json_url(url); - let json_url = crate::url_security::validate_public_http_url(&json_url).await?; - debug!("reddit detected, fetching {json_url}"); - - let client = self.pick_client(url); - let resp = client.get(json_url.as_str()).send().await?; - let response = Response::from_wreq(resp).await?; - if response.is_success() { - let bytes = response.body(); - match crate::reddit::parse_reddit_json(bytes, url) { - Ok(result) => return Ok(result), - Err(e) => warn!("reddit json fallback failed: {e}, falling back to HTML"), - } - } - } + // Reddit: rewrite to old.reddit.com for stable server-rendered HTML. + // webclaw-core's Reddit fast path then parses the thread structure. + let reddit_owned; + let url = if crate::reddit::is_reddit_url(url) { + reddit_owned = crate::reddit::to_old_reddit_url(url); + debug!("reddit: rewriting to {reddit_owned}"); + reddit_owned.as_str() + } else { + url + }; let start = Instant::now(); let client = self.pick_client(url); diff --git a/crates/webclaw-fetch/src/extractors/reddit.rs b/crates/webclaw-fetch/src/extractors/reddit.rs index 13cdc16..eba8ddb 100644 --- a/crates/webclaw-fetch/src/extractors/reddit.rs +++ b/crates/webclaw-fetch/src/extractors/reddit.rs @@ -1,12 +1,10 @@ -//! Reddit structured extractor — returns the full post + comment tree -//! as typed JSON via Reddit's `.json` API. +//! Reddit structured extractor — parses old.reddit.com HTML. //! -//! The same trick the markdown extractor in `crate::reddit` uses: -//! 
appending `.json` to any post URL returns the data the new SPA -//! frontend would load client-side. Zero antibot, zero JS rendering. +//! Fetches old.reddit.com (stable server-rendered HTML, no JS required) +//! and delegates parsing to `webclaw_core::reddit`. Returns a typed JSON +//! value with `{ url, post, comments }` structure. -use serde::Deserialize; -use serde_json::{Value, json}; +use serde_json::Value; use super::ExtractorInfo; use crate::error::FetchError; @@ -24,182 +22,27 @@ pub const INFO: ExtractorInfo = ExtractorInfo { }; pub fn matches(url: &str) -> bool { - let host = host_of(url); - let is_reddit_host = matches!( - host, - "reddit.com" | "www.reddit.com" | "old.reddit.com" | "np.reddit.com" | "new.reddit.com" - ); - is_reddit_host && url.contains("/comments/") + webclaw_core::reddit::is_reddit_url(url) && url.contains("/comments/") } pub async fn extract(client: &dyn Fetcher, url: &str) -> Result { - let json_url = build_json_url(url); - let resp = client.fetch(&json_url).await?; + let fetch_url = crate::reddit::to_old_reddit_url(url); + let resp = client.fetch(&fetch_url).await?; if resp.status != 200 { return Err(FetchError::Build(format!( - "reddit api returned status {}", + "reddit: unexpected status {}", resp.status ))); } - let listings: Vec = serde_json::from_str(&resp.html) - .map_err(|e| FetchError::BodyDecode(format!("reddit json parse: {e}")))?; + let thread = webclaw_core::reddit::try_extract_thread(&resp.html, url).ok_or_else(|| { + FetchError::BodyDecode( + "reddit: page structure not recognised — is this a thread URL?".into(), + ) + })?; - if listings.is_empty() { - return Err(FetchError::BodyDecode("reddit response empty".into())); - } - - // First listing = the post (single t3 child). - let post = listings - .first() - .and_then(|l| l.data.children.first()) - .filter(|t| t.kind == "t3") - .map(|t| post_json(&t.data)) - .unwrap_or(Value::Null); - - // Second listing = the comment tree. 
- let comments: Vec = listings - .get(1) - .map(|l| l.data.children.iter().filter_map(comment_json).collect()) - .unwrap_or_default(); - - Ok(json!({ - "url": url, - "post": post, - "comments": comments, - })) -} - -// --------------------------------------------------------------------------- -// JSON shapers -// --------------------------------------------------------------------------- - -fn post_json(d: &ThingData) -> Value { - json!({ - "id": d.id, - "title": d.title, - "author": d.author, - "subreddit": d.subreddit_name_prefixed, - "permalink": d.permalink.as_ref().map(|p| format!("https://www.reddit.com{p}")), - "url": d.url_overridden_by_dest, - "is_self": d.is_self, - "selftext": d.selftext, - "score": d.score, - "upvote_ratio": d.upvote_ratio, - "num_comments": d.num_comments, - "created_utc": d.created_utc, - "link_flair_text": d.link_flair_text, - "over_18": d.over_18, - "spoiler": d.spoiler, - "stickied": d.stickied, - "locked": d.locked, - }) -} - -/// Render a single comment + its reply tree. Returns `None` for non-t1 -/// kinds (the trailing `more` placeholder Reddit injects at depth limits). 
-fn comment_json(thing: &Thing) -> Option { - if thing.kind != "t1" { - return None; - } - let d = &thing.data; - let replies: Vec = match &d.replies { - Some(Replies::Listing(l)) => l.data.children.iter().filter_map(comment_json).collect(), - _ => Vec::new(), - }; - Some(json!({ - "id": d.id, - "author": d.author, - "body": d.body, - "score": d.score, - "created_utc": d.created_utc, - "is_submitter": d.is_submitter, - "stickied": d.stickied, - "depth": d.depth, - "permalink": d.permalink.as_ref().map(|p| format!("https://www.reddit.com{p}")), - "replies": replies, - })) -} - -// --------------------------------------------------------------------------- -// URL helpers -// --------------------------------------------------------------------------- - -fn host_of(url: &str) -> &str { - url.split("://") - .nth(1) - .unwrap_or(url) - .split('/') - .next() - .unwrap_or("") -} - -/// Build the Reddit JSON URL. We keep the original host (`www.reddit.com` -/// or `old.reddit.com` as the caller gave us). Routing through -/// `old.reddit.com` unconditionally looks appealing but that host has -/// stricter UA-based blocking than `www.reddit.com`, while the main -/// host accepts our Chrome-fingerprinted client fine. -fn build_json_url(url: &str) -> String { - let clean = url.split('?').next().unwrap_or(url).trim_end_matches('/'); - format!("{clean}.json?raw_json=1") -} - -// --------------------------------------------------------------------------- -// Reddit JSON types — only fields we render. Everything else is dropped. 
-// --------------------------------------------------------------------------- - -#[derive(Deserialize)] -struct Listing { - data: ListingData, -} - -#[derive(Deserialize)] -struct ListingData { - children: Vec, -} - -#[derive(Deserialize)] -struct Thing { - kind: String, - data: ThingData, -} - -#[derive(Deserialize, Default)] -struct ThingData { - // post (t3) - id: Option, - title: Option, - selftext: Option, - subreddit_name_prefixed: Option, - url_overridden_by_dest: Option, - is_self: Option, - upvote_ratio: Option, - num_comments: Option, - over_18: Option, - spoiler: Option, - stickied: Option, - locked: Option, - link_flair_text: Option, - - // comment (t1) - author: Option, - body: Option, - score: Option, - created_utc: Option, - is_submitter: Option, - depth: Option, - permalink: Option, - - // recursive - replies: Option, -} - -#[derive(Deserialize)] -#[serde(untagged)] -enum Replies { - Listing(Listing), - #[allow(dead_code)] - Empty(String), + serde_json::to_value(&thread) + .map_err(|e| FetchError::BodyDecode(format!("reddit: serialisation error: {e}"))) } #[cfg(test)] @@ -207,28 +50,17 @@ mod tests { use super::*; #[test] - fn matches_reddit_post_urls() { + fn matches_thread_urls() { assert!(matches( "https://www.reddit.com/r/rust/comments/abc123/some_title/" )); - assert!(matches( - "https://reddit.com/r/rust/comments/abc123/some_title" - )); assert!(matches("https://old.reddit.com/r/rust/comments/abc123/x/")); + assert!(matches("https://reddit.com/r/rust/comments/abc/x")); } #[test] - fn rejects_non_post_reddit_urls() { + fn rejects_listing_and_non_reddit() { assert!(!matches("https://www.reddit.com/r/rust")); - assert!(!matches("https://www.reddit.com/user/foo")); - assert!(!matches("https://example.com/r/rust/comments/x")); - } - - #[test] - fn json_url_appends_suffix_and_drops_query() { - assert_eq!( - build_json_url("https://www.reddit.com/r/rust/comments/abc/x/?utm=foo"), - "https://www.reddit.com/r/rust/comments/abc/x.json?raw_json=1" - ); 
+ assert!(!matches("https://example.com/r/rust/comments/abc/x")); } } diff --git a/crates/webclaw-fetch/src/reddit.rs b/crates/webclaw-fetch/src/reddit.rs index 7676ccd..bfe4001 100644 --- a/crates/webclaw-fetch/src/reddit.rs +++ b/crates/webclaw-fetch/src/reddit.rs @@ -1,172 +1,56 @@ -/// Reddit JSON API fallback for extracting posts + comments without JS rendering. -/// -/// Reddit's new `shreddit` frontend only SSRs the post body — comments are -/// loaded client-side. Appending `.json` to any Reddit URL returns the full -/// comment tree as structured JSON, which we convert to clean markdown. -use serde::Deserialize; -use tracing::debug; -use webclaw_core::{Content, ExtractionResult, Metadata}; +//! Reddit URL helpers for the fetch layer. +//! +//! The JSON API (`*.json`) is blocked. We rewrite all Reddit hosts to +//! `old.reddit.com`, which serves stable server-rendered HTML that +//! `webclaw-core::reddit` parses directly. -/// Check if a URL points to a Reddit post/comment page. pub fn is_reddit_url(url: &str) -> bool { - let host = url - .split("://") - .nth(1) - .unwrap_or(url) - .split('/') - .next() - .unwrap_or(""); - matches!( - host, - "reddit.com" | "www.reddit.com" | "old.reddit.com" | "np.reddit.com" | "new.reddit.com" - ) + webclaw_core::reddit::is_reddit_url(url) } -/// Build the `.json` URL from a Reddit page URL. -pub fn json_url(url: &str) -> String { - let clean = url.split('?').next().unwrap_or(url).trim_end_matches('/'); - format!("{clean}.json") +/// Rewrite any Reddit host to old.reddit.com, preserving path and query. +pub fn to_old_reddit_url(url: &str) -> String { + let Some(scheme_end) = url.find("://") else { + return url.to_string(); + }; + let after = &url[scheme_end + 3..]; + let host_end = after.find(['/', '?', '#']).unwrap_or(after.len()); + let scheme = &url[..scheme_end + 3]; + let rest = &after[host_end..]; + format!("{scheme}old.reddit.com{rest}") } -/// Convert Reddit JSON API response into an ExtractionResult. 
-pub fn parse_reddit_json(json_bytes: &[u8], url: &str) -> Result { - let listings: Vec = - serde_json::from_slice(json_bytes).map_err(|e| format!("reddit json parse: {e}"))?; +#[cfg(test)] +mod tests { + use super::*; - let mut markdown = String::new(); - let mut title = None; - let mut author = None; - let mut subreddit = None; - - // First listing = the post itself - if let Some(post_listing) = listings.first() { - for child in &post_listing.data.children { - if child.kind == "t3" { - let d = &child.data; - title = d.title.clone(); - author = d.author.clone(); - subreddit = d.subreddit_name_prefixed.clone(); - - if let Some(ref t) = title { - markdown.push_str(&format!("# {t}\n\n")); - } - if let (Some(a), Some(sr)) = (&author, &subreddit) { - markdown.push_str(&format!("**u/{a}** in {sr}\n\n")); - } - if let Some(ref body) = d.selftext - && !body.is_empty() - { - markdown.push_str(body); - markdown.push_str("\n\n"); - } - if let Some(ref url_field) = d.url_overridden_by_dest - && !url_field.is_empty() - { - markdown.push_str(&format!("[Link]({url_field})\n\n")); - } - markdown.push_str("---\n\n"); - } - } + #[test] + fn rewrites_www_to_old() { + assert_eq!( + to_old_reddit_url("https://www.reddit.com/r/rust/comments/abc/x/"), + "https://old.reddit.com/r/rust/comments/abc/x/" + ); } - // Second listing = comment tree - if let Some(comment_listing) = listings.get(1) { - markdown.push_str("## Comments\n\n"); - for child in &comment_listing.data.children { - render_comment(child, 0, &mut markdown); - } + #[test] + fn rewrites_bare_to_old() { + assert_eq!( + to_old_reddit_url("https://reddit.com/r/rust/"), + "https://old.reddit.com/r/rust/" + ); } - let word_count = markdown.split_whitespace().count(); - debug!(word_count, "reddit json extracted"); - - Ok(ExtractionResult { - metadata: Metadata { - title, - description: None, - author, - published_date: None, - language: Some("en".into()), - url: Some(url.to_string()), - site_name: subreddit, - image: None, - 
favicon: None, - word_count, - }, - content: Content { - markdown, - plain_text: String::new(), - links: vec![], - images: vec![], - code_blocks: vec![], - raw_html: None, - }, - domain_data: None, - structured_data: vec![], - }) -} - -fn render_comment(thing: &Thing, depth: usize, out: &mut String) { - if thing.kind != "t1" { - return; + #[test] + fn preserves_old_reddit_unchanged() { + let url = "https://old.reddit.com/r/rust/comments/abc/x/?context=3"; + assert_eq!(to_old_reddit_url(url), url); } - let d = &thing.data; - let indent = " ".repeat(depth); - let author = d.author.as_deref().unwrap_or("[deleted]"); - let body = d.body.as_deref().unwrap_or("[removed]"); - let score = d.score.unwrap_or(0); - out.push_str(&format!("{indent}- **u/{author}** ({score} pts)\n")); - for line in body.lines() { - out.push_str(&format!("{indent} {line}\n")); - } - out.push('\n'); - - // Recurse into replies - if let Some(Replies::Listing(listing)) = &d.replies { - for child in &listing.data.children { - render_comment(child, depth + 1, out); - } + #[test] + fn preserves_query_and_hash() { + assert_eq!( + to_old_reddit_url("https://www.reddit.com/r/rust/?sort=top#anchor"), + "https://old.reddit.com/r/rust/?sort=top#anchor" + ); } } - -// --- Reddit JSON types (minimal) --- - -#[derive(Deserialize)] -struct Listing { - data: ListingData, -} - -#[derive(Deserialize)] -struct ListingData { - children: Vec, -} - -#[derive(Deserialize)] -struct Thing { - kind: String, - data: ThingData, -} - -#[derive(Deserialize)] -struct ThingData { - // Post fields (t3) - title: Option, - selftext: Option, - subreddit_name_prefixed: Option, - url_overridden_by_dest: Option, - // Comment fields (t1) - author: Option, - body: Option, - score: Option, - replies: Option, -} - -/// Reddit replies can be either a nested Listing or an empty string. -#[derive(Deserialize)] -#[serde(untagged)] -enum Replies { - Listing(Listing), - #[allow(dead_code)] - Empty(String), -}