mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-04-25 00:06:21 +02:00
fix(fetch): send bot-identifying UA on reddit .json API to bypass browser UA block
This commit is contained in:
parent
866fa88aa0
commit
966981bc42
1 changed files with 12 additions and 5 deletions
|
|
@ -275,14 +275,21 @@ impl FetchClient {
|
|||
// client IPs, but appending `.json` returns the post + comment tree
|
||||
// publicly. `parse_reddit_json` in downstream code knows how to read
|
||||
// the result; here we just do the URL swap at the fetch layer.
|
||||
if crate::reddit::is_reddit_url(url) {
|
||||
if crate::reddit::is_reddit_url(url) && !url.ends_with(".json") {
|
||||
let json_url = crate::reddit::json_url(url);
|
||||
if let Ok(resp) = self.fetch(&json_url).await
|
||||
// Reddit's public .json API serves JSON to identifiable bot
|
||||
// User-Agents and blocks browser UAs with a verification wall.
|
||||
// Override our Chrome-profile UA for this specific call.
|
||||
let ua = concat!(
|
||||
"Webclaw/",
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
" (+https://webclaw.io)"
|
||||
);
|
||||
if let Ok(resp) = self
|
||||
.fetch_with_headers(&json_url, &[("user-agent", ua)])
|
||||
.await
|
||||
&& resp.status == 200
|
||||
{
|
||||
// Reddit will serve an HTML verification page at the .json
|
||||
// URL too when the IP is flagged. Only return if the body
|
||||
// actually starts with a JSON payload.
|
||||
let first = resp.html.trim_start().as_bytes().first().copied();
|
||||
if matches!(first, Some(b'{') | Some(b'[')) {
|
||||
return Ok(resp);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue