mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-04-25 00:06:21 +02:00
Bump to 0.5.4: SafariIos profile + Chrome fingerprint alignment + locale helper
- New BrowserProfile::SafariIos mapped to BrowserVariant::SafariIos26. Built on wreq_util::Emulation::SafariIos26 with 4 overrides (TLS extension order, HTTP/2 HEADERS priority, real Safari iOS 26 headers, gzip/deflate/br). Matches bogdanfinn safari_ios_26_0 JA3 8d909525bd5bbb79f133d11cc05159fe exactly. Empirically 9/10 on immobiliare.it with country-it residential. - BrowserProfile::Chrome aligned to bogdanfinn chrome_133: dropped MAX_CONCURRENT_STREAMS from H2 SETTINGS, priority weight 256, explicit extension_permutation, advertise h3 in ALPN and ALPS. JA3 43067709b025da334de1279a120f8e14, akamai_fp 52d84b11737d980aef856699f885ca86. Fixes indeed.com and other Cloudflare-fronted sites. - New locale module: accept_language_for_url / accept_language_for_tld. TLD to Accept-Language mapping, unknown TLDs default to en-US. DataDome geo-vs-locale cross-checks are now trivially satisfiable. - wreq-util bumped 2.2.6 to 3.0.0-rc.10 for Emulation::SafariIos26.
This commit is contained in:
parent
4bf11d902f
commit
b77767814a
9 changed files with 291 additions and 26 deletions
14
CHANGELOG.md
14
CHANGELOG.md
|
|
@ -3,6 +3,20 @@
|
||||||
All notable changes to webclaw are documented here.
|
All notable changes to webclaw are documented here.
|
||||||
Format follows [Keep a Changelog](https://keepachangelog.com/).
|
Format follows [Keep a Changelog](https://keepachangelog.com/).
|
||||||
|
|
||||||
|
## [0.5.4] — 2026-04-23
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- **`BrowserProfile::SafariIos`** variant, mapped to a new `BrowserVariant::SafariIos26`. Built on top of `wreq_util::Emulation::SafariIos26` with four targeted overrides that close the gap against DataDome's immobiliare.it / target.com / bestbuy.com / sephora.com rulesets: TLS extension order pinned to bogdanfinn's `safari_ios_26_0` wire format, HTTP/2 HEADERS priority flag set (weight 256, exclusive, depends_on=0) while preserving wreq-util's SETTINGS + WINDOW_UPDATE, Safari iOS 26 header set without Chromium leaks, accept-encoding limited to `gzip, deflate, br` (no zstd). Empirically 9/10 on immobiliare with `country-it` residential, 2/2 on target/bestbuy/sephora with `country-us` residential. Matches bogdanfinn's JA3 `8d909525bd5bbb79f133d11cc05159fe` exactly.
|
||||||
|
|
||||||
|
- **`accept_language_for_url(url)` and `accept_language_for_tld(tld)` helpers** in a new `locale` module. TLD to `Accept-Language` mapping (`.it` to `it-IT,it;q=0.9`, `.fr` to `fr-FR,fr;q=0.9`, etc.). Unknown TLDs fall back to `en-US,en;q=0.9`. DataDome rules that cross-check geo vs locale (Italian IP + English `accept-language` = bot) are now trivially satisfiable by callers that plumb the target URL through this helper before building a `FetchConfig`.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- **`BrowserProfile::Chrome` fingerprint aligned to bogdanfinn `chrome_133`.** Three wire-level fixes: removed `MAX_CONCURRENT_STREAMS` from the HTTP/2 SETTINGS frame (real Chrome 133 does not send this setting), priority weight on the HEADERS frame changed from 220 to 256, TLS extension order pinned via `extension_permutation` to match bogdanfinn's stable JA3 `43067709b025da334de1279a120f8e14`. `alpn_protocols` extended to `[HTTP3, HTTP2, HTTP1]` and `alps_protocols` to `[HTTP3, HTTP2]` so Cloudflare's bot management sees the h3 advertisement real Chrome 133+ emits. Fixes indeed.com and other Cloudflare-protected sites that were serving the previous fingerprint a 403 "Security Check" challenge. Full matrix result (12 Chrome rows): 11/12 clean, the one failure is shared with bogdanfinn from the same proxy (IP reputation, not fingerprint).
|
||||||
|
|
||||||
|
- **Bumped `wreq-util` from `2.2.6` to `3.0.0-rc.10`** to pick up `Emulation::SafariIos26`, which didn't ship until rc.10.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## [0.5.2] — 2026-04-22
|
## [0.5.2] — 2026-04-22
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
|
||||||
31
Cargo.lock
generated
31
Cargo.lock
generated
|
|
@ -2967,6 +2967,26 @@ dependencies = [
|
||||||
"pom",
|
"pom",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "typed-builder"
|
||||||
|
version = "0.23.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "31aa81521b70f94402501d848ccc0ecaa8f93c8eb6999eb9747e72287757ffda"
|
||||||
|
dependencies = [
|
||||||
|
"typed-builder-macro",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "typed-builder-macro"
|
||||||
|
version = "0.23.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "076a02dc54dd46795c2e9c8282ed40bcfb1e22747e955de9389a1de28190fb26"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "typed-path"
|
name = "typed-path"
|
||||||
version = "0.12.3"
|
version = "0.12.3"
|
||||||
|
|
@ -3258,6 +3278,7 @@ dependencies = [
|
||||||
"webclaw-core",
|
"webclaw-core",
|
||||||
"webclaw-pdf",
|
"webclaw-pdf",
|
||||||
"wreq",
|
"wreq",
|
||||||
|
"wreq-util",
|
||||||
"zip 2.4.2",
|
"zip 2.4.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -3709,6 +3730,16 @@ dependencies = [
|
||||||
"zstd",
|
"zstd",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wreq-util"
|
||||||
|
version = "3.0.0-rc.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6c6bbe24d28beb9ceb58b514bd6a613c759d3b706f768b9d2950d5d35b543c04"
|
||||||
|
dependencies = [
|
||||||
|
"typed-builder",
|
||||||
|
"wreq",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "writeable"
|
name = "writeable"
|
||||||
version = "0.6.2"
|
version = "0.6.2"
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ resolver = "2"
|
||||||
members = ["crates/*"]
|
members = ["crates/*"]
|
||||||
|
|
||||||
[workspace.package]
|
[workspace.package]
|
||||||
version = "0.5.3"
|
version = "0.5.4"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
repository = "https://github.com/0xMassi/webclaw"
|
repository = "https://github.com/0xMassi/webclaw"
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@ tracing = { workspace = true }
|
||||||
tokio = { workspace = true }
|
tokio = { workspace = true }
|
||||||
async-trait = "0.1"
|
async-trait = "0.1"
|
||||||
wreq = { version = "6.0.0-rc.28", features = ["cookies", "gzip", "brotli", "zstd", "deflate"] }
|
wreq = { version = "6.0.0-rc.28", features = ["cookies", "gzip", "brotli", "zstd", "deflate"] }
|
||||||
|
wreq-util = "3.0.0-rc.10"
|
||||||
http = "1"
|
http = "1"
|
||||||
bytes = "1"
|
bytes = "1"
|
||||||
url = "2"
|
url = "2"
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,10 @@ pub enum BrowserProfile {
|
||||||
#[default]
|
#[default]
|
||||||
Chrome,
|
Chrome,
|
||||||
Firefox,
|
Firefox,
|
||||||
|
/// Safari iOS 26 (iPhone). The one profile proven to defeat
|
||||||
|
/// DataDome's immobiliare.it / idealista.it / target.com-class
|
||||||
|
/// rules when paired with a country-scoped residential proxy.
|
||||||
|
SafariIos,
|
||||||
/// Randomly pick from all available profiles on each request.
|
/// Randomly pick from all available profiles on each request.
|
||||||
Random,
|
Random,
|
||||||
}
|
}
|
||||||
|
|
@ -18,6 +22,7 @@ pub enum BrowserVariant {
|
||||||
ChromeMacos,
|
ChromeMacos,
|
||||||
Firefox,
|
Firefox,
|
||||||
Safari,
|
Safari,
|
||||||
|
SafariIos26,
|
||||||
Edge,
|
Edge,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -635,6 +635,7 @@ fn collect_variants(profile: &BrowserProfile) -> Vec<BrowserVariant> {
|
||||||
BrowserProfile::Random => browser::all_variants(),
|
BrowserProfile::Random => browser::all_variants(),
|
||||||
BrowserProfile::Chrome => vec![browser::latest_chrome()],
|
BrowserProfile::Chrome => vec![browser::latest_chrome()],
|
||||||
BrowserProfile::Firefox => vec![browser::latest_firefox()],
|
BrowserProfile::Firefox => vec![browser::latest_firefox()],
|
||||||
|
BrowserProfile::SafariIos => vec![BrowserVariant::SafariIos26],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ pub mod error;
|
||||||
pub mod extractors;
|
pub mod extractors;
|
||||||
pub mod fetcher;
|
pub mod fetcher;
|
||||||
pub mod linkedin;
|
pub mod linkedin;
|
||||||
|
pub mod locale;
|
||||||
pub mod proxy;
|
pub mod proxy;
|
||||||
pub mod reddit;
|
pub mod reddit;
|
||||||
pub mod sitemap;
|
pub mod sitemap;
|
||||||
|
|
@ -21,6 +22,7 @@ pub use crawler::{CrawlConfig, CrawlResult, CrawlState, Crawler, PageResult};
|
||||||
pub use error::FetchError;
|
pub use error::FetchError;
|
||||||
pub use fetcher::Fetcher;
|
pub use fetcher::Fetcher;
|
||||||
pub use http::HeaderMap;
|
pub use http::HeaderMap;
|
||||||
|
pub use locale::{accept_language_for_tld, accept_language_for_url};
|
||||||
pub use proxy::{parse_proxy_file, parse_proxy_line};
|
pub use proxy::{parse_proxy_file, parse_proxy_line};
|
||||||
pub use sitemap::SitemapEntry;
|
pub use sitemap::SitemapEntry;
|
||||||
pub use webclaw_pdf::PdfMode;
|
pub use webclaw_pdf::PdfMode;
|
||||||
|
|
|
||||||
77
crates/webclaw-fetch/src/locale.rs
Normal file
77
crates/webclaw-fetch/src/locale.rs
Normal file
|
|
@ -0,0 +1,77 @@
|
||||||
|
//! Derive an `Accept-Language` header from a URL.
|
||||||
|
//!
|
||||||
|
//! DataDome-class bot detection on country-specific sites (e.g. immobiliare.it,
|
||||||
|
//! leboncoin.fr) does a geo-vs-locale sanity check: residential IP in the
|
||||||
|
//! target country + a browser UA but the wrong `Accept-Language` is a bot
|
||||||
|
//! signal. Matching the site's expected locale gets us through.
|
||||||
|
//!
|
||||||
|
//! Default for unmapped TLDs is `en-US,en;q=0.9` — the global fallback.
|
||||||
|
|
||||||
|
/// Best-effort `Accept-Language` header value for the given URL's TLD.
|
||||||
|
/// Returns `None` if the URL cannot be parsed.
|
||||||
|
pub fn accept_language_for_url(url: &str) -> Option<&'static str> {
|
||||||
|
let host = url::Url::parse(url).ok()?.host_str()?.to_ascii_lowercase();
|
||||||
|
let tld = host.rsplit('.').next()?;
|
||||||
|
Some(accept_language_for_tld(tld))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Map a TLD like `it`, `fr`, `de` to a plausible `Accept-Language`.
///
/// The input is normalized before lookup so that common spellings of the same
/// TLD resolve identically: surrounding whitespace and trailing dots are
/// dropped, only the last dot-separated label is considered, and matching is
/// ASCII case-insensitive. `"it"`, `"IT"`, `".it"`, `"it."` and `"co.it"`
/// all map to Italian.
///
/// Unknown (or empty) TLDs fall back to US English.
///
/// NOTE(review): several regional TLDs deliberately share one locale here
/// (`at` -> `de-DE`, `ie` -> `en-GB`, `hk` -> `zh-TW`, Latin American TLDs ->
/// `es-ES`). The values are kept as-is; confirm whether country-specific
/// tags are needed for the target sites.
pub fn accept_language_for_tld(tld: &str) -> &'static str {
    // `rsplit` always yields at least one item, so the default is never hit;
    // it only avoids an `unwrap`.
    let label = tld
        .trim()
        .trim_end_matches('.')
        .rsplit('.')
        .next()
        .unwrap_or_default()
        .to_ascii_lowercase();

    match label.as_str() {
        "it" => "it-IT,it;q=0.9",
        "fr" => "fr-FR,fr;q=0.9",
        "de" | "at" => "de-DE,de;q=0.9",
        "es" => "es-ES,es;q=0.9",
        "pt" => "pt-PT,pt;q=0.9",
        "nl" => "nl-NL,nl;q=0.9",
        "pl" => "pl-PL,pl;q=0.9",
        "se" => "sv-SE,sv;q=0.9",
        "no" => "nb-NO,nb;q=0.9",
        "dk" => "da-DK,da;q=0.9",
        "fi" => "fi-FI,fi;q=0.9",
        "cz" => "cs-CZ,cs;q=0.9",
        "ro" => "ro-RO,ro;q=0.9",
        "gr" => "el-GR,el;q=0.9",
        "tr" => "tr-TR,tr;q=0.9",
        "ru" => "ru-RU,ru;q=0.9",
        "jp" => "ja-JP,ja;q=0.9",
        "kr" => "ko-KR,ko;q=0.9",
        "cn" => "zh-CN,zh;q=0.9",
        "tw" | "hk" => "zh-TW,zh;q=0.9",
        "br" => "pt-BR,pt;q=0.9",
        "mx" | "ar" | "co" | "cl" | "pe" => "es-ES,es;q=0.9",
        "uk" | "ie" => "en-GB,en;q=0.9",
        _ => "en-US,en;q=0.9",
    }
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Each URL's TLD must select the expected `Accept-Language` value,
    /// including the multi-label `co.uk` case and the `.com` fallback.
    #[test]
    fn tld_dispatch() {
        let cases = [
            ("https://www.immobiliare.it/annunci/1", "it-IT,it;q=0.9"),
            ("https://www.leboncoin.fr/", "fr-FR,fr;q=0.9"),
            ("https://www.amazon.co.uk/", "en-GB,en;q=0.9"),
            ("https://example.com/", "en-US,en;q=0.9"),
        ];

        for (input, expected) in cases {
            assert_eq!(accept_language_for_url(input), Some(expected));
        }
    }

    /// Input that does not parse as a URL yields `None`, not the fallback.
    #[test]
    fn bad_url_returns_none() {
        let outcome = accept_language_for_url("not-a-url");
        assert_eq!(outcome, None);
    }
}
|
||||||
|
|
@ -7,10 +7,15 @@
|
||||||
|
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
use wreq::http2::{
|
use wreq::http2::{
|
||||||
Http2Options, PseudoId, PseudoOrder, SettingId, SettingsOrder, StreamDependency, StreamId,
|
Http2Options, PseudoId, PseudoOrder, SettingId, SettingsOrder, StreamDependency, StreamId,
|
||||||
};
|
};
|
||||||
use wreq::tls::{AlpsProtocol, CertificateCompressionAlgorithm, TlsOptions, TlsVersion};
|
use wreq::tls::{
|
||||||
|
AlpnProtocol, AlpsProtocol, CertificateCompressionAlgorithm, ExtensionType, TlsOptions,
|
||||||
|
TlsVersion,
|
||||||
|
};
|
||||||
use wreq::{Client, Emulation};
|
use wreq::{Client, Emulation};
|
||||||
|
|
||||||
use crate::browser::BrowserVariant;
|
use crate::browser::BrowserVariant;
|
||||||
|
|
@ -43,6 +48,55 @@ const SAFARI_SIGALGS: &str = "ecdsa_secp256r1_sha256:rsa_pss_rsae_sha256:rsa_pkc
|
||||||
/// Safari curves.
|
/// Safari curves.
|
||||||
const SAFARI_CURVES: &str = "X25519:P-256:P-384:P-521";
|
const SAFARI_CURVES: &str = "X25519:P-256:P-384:P-521";
|
||||||
|
|
||||||
|
/// Safari iOS 26 TLS extension order, matching bogdanfinn's
|
||||||
|
/// `safari_ios_26_0` wire format. GREASE slots are omitted. wreq
|
||||||
|
/// inserts them itself. Diverges from wreq-util's default SafariIos26
|
||||||
|
/// extension order, which DataDome's immobiliare.it ruleset flags.
|
||||||
|
fn safari_ios_extensions() -> Vec<ExtensionType> {
|
||||||
|
vec![
|
||||||
|
ExtensionType::CERTIFICATE_TIMESTAMP,
|
||||||
|
ExtensionType::APPLICATION_LAYER_PROTOCOL_NEGOTIATION,
|
||||||
|
ExtensionType::SERVER_NAME,
|
||||||
|
ExtensionType::CERT_COMPRESSION,
|
||||||
|
ExtensionType::KEY_SHARE,
|
||||||
|
ExtensionType::SUPPORTED_VERSIONS,
|
||||||
|
ExtensionType::PSK_KEY_EXCHANGE_MODES,
|
||||||
|
ExtensionType::SUPPORTED_GROUPS,
|
||||||
|
ExtensionType::RENEGOTIATE,
|
||||||
|
ExtensionType::SIGNATURE_ALGORITHMS,
|
||||||
|
ExtensionType::STATUS_REQUEST,
|
||||||
|
ExtensionType::EC_POINT_FORMATS,
|
||||||
|
ExtensionType::EXTENDED_MASTER_SECRET,
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Chrome 133 TLS extension order, matching bogdanfinn's stable JA3
|
||||||
|
/// (`43067709b025da334de1279a120f8e14`). Real Chrome permutes extensions
|
||||||
|
/// per handshake, but indeed.com's WAF allowlists this specific wire order
|
||||||
|
/// and rejects permuted ones. GREASE slots are inserted by wreq.
|
||||||
|
///
|
||||||
|
/// JA3 extension field from peet.ws: 18-5-35-51-10-45-11-27-17613-43-13-0-16-65037-65281-23
|
||||||
|
fn chrome_extensions() -> Vec<ExtensionType> {
|
||||||
|
vec![
|
||||||
|
ExtensionType::CERTIFICATE_TIMESTAMP, // 18
|
||||||
|
ExtensionType::STATUS_REQUEST, // 5
|
||||||
|
ExtensionType::SESSION_TICKET, // 35
|
||||||
|
ExtensionType::KEY_SHARE, // 51
|
||||||
|
ExtensionType::SUPPORTED_GROUPS, // 10
|
||||||
|
ExtensionType::PSK_KEY_EXCHANGE_MODES, // 45
|
||||||
|
ExtensionType::EC_POINT_FORMATS, // 11
|
||||||
|
ExtensionType::CERT_COMPRESSION, // 27
|
||||||
|
ExtensionType::APPLICATION_SETTINGS_NEW, // 17613 (new codepoint, matches alps_use_new_codepoint)
|
||||||
|
ExtensionType::SUPPORTED_VERSIONS, // 43
|
||||||
|
ExtensionType::SIGNATURE_ALGORITHMS, // 13
|
||||||
|
ExtensionType::SERVER_NAME, // 0
|
||||||
|
ExtensionType::APPLICATION_LAYER_PROTOCOL_NEGOTIATION, // 16
|
||||||
|
ExtensionType::ENCRYPTED_CLIENT_HELLO, // 65037
|
||||||
|
ExtensionType::RENEGOTIATE, // 65281
|
||||||
|
ExtensionType::EXTENDED_MASTER_SECRET, // 23
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
// --- Chrome HTTP headers in correct wire order ---
|
// --- Chrome HTTP headers in correct wire order ---
|
||||||
|
|
||||||
const CHROME_HEADERS: &[(&str, &str)] = &[
|
const CHROME_HEADERS: &[(&str, &str)] = &[
|
||||||
|
|
@ -130,6 +184,26 @@ const SAFARI_HEADERS: &[(&str, &str)] = &[
|
||||||
("sec-fetch-dest", "document"),
|
("sec-fetch-dest", "document"),
|
||||||
];
|
];
|
||||||
|
|
||||||
|
/// Default request headers for the Safari iOS 26 profile, listed in the wire
/// order the original author recorded for real Safari.
///
/// Deliberately absent: `sec-fetch-*` and `priority: u=0, i`, which the
/// original notes describe as Chromium-only leaks. `upgrade-insecure-requests: 1`
/// is present, and `accept-encoding` omits zstd. According to the original
/// notes this set was verified against bogdanfinn on 2026-04-22 as what
/// DataDome's immobiliare ruleset expects for a real iPhone.
const SAFARI_IOS_HEADERS: &[(&str, &str)] = &[
    (
        "accept",
        "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    ),
    // Default locale for this table.
    ("accept-language", "en-US,en;q=0.9"),
    // No zstd on purpose.
    ("accept-encoding", "gzip, deflate, br"),
    (
        "user-agent",
        "Mozilla/5.0 (iPhone; CPU iPhone OS 26_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/26.0 Mobile/15E148 Safari/604.1",
    ),
    ("upgrade-insecure-requests", "1"),
];
|
||||||
|
|
||||||
const EDGE_HEADERS: &[(&str, &str)] = &[
|
const EDGE_HEADERS: &[(&str, &str)] = &[
|
||||||
(
|
(
|
||||||
"sec-ch-ua",
|
"sec-ch-ua",
|
||||||
|
|
@ -156,6 +230,9 @@ const EDGE_HEADERS: &[(&str, &str)] = &[
|
||||||
];
|
];
|
||||||
|
|
||||||
fn chrome_tls() -> TlsOptions {
|
fn chrome_tls() -> TlsOptions {
|
||||||
|
// permute_extensions is off so the explicit extension_permutation sticks.
|
||||||
|
// Real Chrome permutes, but indeed.com's WAF allowlists bogdanfinn's
|
||||||
|
// fixed order, so matching that gets us through.
|
||||||
TlsOptions::builder()
|
TlsOptions::builder()
|
||||||
.cipher_list(CHROME_CIPHERS)
|
.cipher_list(CHROME_CIPHERS)
|
||||||
.sigalgs_list(CHROME_SIGALGS)
|
.sigalgs_list(CHROME_SIGALGS)
|
||||||
|
|
@ -163,12 +240,18 @@ fn chrome_tls() -> TlsOptions {
|
||||||
.min_tls_version(TlsVersion::TLS_1_2)
|
.min_tls_version(TlsVersion::TLS_1_2)
|
||||||
.max_tls_version(TlsVersion::TLS_1_3)
|
.max_tls_version(TlsVersion::TLS_1_3)
|
||||||
.grease_enabled(true)
|
.grease_enabled(true)
|
||||||
.permute_extensions(true)
|
.permute_extensions(false)
|
||||||
|
.extension_permutation(chrome_extensions())
|
||||||
.enable_ech_grease(true)
|
.enable_ech_grease(true)
|
||||||
.pre_shared_key(true)
|
.pre_shared_key(true)
|
||||||
.enable_ocsp_stapling(true)
|
.enable_ocsp_stapling(true)
|
||||||
.enable_signed_cert_timestamps(true)
|
.enable_signed_cert_timestamps(true)
|
||||||
.alps_protocols([AlpsProtocol::HTTP2])
|
.alpn_protocols([
|
||||||
|
AlpnProtocol::HTTP3,
|
||||||
|
AlpnProtocol::HTTP2,
|
||||||
|
AlpnProtocol::HTTP1,
|
||||||
|
])
|
||||||
|
.alps_protocols([AlpsProtocol::HTTP3, AlpsProtocol::HTTP2])
|
||||||
.alps_use_new_codepoint(true)
|
.alps_use_new_codepoint(true)
|
||||||
.aes_hw_override(true)
|
.aes_hw_override(true)
|
||||||
.certificate_compression_algorithms(&[CertificateCompressionAlgorithm::BROTLI])
|
.certificate_compression_algorithms(&[CertificateCompressionAlgorithm::BROTLI])
|
||||||
|
|
@ -212,25 +295,70 @@ fn safari_tls() -> TlsOptions {
|
||||||
.build()
|
.build()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Safari iOS 26 emulation — composed on top of `wreq_util::Emulation::SafariIos26`
|
||||||
|
/// with four targeted overrides. We don't hand-roll this one like Chrome/Firefox
|
||||||
|
/// because the wire-level defaults from wreq-util are already correct for ciphers,
|
||||||
|
/// sigalgs, curves, and GREASE — the four things wreq-util gets *wrong* for
|
||||||
|
/// DataDome compatibility are overridden here:
|
||||||
|
///
|
||||||
|
/// 1. TLS extension order: match bogdanfinn `safari_ios_26_0` exactly (JA3
|
||||||
|
/// ends up `8d909525bd5bbb79f133d11cc05159fe`).
|
||||||
|
/// 2. HTTP/2 HEADERS priority flag: weight=256, exclusive=1, depends_on=0.
|
||||||
|
/// wreq-util omits this frame; real Safari and bogdanfinn include it.
|
||||||
|
/// This flip is the thing DataDome actually reads — the akamai_fingerprint
|
||||||
|
/// hash changes from `c52879e43202aeb92740be6e8c86ea96` to
|
||||||
|
/// `d1294410a06522e37a5c5e3f0a45a705`, which is the winning signature.
|
||||||
|
/// 3. Headers: strip wreq-util's Chromium defaults (`sec-fetch-*`,
|
||||||
|
/// `priority: u=0, i`, zstd), replace with the real iOS 26 set.
|
||||||
|
/// 4. `accept-language` preserved from config.extra_headers for locale.
|
||||||
|
fn safari_ios_emulation() -> wreq::Emulation {
|
||||||
|
use wreq::EmulationFactory;
|
||||||
|
let mut em = wreq_util::Emulation::SafariIos26.emulation();
|
||||||
|
|
||||||
|
if let Some(tls) = em.tls_options_mut().as_mut() {
|
||||||
|
tls.extension_permutation = Some(Cow::Owned(safari_ios_extensions()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only override the priority flag — keep wreq-util's SETTINGS, WINDOW_UPDATE,
|
||||||
|
// and pseudo-order intact. Replacing the whole Http2Options resets SETTINGS
|
||||||
|
// to defaults, which sends only INITIAL_WINDOW_SIZE and fails DataDome.
|
||||||
|
if let Some(h2) = em.http2_options_mut().as_mut() {
|
||||||
|
h2.headers_stream_dependency = Some(StreamDependency::new(StreamId::zero(), 255, true));
|
||||||
|
}
|
||||||
|
|
||||||
|
let hm = em.headers_mut();
|
||||||
|
hm.clear();
|
||||||
|
for (k, v) in SAFARI_IOS_HEADERS {
|
||||||
|
if let (Ok(n), Ok(val)) = (
|
||||||
|
http::header::HeaderName::from_bytes(k.as_bytes()),
|
||||||
|
http::header::HeaderValue::from_str(v),
|
||||||
|
) {
|
||||||
|
hm.append(n, val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
em
|
||||||
|
}
|
||||||
|
|
||||||
fn chrome_h2() -> Http2Options {
|
fn chrome_h2() -> Http2Options {
|
||||||
|
// SETTINGS frame matches bogdanfinn `chrome_133`: HEADER_TABLE_SIZE,
|
||||||
|
// ENABLE_PUSH=0, INITIAL_WINDOW_SIZE, MAX_HEADER_LIST_SIZE. No
|
||||||
|
// MAX_CONCURRENT_STREAMS — real Chrome 133 and bogdanfinn both omit it,
|
||||||
|
// and indeed.com's WAF reads this as a bot signal when present. Priority
|
||||||
|
// weight 256 (encoded as 255 + 1) matches bogdanfinn's HEADERS frame.
|
||||||
Http2Options::builder()
|
Http2Options::builder()
|
||||||
.initial_window_size(6_291_456)
|
.initial_window_size(6_291_456)
|
||||||
.initial_connection_window_size(15_728_640)
|
.initial_connection_window_size(15_728_640)
|
||||||
.max_header_list_size(262_144)
|
.max_header_list_size(262_144)
|
||||||
.header_table_size(65_536)
|
.header_table_size(65_536)
|
||||||
.max_concurrent_streams(1000u32)
|
|
||||||
.enable_push(false)
|
.enable_push(false)
|
||||||
.settings_order(
|
.settings_order(
|
||||||
SettingsOrder::builder()
|
SettingsOrder::builder()
|
||||||
.extend([
|
.extend([
|
||||||
SettingId::HeaderTableSize,
|
SettingId::HeaderTableSize,
|
||||||
SettingId::EnablePush,
|
SettingId::EnablePush,
|
||||||
SettingId::MaxConcurrentStreams,
|
|
||||||
SettingId::InitialWindowSize,
|
SettingId::InitialWindowSize,
|
||||||
SettingId::MaxFrameSize,
|
|
||||||
SettingId::MaxHeaderListSize,
|
SettingId::MaxHeaderListSize,
|
||||||
SettingId::EnableConnectProtocol,
|
|
||||||
SettingId::NoRfc7540Priorities,
|
|
||||||
])
|
])
|
||||||
.build(),
|
.build(),
|
||||||
)
|
)
|
||||||
|
|
@ -244,7 +372,7 @@ fn chrome_h2() -> Http2Options {
|
||||||
])
|
])
|
||||||
.build(),
|
.build(),
|
||||||
)
|
)
|
||||||
.headers_stream_dependency(StreamDependency::new(StreamId::zero(), 219, true))
|
.headers_stream_dependency(StreamDependency::new(StreamId::zero(), 255, true))
|
||||||
.build()
|
.build()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -328,32 +456,38 @@ pub fn build_client(
|
||||||
extra_headers: &std::collections::HashMap<String, String>,
|
extra_headers: &std::collections::HashMap<String, String>,
|
||||||
proxy: Option<&str>,
|
proxy: Option<&str>,
|
||||||
) -> Result<Client, FetchError> {
|
) -> Result<Client, FetchError> {
|
||||||
let (tls, h2, headers) = match variant {
|
// SafariIos26 builds its Emulation on top of wreq-util's base instead
|
||||||
|
// of from scratch. See `safari_ios_emulation` for why.
|
||||||
|
let mut emulation = match variant {
|
||||||
|
BrowserVariant::SafariIos26 => safari_ios_emulation(),
|
||||||
|
other => {
|
||||||
|
let (tls, h2, headers) = match other {
|
||||||
BrowserVariant::Chrome => (chrome_tls(), chrome_h2(), CHROME_HEADERS),
|
BrowserVariant::Chrome => (chrome_tls(), chrome_h2(), CHROME_HEADERS),
|
||||||
BrowserVariant::ChromeMacos => (chrome_tls(), chrome_h2(), CHROME_MACOS_HEADERS),
|
BrowserVariant::ChromeMacos => (chrome_tls(), chrome_h2(), CHROME_MACOS_HEADERS),
|
||||||
BrowserVariant::Firefox => (firefox_tls(), firefox_h2(), FIREFOX_HEADERS),
|
BrowserVariant::Firefox => (firefox_tls(), firefox_h2(), FIREFOX_HEADERS),
|
||||||
BrowserVariant::Safari => (safari_tls(), safari_h2(), SAFARI_HEADERS),
|
BrowserVariant::Safari => (safari_tls(), safari_h2(), SAFARI_HEADERS),
|
||||||
BrowserVariant::Edge => (chrome_tls(), chrome_h2(), EDGE_HEADERS),
|
BrowserVariant::Edge => (chrome_tls(), chrome_h2(), EDGE_HEADERS),
|
||||||
|
BrowserVariant::SafariIos26 => unreachable!("handled above"),
|
||||||
|
};
|
||||||
|
Emulation::builder()
|
||||||
|
.tls_options(tls)
|
||||||
|
.http2_options(h2)
|
||||||
|
.headers(build_headers(headers))
|
||||||
|
.build()
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut header_map = build_headers(headers);
|
// Append extra headers after profile defaults.
|
||||||
|
let hm = emulation.headers_mut();
|
||||||
// Append extra headers after profile defaults
|
|
||||||
for (k, v) in extra_headers {
|
for (k, v) in extra_headers {
|
||||||
if let (Ok(n), Ok(val)) = (
|
if let (Ok(n), Ok(val)) = (
|
||||||
http::header::HeaderName::from_bytes(k.as_bytes()),
|
http::header::HeaderName::from_bytes(k.as_bytes()),
|
||||||
http::header::HeaderValue::from_str(v),
|
http::header::HeaderValue::from_str(v),
|
||||||
) {
|
) {
|
||||||
header_map.insert(n, val);
|
hm.insert(n, val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let emulation = Emulation::builder()
|
|
||||||
.tls_options(tls)
|
|
||||||
.http2_options(h2)
|
|
||||||
.headers(header_map)
|
|
||||||
.build();
|
|
||||||
|
|
||||||
let mut builder = Client::builder()
|
let mut builder = Client::builder()
|
||||||
.emulation(emulation)
|
.emulation(emulation)
|
||||||
.redirect(wreq::redirect::Policy::limited(10))
|
.redirect(wreq::redirect::Policy::limited(10))
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue