mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-06-06 22:05:13 +02:00
feat(core): endpoints module for API surface extraction from HTML and JS (#47)
* feat(core): endpoints module — extract API surface from HTML + JS bundles
* fix(docker): source CA bundle from distroless instead of apt (fixes arm64 release build)
* fix(test): serialize env-mutating CloudClient tests to stop flaky CI
* feat(core): filter endpoint-extractor noise (invalid hosts, schema domains, bare paths)
This commit is contained in:
parent
be8bcfebd9
commit
fe567a6af1
5 changed files with 536 additions and 11 deletions
515
crates/webclaw-core/src/endpoints.rs
Normal file
515
crates/webclaw-core/src/endpoints.rs
Normal file
|
|
@ -0,0 +1,515 @@
|
|||
//! API/endpoint surface discovery from HTML + JS bundle text.
|
||||
//!
|
||||
//! Pure and zero-network: callers fetch the page and its `<script src>`
|
||||
//! bundles, then hand the raw text here. We surface API paths, absolute
|
||||
//! API URLs, GraphQL and WebSocket endpoints that live in inline scripts
|
||||
//! and bundles — the surface a sitemap/`map` can never see.
|
||||
//!
|
||||
//! Heuristic by design: regex over string literals, not JS dataflow.
|
||||
//! High-signal patterns only; bounded for DoS safety.
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use scraper::{Html, Selector};
|
||||
use std::collections::BTreeSet;
|
||||
use url::Url;
|
||||
|
||||
/// Total number of bytes of script text (inline + bundles) that one
/// extraction run will scan: 8 MiB. Guards CPU/memory against huge or
/// hostile bundle sets.
const MAX_SCAN_BYTES: usize = 8 << 20;

/// Maximum number of endpoints collected before the scan stops and the
/// report is flagged as truncated.
const MAX_ENDPOINTS: usize = 2_000;

/// Maximum number of `<script src>` URLs handed back to the caller to fetch.
const MAX_SCRIPT_SRCS: usize = 40;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, serde::Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum EndpointKind {
|
||||
RelativePath,
|
||||
AbsoluteUrl,
|
||||
GraphQl,
|
||||
WebSocket,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, serde::Serialize)]
|
||||
pub struct DiscoveredEndpoint {
|
||||
pub value: String,
|
||||
pub kind: EndpointKind,
|
||||
pub first_party: bool,
|
||||
/// `"inline"` or the bundle URL the match came from.
|
||||
pub source: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, serde::Serialize)]
|
||||
pub struct EndpointReport {
|
||||
pub endpoints: Vec<DiscoveredEndpoint>,
|
||||
/// Distinct hosts seen across absolute URLs (first- and third-party).
|
||||
pub hosts: Vec<String>,
|
||||
pub bundles_scanned: usize,
|
||||
/// True if a cap was hit and results may be incomplete.
|
||||
pub truncated: bool,
|
||||
}
|
||||
|
||||
// Quoted relative path that looks API-ish. Bounded quantifiers; the `regex`
|
||||
// crate is linear-time (RE2) so this cannot catastrophically backtrack.
|
||||
static RE_REL_PATH: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(
|
||||
r#"["'`](/[A-Za-z0-9_\-./]{0,200}?(?:api|graphql|gql|/v[0-9]|/rest|/gateway|/internal|/discovery)[A-Za-z0-9_\-./]{0,200})["'`]"#,
|
||||
)
|
||||
.expect("RE_REL_PATH")
|
||||
});
|
||||
|
||||
static RE_ABS_URL: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"https?://[A-Za-z0-9.\-]{1,253}(?:/[A-Za-z0-9_\-./%]{0,400})?"#)
|
||||
.expect("RE_ABS_URL")
|
||||
});
|
||||
|
||||
static RE_WS: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r#"wss?://[A-Za-z0-9.\-]{1,253}(?:/[A-Za-z0-9_\-./%]{0,256})?"#).expect("RE_WS")
|
||||
});
|
||||
|
||||
static SCRIPT_SEL: Lazy<Selector> = Lazy::new(|| Selector::parse("script").expect("script sel"));
|
||||
|
||||
/// Frequently seen two-label public suffixes. With these,
/// `www.ticketmaster.co.uk` reduces to `ticketmaster.co.uk` rather than
/// `co.uk`. Deliberately a short pragmatic list, not the full Public
/// Suffix List.
const SUFFIX2: &[&str] = &[
    // United Kingdom
    "co.uk", "org.uk", "gov.uk", "ac.uk", "me.uk",
    // Australia
    "com.au", "net.au", "org.au",
    // Other country-code suffixes
    "co.jp", "co.nz", "co.za", "com.br", "com.mx", "com.sg", "co.in", "co.kr", "com.tr",
    "com.cn",
];
|
||||
|
||||
fn registrable_domain(host: &str) -> String {
|
||||
let host = host.trim_end_matches('.').to_ascii_lowercase();
|
||||
let labels: Vec<&str> = host.split('.').collect();
|
||||
if labels.len() < 2 {
|
||||
return host;
|
||||
}
|
||||
let last2 = labels[labels.len() - 2..].join(".");
|
||||
if SUFFIX2.contains(&last2.as_str()) && labels.len() >= 3 {
|
||||
labels[labels.len() - 3..].join(".")
|
||||
} else {
|
||||
last2
|
||||
}
|
||||
}
|
||||
|
||||
/// Report whether `candidate_host` is `base_reg` itself or a subdomain of it.
///
/// `base_reg` is the page's registrable domain and is compared as given, so
/// it is expected to be lowercase already. The candidate is lowercased and
/// any trailing root dots are dropped before comparing, so
/// `api.example.com.` counts as belonging to `example.com`.
///
/// An empty `base_reg` (the page URL could not be parsed) never matches.
/// Without that guard an empty or dot-terminated candidate would be
/// reported as first-party.
fn is_first_party(candidate_host: &str, base_reg: &str) -> bool {
    if base_reg.is_empty() {
        return false;
    }
    let host = candidate_host.trim_end_matches('.').to_ascii_lowercase();
    match host.strip_suffix(base_reg) {
        // Exact match with the registrable domain.
        Some("") => true,
        // Must end on a label boundary: `notexample.com` is not a subdomain.
        Some(prefix) => prefix.ends_with('.'),
        None => false,
    }
}
|
||||
|
||||
/// Registrable domains that only ever show up as spec, schema or example
/// references (minified JSON-Schema and `schema.org` links are everywhere)
/// and therefore never count as real API surface.
const NOISE_HOSTS: &[&str] = &[
    // Specification / schema hosts.
    "schema.org",
    "json-schema.org",
    "w3.org",
    // Reserved example domains.
    "example.com",
    "example.org",
    "example.net",
    // Local development.
    "localhost",
];
|
||||
|
||||
/// Decide whether `host` looks like a real, reportable hostname.
///
/// After trailing dots are dropped, the host must have at least two labels,
/// none of them empty, and its last label (the TLD) must be two or more
/// ASCII letters. This throws away minifier artifacts such as `http://f`
/// or `http://n` and UUID-like single labels that the URL regex matches.
fn is_valid_host(host: &str) -> bool {
    let trimmed = host.trim_end_matches('.');

    let mut label_count = 0usize;
    let mut tld = "";
    for label in trimmed.split('.') {
        if label.is_empty() {
            return false;
        }
        label_count += 1;
        tld = label;
    }

    label_count >= 2 && tld.len() >= 2 && tld.bytes().all(|b| b.is_ascii_alphabetic())
}
|
||||
|
||||
/// Flag relative paths that carry no information beyond a bare prefix.
///
/// Trailing slashes are ignored. Anything shorter than four bytes (such as
/// `/` or `/v1`) is noise, and so are the bare `/api` and `/rest` prefixes.
/// `/graphql`, `/gql` and deeper paths like `/api/x` are kept.
fn is_noise_path(p: &str) -> bool {
    let stripped = p.trim_end_matches('/');
    if stripped.len() < 4 {
        return true;
    }
    stripped == "/api" || stripped == "/rest"
}
|
||||
|
||||
/// Resolved absolute `<script src>` URLs (http/https only), deduped, capped.
|
||||
/// Inline scripts have no `src` and are scanned via [`extract_endpoints`].
|
||||
pub fn script_srcs(html: &str, base_url: &str) -> Vec<String> {
|
||||
let base = Url::parse(base_url).ok();
|
||||
let doc = Html::parse_document(html);
|
||||
let mut seen = BTreeSet::new();
|
||||
let mut out = Vec::new();
|
||||
for el in doc.select(&SCRIPT_SEL) {
|
||||
if out.len() >= MAX_SCRIPT_SRCS {
|
||||
break;
|
||||
}
|
||||
let Some(src) = el.value().attr("src") else {
|
||||
continue;
|
||||
};
|
||||
let resolved = match Url::parse(src) {
|
||||
Ok(u) => Some(u),
|
||||
Err(_) => base.as_ref().and_then(|b| b.join(src).ok()),
|
||||
};
|
||||
let Some(u) = resolved else {
|
||||
continue;
|
||||
};
|
||||
if (u.scheme() == "http" || u.scheme() == "https") && seen.insert(u.to_string()) {
|
||||
out.push(u.to_string());
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Extract endpoints from inline HTML scripts plus pre-fetched JS bundles.
|
||||
/// `bundles` is `(bundle_url, bundle_text)`.
|
||||
pub fn extract_endpoints(
|
||||
html: &str,
|
||||
base_url: &str,
|
||||
bundles: &[(String, String)],
|
||||
) -> EndpointReport {
|
||||
let base_reg = Url::parse(base_url)
|
||||
.ok()
|
||||
.and_then(|u| u.host_str().map(registrable_domain))
|
||||
.unwrap_or_default();
|
||||
|
||||
let mut endpoints: Vec<DiscoveredEndpoint> = Vec::new();
|
||||
let mut seen: BTreeSet<(String, String)> = BTreeSet::new();
|
||||
let mut hosts: BTreeSet<String> = BTreeSet::new();
|
||||
let mut budget = MAX_SCAN_BYTES;
|
||||
let mut truncated = false;
|
||||
|
||||
let push = |value: String,
|
||||
kind: EndpointKind,
|
||||
source: &str,
|
||||
endpoints: &mut Vec<DiscoveredEndpoint>,
|
||||
seen: &mut BTreeSet<(String, String)>,
|
||||
hosts: &mut BTreeSet<String>|
|
||||
-> bool {
|
||||
if endpoints.len() >= MAX_ENDPOINTS {
|
||||
return false;
|
||||
}
|
||||
let first_party = match Url::parse(&value) {
|
||||
Ok(u) => {
|
||||
let Some(h) = u.host_str() else {
|
||||
return true;
|
||||
};
|
||||
if !is_valid_host(h) {
|
||||
return true; // minifier garbage host
|
||||
}
|
||||
if NOISE_HOSTS.contains(®istrable_domain(h).as_str()) {
|
||||
return true; // schema.org / json-schema.org / example.*
|
||||
}
|
||||
// Absolute URL with no real path is an origin/site link,
|
||||
// not an API endpoint (drops the page's own URL too).
|
||||
let path = u.path();
|
||||
if path.is_empty() || path == "/" {
|
||||
return true;
|
||||
}
|
||||
hosts.insert(h.to_ascii_lowercase());
|
||||
is_first_party(h, &base_reg)
|
||||
}
|
||||
// Relative path: same origin as the page by definition.
|
||||
Err(_) => {
|
||||
if is_noise_path(&value) {
|
||||
return true; // bare /api, /, ultra-short
|
||||
}
|
||||
true
|
||||
}
|
||||
};
|
||||
if seen.insert((value.clone(), source.to_string())) {
|
||||
endpoints.push(DiscoveredEndpoint {
|
||||
value,
|
||||
kind,
|
||||
first_party,
|
||||
source: source.to_string(),
|
||||
});
|
||||
}
|
||||
true
|
||||
};
|
||||
|
||||
let scan = |text: &str,
|
||||
source: &str,
|
||||
endpoints: &mut Vec<DiscoveredEndpoint>,
|
||||
seen: &mut BTreeSet<(String, String)>,
|
||||
hosts: &mut BTreeSet<String>,
|
||||
budget: &mut usize,
|
||||
truncated: &mut bool| {
|
||||
if *budget == 0 {
|
||||
return;
|
||||
}
|
||||
let slice = if text.len() > *budget {
|
||||
*truncated = true;
|
||||
&text[..*budget]
|
||||
} else {
|
||||
text
|
||||
};
|
||||
*budget -= slice.len();
|
||||
|
||||
for c in RE_REL_PATH.captures_iter(slice) {
|
||||
if let Some(m) = c.get(1) {
|
||||
let v = m.as_str().to_string();
|
||||
let kind = if v.contains("graphql") || v.contains("/gql") {
|
||||
EndpointKind::GraphQl
|
||||
} else {
|
||||
EndpointKind::RelativePath
|
||||
};
|
||||
if !push(v, kind, source, endpoints, seen, hosts) {
|
||||
*truncated = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
for m in RE_WS.find_iter(slice) {
|
||||
if !push(
|
||||
m.as_str().to_string(),
|
||||
EndpointKind::WebSocket,
|
||||
source,
|
||||
endpoints,
|
||||
seen,
|
||||
hosts,
|
||||
) {
|
||||
*truncated = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
for m in RE_ABS_URL.find_iter(slice) {
|
||||
let v = m.as_str().to_string();
|
||||
// Skip obvious static assets — we want API surface, not CDN files.
|
||||
let lower = v.to_ascii_lowercase();
|
||||
if lower.ends_with(".js")
|
||||
|| lower.ends_with(".css")
|
||||
|| lower.ends_with(".png")
|
||||
|| lower.ends_with(".jpg")
|
||||
|| lower.ends_with(".svg")
|
||||
|| lower.ends_with(".woff2")
|
||||
{
|
||||
// still record the host for visibility
|
||||
if let Some(h) = Url::parse(&v)
|
||||
.ok()
|
||||
.and_then(|u| u.host_str().map(str::to_string))
|
||||
{
|
||||
hosts.insert(h.to_ascii_lowercase());
|
||||
}
|
||||
continue;
|
||||
}
|
||||
let kind = if lower.contains("graphql") || lower.contains("/gql") {
|
||||
EndpointKind::GraphQl
|
||||
} else {
|
||||
EndpointKind::AbsoluteUrl
|
||||
};
|
||||
if !push(v, kind, source, endpoints, seen, hosts) {
|
||||
*truncated = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Inline scripts.
|
||||
let doc = Html::parse_document(html);
|
||||
let mut inline = String::new();
|
||||
for el in doc.select(&SCRIPT_SEL) {
|
||||
if el.value().attr("src").is_none() {
|
||||
inline.push_str(&el.text().collect::<String>());
|
||||
inline.push('\n');
|
||||
}
|
||||
}
|
||||
scan(
|
||||
&inline,
|
||||
"inline",
|
||||
&mut endpoints,
|
||||
&mut seen,
|
||||
&mut hosts,
|
||||
&mut budget,
|
||||
&mut truncated,
|
||||
);
|
||||
|
||||
// Bundles.
|
||||
let mut bundles_scanned = 0usize;
|
||||
for (src, text) in bundles {
|
||||
if budget == 0 {
|
||||
truncated = true;
|
||||
break;
|
||||
}
|
||||
bundles_scanned += 1;
|
||||
scan(
|
||||
text,
|
||||
src,
|
||||
&mut endpoints,
|
||||
&mut seen,
|
||||
&mut hosts,
|
||||
&mut budget,
|
||||
&mut truncated,
|
||||
);
|
||||
}
|
||||
|
||||
endpoints.sort_by(|a, b| (a.kind, &a.value, &a.source).cmp(&(b.kind, &b.value, &b.source)));
|
||||
|
||||
EndpointReport {
|
||||
endpoints,
|
||||
hosts: hosts.into_iter().collect(),
|
||||
bundles_scanned,
|
||||
truncated,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Page URL shared by most fixtures.
    const PAGE: &str = "https://www.ticketmaster.co.uk/";

    #[test]
    fn registrable_domain_handles_cc_tlds() {
        let cases = [
            ("www.ticketmaster.co.uk", "ticketmaster.co.uk"),
            ("api.ticketmaster.com", "ticketmaster.com"),
            ("pubapi.ticketmaster.co.uk", "ticketmaster.co.uk"),
            ("localhost", "localhost"),
        ];
        for (host, expected) in cases {
            assert_eq!(registrable_domain(host), expected);
        }
    }

    #[test]
    fn script_srcs_resolves_and_filters() {
        let html = r#"<html><head>
<script src="/_next/static/chunks/main-abc.js"></script>
<script src="https://cdn.example.net/lib.js"></script>
<script>var inline = 1;</script>
<script src="data:text/javascript,1"></script>
</head></html>"#;
        let srcs = script_srcs(html, PAGE);
        let has = |url: &str| srcs.iter().any(|s| s == url);

        assert!(has("https://www.ticketmaster.co.uk/_next/static/chunks/main-abc.js"));
        assert!(has("https://cdn.example.net/lib.js"));
        assert_eq!(srcs.len(), 2, "inline + data: ignored");
    }

    #[test]
    fn extracts_inline_and_bundle_endpoints_with_classification() {
        let html = r#"<html><body>
<script>
var cfg = { search: "/api/search/events", suggest: "/api/search/search-suggest" };
fetch("/api/venue/info");
</script>
<script src="/app.js"></script>
</body></html>"#;
        let bundle_text = r#"
const GQL = "https://pubapi.ticketmaster.co.uk/graphql";
axios.post("https://services.ticketmaster.co.uk/discovery/v2/events");
new WebSocket("wss://live.ticketmaster.co.uk/socket");
const ga = "https://www.googletagservices.com/tag/js/gpt.js";
const img = "https://cdn.tmol.co/hero.png";
"#;
        let bundles = vec![(
            "https://www.ticketmaster.co.uk/app.js".to_string(),
            bundle_text.to_string(),
        )];

        let report = extract_endpoints(html, PAGE, &bundles);
        let vals: Vec<&str> = report.endpoints.iter().map(|e| e.value.as_str()).collect();

        for expected in [
            "/api/search/events",
            "/api/search/search-suggest",
            "/api/venue/info",
            "https://pubapi.ticketmaster.co.uk/graphql",
            "https://services.ticketmaster.co.uk/discovery/v2/events",
            "wss://live.ticketmaster.co.uk/socket",
        ] {
            assert!(vals.contains(&expected));
        }

        // A static .js asset is never an endpoint, yet its host is kept.
        assert!(!vals.contains(&"https://www.googletagservices.com/tag/js/gpt.js"));
        assert!(report.hosts.iter().any(|h| h == "www.googletagservices.com"));

        let gql = report
            .endpoints
            .iter()
            .find(|e| e.value.contains("graphql"))
            .unwrap();
        assert_eq!(gql.kind, EndpointKind::GraphQl);
        assert!(
            gql.first_party,
            "pubapi.ticketmaster.co.uk is first-party to .co.uk"
        );

        let venue = report
            .endpoints
            .iter()
            .find(|e| e.value.starts_with("/api/venue"));
        assert!(venue.unwrap().first_party, "relative path is same-origin");
        assert_eq!(report.bundles_scanned, 1);
    }

    #[test]
    fn third_party_absolute_is_flagged_not_first_party() {
        let bundles = vec![(
            "b".to_string(),
            r#"x="https://api.stripe.com/v1/charges""#.to_string(),
        )];
        let report = extract_endpoints("<html></html>", PAGE, &bundles);
        let stripe = report
            .endpoints
            .iter()
            .find(|e| e.value.contains("stripe"))
            .unwrap();
        assert!(!stripe.first_party);
    }

    #[test]
    fn caps_bound_pathological_input() {
        // Far more fake endpoints than MAX_ENDPOINTS: the result must stay
        // within the cap, be marked truncated, and come back promptly
        // (the regex crate matches in linear time).
        let big: String = (0..50_000)
            .map(|i| format!("\"/api/v1/item/{i}\" "))
            .collect();
        let bundles = vec![("big".to_string(), big)];
        let report = extract_endpoints("<html></html>", "https://x.com/", &bundles);
        assert!(report.endpoints.len() <= MAX_ENDPOINTS);
        assert!(report.truncated);
    }

    #[test]
    fn empty_inputs_are_safe() {
        let report = extract_endpoints("", "not a url", &[]);
        assert!(report.endpoints.is_empty());
        assert_eq!(report.bundles_scanned, 0);
        assert!(!report.truncated);
    }

    #[test]
    fn v1_1_noise_is_filtered() {
        let bundle_text = r#"
"/api/search/events";
"/api"; "/api/";
"http://f"; "http://n/x";
"https://schema.org/Thing";
"http://json-schema.org/draft-07/schema";
"https://www.ticketmaster.co.uk/";
"https://pubapi.ticketmaster.co.uk/discovery/v2/events";
"wss://live.ticketmaster.co.uk/socket";
"#;
        let bundles = vec![("b.js".to_string(), bundle_text.to_string())];
        let report = extract_endpoints("<html></html>", PAGE, &bundles);
        let vals: std::collections::BTreeSet<&str> =
            report.endpoints.iter().map(|e| e.value.as_str()).collect();

        // Real endpoints survive.
        for kept in [
            "/api/search/events",
            "https://pubapi.ticketmaster.co.uk/discovery/v2/events",
            "wss://live.ticketmaster.co.uk/socket",
        ] {
            assert!(vals.contains(kept));
        }

        // Noise does not.
        for junk in [
            "/api",
            "/api/",
            "http://f",
            "http://n/x",
            "https://schema.org/Thing",
            "http://json-schema.org/draft-07/schema",
            "https://www.ticketmaster.co.uk/",
        ] {
            assert!(!vals.contains(junk), "noise leaked: {junk}");
        }

        let noisy_host = report
            .hosts
            .iter()
            .any(|h| h == "f" || h == "n" || h == "schema.org");
        assert!(!noisy_host);
        assert!(report.hosts.iter().any(|h| h == "pubapi.ticketmaster.co.uk"));
    }
}
|
||||
|
|
@ -7,6 +7,7 @@ pub(crate) mod data_island;
|
|||
/// Zero network dependencies — WASM-compatible by design.
|
||||
pub mod diff;
|
||||
pub mod domain;
|
||||
pub mod endpoints;
|
||||
pub mod error;
|
||||
pub mod extractor;
|
||||
#[cfg(all(feature = "quickjs", not(target_arch = "wasm32")))]
|
||||
|
|
|
|||
|
|
@ -810,13 +810,18 @@ mod tests {
|
|||
|
||||
// --- CloudClient construction ------------------------------------------
|
||||
|
||||
// `WEBCLAW_API_KEY` is process-global; cargo runs tests in parallel
|
||||
// threads. Without serialization, a test that sets the var can race a
|
||||
// test asserting it is absent. This lock makes the env-mutating
|
||||
// CloudClient tests mutually exclusive (poison-tolerant: a panicking
|
||||
// test must not wedge the others).
|
||||
static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
|
||||
|
||||
#[test]
|
||||
fn cloud_client_explicit_key_wins_over_env() {
|
||||
// SAFETY: this test mutates process env. Serial tests only.
|
||||
// Set env to something, pass an explicit key, explicit should win.
|
||||
// (We don't actually *call* the API, just check the struct stored
|
||||
// the right key.)
|
||||
// rustc std::env::set_var is unsafe in newer toolchains.
|
||||
let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
|
||||
// SAFETY: env mutation is serialized by ENV_LOCK; set_var/remove_var
|
||||
// are unsafe on the 2024 toolchain. Explicit key must beat the env.
|
||||
unsafe {
|
||||
std::env::set_var("WEBCLAW_API_KEY", "from-env");
|
||||
}
|
||||
|
|
@ -829,6 +834,9 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn cloud_client_none_when_empty() {
|
||||
let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
|
||||
// SAFETY: env mutation serialized by ENV_LOCK. Clearing the var
|
||||
// (incl. any ambient runner value) is what makes this deterministic.
|
||||
unsafe {
|
||||
std::env::remove_var("WEBCLAW_API_KEY");
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue