mirror of
https://github.com/0xMassi/webclaw.git
synced 2026-06-24 02:58:05 +02:00
fix: harden fetch URL validation
This commit is contained in:
parent
23544f8fac
commit
bdf81fe6bf
10 changed files with 284 additions and 27 deletions
|
|
@@ -52,6 +52,7 @@ pub async fn scrape(
|
|||
if req.url.trim().is_empty() {
|
||||
return Err(ApiError::bad_request("`url` is required"));
|
||||
}
|
||||
let url = webclaw_fetch::url_security::validate_public_http_url(&req.url).await?;
|
||||
let formats = req.formats.as_vec();
|
||||
|
||||
let options = ExtractionOptions {
|
||||
|
|
@@ -63,11 +64,11 @@ pub async fn scrape(
|
|||
|
||||
let extraction = state
|
||||
.fetch()
|
||||
.fetch_and_extract_with_options(&req.url, &options)
|
||||
.fetch_and_extract_with_options(url.as_str(), &options)
|
||||
.await?;
|
||||
|
||||
let mut body = json!({
|
||||
"url": extraction.metadata.url.clone().unwrap_or_else(|| req.url.clone()),
|
||||
"url": extraction.metadata.url.clone().unwrap_or_else(|| url.to_string()),
|
||||
"metadata": extraction.metadata,
|
||||
});
|
||||
let obj = body.as_object_mut().expect("json::object");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue