feat(server): add OSS webclaw-server REST API binary (closes #29)

Self-hosters hitting docs/self-hosting were promised three binaries
but the OSS Docker image only shipped two. webclaw-server lived in
the closed-source hosted-platform repo, which couldn't be open-sourced.
This adds a minimal axum REST API in the OSS repo so self-hosting
actually works without pretending to ship the cloud platform.

Crate at crates/webclaw-server/. Stateless, no database, no job queue,
single binary. Endpoints: GET /health, POST /v1/{scrape, crawl, map,
batch, extract, summarize, diff, brand}. JSON shapes mirror
api.webclaw.io for the endpoints OSS can support, so swapping between
self-hosted and hosted is a base-URL change.

Auth: optional bearer token via WEBCLAW_API_KEY / --api-key. Comparison
is constant-time (subtle::ConstantTimeEq). Open mode (no key) is
allowed and binds 127.0.0.1 by default; the Docker image flips
WEBCLAW_HOST=0.0.0.0 so the container is reachable out of the box.

Hard caps to keep naive callers from OOMing the process: crawl capped
at 500 pages synchronously, batch capped at 100 URLs / 20 concurrent.
For unbounded crawls or anti-bot bypass the docs point users at the
hosted API.

Dockerfile + Dockerfile.ci updated to copy webclaw-server into
/usr/local/bin and EXPOSE 3000. Workspace version bumped to 0.4.0
(new public binary).
This commit is contained in:
Valerio 2026-04-22 12:25:11 +02:00
parent b4bfff120e
commit 2ba682adf3
20 changed files with 1116 additions and 11 deletions

130
Cargo.lock generated
View file

@ -182,6 +182,70 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "axum"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "31b698c5f9a010f6573133b09e0de5408834d0c82f8d7475a89fc1867a71cd90"
dependencies = [
"axum-core",
"axum-macros",
"bytes",
"form_urlencoded",
"futures-util",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-util",
"itoa",
"matchit",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"serde_core",
"serde_json",
"serde_path_to_error",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tower",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "axum-core"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1"
dependencies = [
"bytes",
"futures-core",
"http",
"http-body",
"http-body-util",
"mime",
"pin-project-lite",
"sync_wrapper",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "axum-macros"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7aa268c23bfbbd2c4363b9cd302a4f504fb2a9dfe7e3451d66f35dd392e20aca"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "base64"
version = "0.22.1"
@ -1132,6 +1196,12 @@ version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
[[package]]
name = "httpdate"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
[[package]]
name = "hyper"
version = "1.9.0"
@ -1145,6 +1215,7 @@ dependencies = [
"http",
"http-body",
"httparse",
"httpdate",
"itoa",
"pin-project-lite",
"smallvec",
@ -1559,6 +1630,12 @@ dependencies = [
"regex-automata",
]
[[package]]
name = "matchit"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
[[package]]
name = "md-5"
version = "0.10.6"
@ -1575,6 +1652,12 @@ version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
[[package]]
name = "mime"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
@ -2403,6 +2486,17 @@ dependencies = [
"zmij",
]
[[package]]
name = "serde_path_to_error"
version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457"
dependencies = [
"itoa",
"serde",
"serde_core",
]
[[package]]
name = "serde_urlencoded"
version = "0.7.1"
@ -2757,6 +2851,7 @@ dependencies = [
"tokio",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
@ -2780,6 +2875,7 @@ dependencies = [
"tower",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
@ -2800,6 +2896,7 @@ version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
dependencies = [
"log",
"pin-project-lite",
"tracing-attributes",
"tracing-core",
@ -3102,7 +3199,7 @@ dependencies = [
[[package]]
name = "webclaw-cli"
version = "0.3.19"
version = "0.4.0"
dependencies = [
"clap",
"dotenvy",
@ -3123,7 +3220,7 @@ dependencies = [
[[package]]
name = "webclaw-core"
version = "0.3.19"
version = "0.4.0"
dependencies = [
"ego-tree",
"once_cell",
@ -3141,7 +3238,7 @@ dependencies = [
[[package]]
name = "webclaw-fetch"
version = "0.3.19"
version = "0.4.0"
dependencies = [
"bytes",
"calamine",
@ -3163,7 +3260,7 @@ dependencies = [
[[package]]
name = "webclaw-llm"
version = "0.3.19"
version = "0.4.0"
dependencies = [
"async-trait",
"reqwest",
@ -3176,7 +3273,7 @@ dependencies = [
[[package]]
name = "webclaw-mcp"
version = "0.3.19"
version = "0.4.0"
dependencies = [
"dirs",
"dotenvy",
@ -3197,13 +3294,34 @@ dependencies = [
[[package]]
name = "webclaw-pdf"
version = "0.3.19"
version = "0.4.0"
dependencies = [
"pdf-extract",
"thiserror",
"tracing",
]
[[package]]
name = "webclaw-server"
version = "0.4.0"
dependencies = [
"anyhow",
"axum",
"clap",
"serde",
"serde_json",
"subtle",
"thiserror",
"tokio",
"tower-http",
"tracing",
"tracing-subscriber",
"webclaw-core",
"webclaw-fetch",
"webclaw-llm",
"webclaw-pdf",
]
[[package]]
name = "webpki-root-certs"
version = "1.0.6"