Initial release: webclaw v0.1.0 — web content extraction for LLMs

CLI + MCP server for extracting clean, structured content from any URL. 6 Rust crates, 10 MCP tools, TLS fingerprinting, 5 output formats. MIT Licensed | https://webclaw.io
2026-06-07 22:15:12 +02:00 · 2026-03-23 18:31:11 +01:00 · 2026-03-23 18:31:11 +01:00 · c99ec684fa
commit c99ec684fa
79 changed files with 24074 additions and 0 deletions
--- a/crates/webclaw-pdf/src/error.rs
+++ b/crates/webclaw-pdf/src/error.rs
@ -0,0 +1,14 @@
+//! PDF extraction errors. Kept simple -- no OCR, no complex recovery.
+use thiserror::Error;
+
+/// Error type for PDF text extraction.
+///
+/// The `Display` text of each variant comes from its `#[error(...)]`
+/// attribute, and the `std::error::Error` impl from the `thiserror` derive.
+#[derive(Debug, Error)]
+pub enum PdfError {
+    /// Extraction failed. The `String` payload is interpolated after the
+    /// "PDF extraction failed: " prefix when the error is displayed.
+    // NOTE(review): the payload is presumably the underlying backend's
+    // message -- confirm against the code that constructs this variant.
+    #[error("PDF extraction failed: {0}")]
+    ExtractionFailed(String),
+
+    /// The input was not accepted as a valid PDF. The `String` payload is
+    /// interpolated after the "invalid PDF: " prefix when displayed.
+    #[error("invalid PDF: {0}")]
+    InvalidPdf(String),
+
+    /// The PDF yielded no text content. Carries no payload; the display
+    /// message is fixed.
+    #[error("empty PDF: no text content found")]
+    EmptyPdf,
+}