diff --git a/crates/webclaw-cli/src/main.rs b/crates/webclaw-cli/src/main.rs
index 6855fca..bc20249 100644
--- a/crates/webclaw-cli/src/main.rs
+++ b/crates/webclaw-cli/src/main.rs
@@ -858,7 +858,20 @@ fn render_body(
                     serde_json::to_string_pretty(result).expect("serialization failed")
                 }
             }
-            OutputFormat::Text => result.content.plain_text.clone(),
+            OutputFormat::Text => {
+                // M7 (issue #19): prepend `Status: <code>` line so callers
+                // using `-f text` can distinguish a real 404 from a
+                // thin-body 200 without parsing the body. No `> `
+                // blockquote prefix (text format has no header section).
+                // Only emitted when populated; local-file/--stdin paths
+                // leave http_status=None and produce body-only output.
+                let body = result.content.plain_text.clone();
+                if let Some(code) = result.metadata.http_status {
+                    format!("Status: {code}\n{body}")
+                } else {
+                    body
+                }
+            }
             OutputFormat::Llm => to_llm_text_with_options(
                 result,
                 result.metadata.url.as_deref(),
@@ -2974,6 +2987,7 @@ mod tests {
                 image: None,
                 favicon: None,
                 word_count: markdown.split_whitespace().count(),
+                http_status: None,
             },
             content: Content {
                 markdown: markdown.to_string(),
diff --git a/crates/webclaw-core/src/diff.rs b/crates/webclaw-core/src/diff.rs
index c6a5d71..e888d1c 100644
--- a/crates/webclaw-core/src/diff.rs
+++ b/crates/webclaw-core/src/diff.rs
@@ -148,6 +148,7 @@ mod tests {
                 image: None,
                 favicon: None,
                 word_count,
+                http_status: None,
             },
             content: Content {
                 markdown: markdown.to_string(),
diff --git a/crates/webclaw-core/src/llm/hub_detect.rs b/crates/webclaw-core/src/llm/hub_detect.rs
index b8c7211..273d626 100644
--- a/crates/webclaw-core/src/llm/hub_detect.rs
+++ b/crates/webclaw-core/src/llm/hub_detect.rs
@@ -136,6 +136,7 @@ mod tests {
                 image: None,
                 favicon: None,
                 word_count: 0,
+                http_status: None,
             },
             content: Content {
                 markdown: markdown.to_string(),
diff --git a/crates/webclaw-core/src/llm/metadata.rs b/crates/webclaw-core/src/llm/metadata.rs
index 6f7666e..9ae27a2 100644
--- a/crates/webclaw-core/src/llm/metadata.rs
+++ b/crates/webclaw-core/src/llm/metadata.rs
@@ -8,6 +8,22 @@ pub(crate) fn build_metadata_header(
     out: &mut String,
     result: &ExtractionResult,
     url: Option<&str>,
+) {
+    build_metadata_header_with_opts(out, result, url, true);
+}
+
+/// Same as [`build_metadata_header`] but with an `include_status` toggle.
+///
+/// `--mode summary` / `--mode toc` callers pass `include_status=false` so
+/// the link-list / outline output stays uncluttered (M7 / issue #19 — the
+/// status line is most useful on full-extraction output where the caller
+/// is reading the body and needs to know whether they're looking at a 404
+/// error page vs a real article).
+pub(crate) fn build_metadata_header_with_opts(
+    out: &mut String,
+    result: &ExtractionResult,
+    url: Option<&str>,
+    include_status: bool,
 ) {
     let meta = &result.metadata;
 
@@ -16,6 +32,16 @@ pub(crate) fn build_metadata_header(
     if let Some(u) = effective_url {
         out.push_str(&format!("> URL: {u}\n"));
     }
+    // M7 (issue #19): HTTP status immediately after URL so callers can
+    // distinguish a real 404 from a thin-body 200 without parsing the page
+    // body. Emitted only when populated (network path); local-file /
+    // --stdin / extract_with_options direct calls leave http_status=None.
+    // Summary / toc modes suppress this line via include_status=false.
+    if include_status
+        && let Some(code) = meta.http_status
+    {
+        out.push_str(&format!("> Status: {code}\n"));
+    }
     if let Some(t) = &meta.title
         && !t.is_empty()
     {
diff --git a/crates/webclaw-core/src/llm/mod.rs b/crates/webclaw-core/src/llm/mod.rs
index 507f258..f92ccd1 100644
--- a/crates/webclaw-core/src/llm/mod.rs
+++ b/crates/webclaw-core/src/llm/mod.rs
@@ -344,6 +344,7 @@ mod tests {
                 image: None,
                 favicon: None,
                 word_count: 42,
+                http_status: None,
             },
             content: Content {
                 markdown: markdown.into(),
@@ -597,6 +598,7 @@ mod tests {
                 image: None,
                 favicon: None,
                 word_count: 0,
+                http_status: None,
             },
             content: Content {
                 markdown: "Just content".into(),
@@ -1214,4 +1216,124 @@ mod tests {
         assert!(out.contains("## Structured data"), "missing header in:\n{out}");
         assert!(out.contains("schema: WebPage"), "missing WebPage schema label in:\n{out}");
     }
+
+    // ------------------------------------------------------------------
+    // M7: HTTP status header line (issue #19)
+    // ------------------------------------------------------------------
+
+    /// 200 control: status line appears in -f llm output even on success
+    /// so callers can distinguish "webclaw saw a 200" from "webclaw didn't
+    /// reach the formatter / status unknown" (e.g. local-file path).
+    #[test]
+    fn test_status_header_appears_on_200() {
+        let mut r = make_result("# Body");
+        r.metadata.http_status = Some(200);
+        let out = to_llm_text(&r, None);
+        assert!(
+            out.contains("> Status: 200\n") || out.contains("> Status: 200"),
+            "Status: 200 line missing from -f llm output:\n{out}"
+        );
+        // Must sit between URL and Title (Option A placement).
+        let url_pos = out.find("> URL:").expect("URL line missing");
+        let status_pos = out.find("> Status:").expect("Status line missing");
+        let title_pos = out.find("> Title:").expect("Title line missing");
+        assert!(url_pos < status_pos, "Status must come AFTER URL");
+        assert!(status_pos < title_pos, "Status must come BEFORE Title");
+    }
+
+    /// 404: status line distinguishes a real 404 (Status: 404 + thin
+    /// soft-404 body) from a thin 200 article. This is the core M7 bug
+    /// — `webclaw https://www.dailysabah.com/business/economy` was
+    /// returning exit 0 with a 13-word body and no way for the caller to
+    /// tell it was actually a 404 error page.
+    #[test]
+    fn test_status_header_appears_on_404() {
+        let mut r = make_result("## 404 / The page you're looking for does not exist!");
+        r.metadata.http_status = Some(404);
+        let out = to_llm_text(&r, None);
+        assert!(
+            out.contains("> Status: 404"),
+            "Status: 404 line missing from -f llm output:\n{out}"
+        );
+    }
+
+    /// When http_status is None (local-file / --stdin / direct
+    /// extract_with_options) NO Status line is emitted. Backward-compat
+    /// for callers that pre-date M7 and parse via line index.
+    #[test]
+    fn test_status_header_absent_when_unset() {
+        let r = make_result("# Body"); // http_status defaults to None
+        assert!(r.metadata.http_status.is_none());
+        let out = to_llm_text(&r, None);
+        assert!(
+            !out.contains("> Status:"),
+            "Status line leaked when http_status is None:\n{out}"
+        );
+    }
+
+    /// JSON output: the `status` field (renamed from internal http_status
+    /// via serde rename) appears at metadata level and carries the code.
+    #[test]
+    fn test_status_header_format_in_json() {
+        let mut r = make_result("# Body");
+        r.metadata.http_status = Some(404);
+        let json = serde_json::to_string_pretty(&r).expect("serialize");
+        assert!(
+            json.contains("\"status\": 404"),
+            "JSON output missing \"status\": 404:\n{json}"
+        );
+        // serde rename means the internal name "http_status" must NOT
+        // surface in JSON output.
+        assert!(
+            !json.contains("http_status"),
+            "internal field name leaked into JSON:\n{json}"
+        );
+    }
+
+    /// JSON output: when http_status is None the field is omitted
+    /// entirely (skip_serializing_if = "Option::is_none").
+    #[test]
+    fn test_status_field_omitted_in_json_when_unset() {
+        let r = make_result("# Body"); // http_status defaults to None
+        let json = serde_json::to_string_pretty(&r).expect("serialize");
+        assert!(
+            !json.contains("\"status\""),
+            "status field should be omitted when None:\n{json}"
+        );
+    }
+
+    /// Summary mode (M1 `--mode summary`) must NOT include the Status
+    /// line — summary returns a link list and the status would be noise.
+    #[test]
+    fn test_status_omitted_in_summary_mode() {
+        let mut r = make_result("# Body");
+        r.metadata.http_status = Some(404);
+        // to_llm_summary builds its own header via
+        // build_metadata_header_with_opts(include_status=false).
+        let out = to_llm_summary(&r, None);
+        assert!(
+            !out.contains("> Status:"),
+            "Status line leaked into summary mode output:\n{out}"
+        );
+        // URL line should still be present though.
+        assert!(
+            out.contains("> URL:") || r.metadata.url.is_some_and(|u| out.contains(&u)) || true,
+            // URL is conditional on metadata; we don't assert presence,
+            // only that Status is absent regardless.
+            ""
+        );
+    }
+
+    /// TOC mode (M1 `--mode toc`) must NOT include the Status line either —
+    /// the outline is structural metadata, status would clutter it.
+    #[test]
+    fn test_status_omitted_in_toc_mode() {
+        let mut r = make_result("# H1\n\n## H2\n\nFirst paragraph after H2.");
+        r.metadata.http_status = Some(404);
+        let out = to_llm_toc(&r, None);
+        assert!(
+            !out.contains("> Status:"),
+            "Status line leaked into toc mode output:\n{out}"
+        );
+    }
 }
diff --git a/crates/webclaw-core/src/llm/output_size.rs b/crates/webclaw-core/src/llm/output_size.rs
index 4f3b94f..f9aa7ed 100644
--- a/crates/webclaw-core/src/llm/output_size.rs
+++ b/crates/webclaw-core/src/llm/output_size.rs
@@ -14,7 +14,7 @@ use crate::types::ExtractionResult;
 
 use super::body;
 use super::links;
-use super::metadata::build_metadata_header;
+use super::metadata::build_metadata_header_with_opts;
 
 // ---------------------------------------------------------------------------
 // Summary mode — link/title list, no body
@@ -26,7 +26,9 @@ use super::metadata::build_metadata_header;
 pub fn to_llm_summary(result: &ExtractionResult, url: Option<&str>) -> String {
     let links = collect_summary_links(result);
     let mut out = String::new();
-    build_metadata_header(&mut out, result, url);
+    // M7: suppress the `> Status:` line in summary mode — the link list
+    // is conceptually navigation, not protocol-level outcome.
+    build_metadata_header_with_opts(&mut out, result, url, false);
     if !out.is_empty() {
         out.push('\n');
     }
@@ -88,7 +90,9 @@ pub fn to_llm_toc(result: &ExtractionResult, url: Option<&str>) -> String {
     let entries = collect_toc_entries(result);
 
     let mut out = String::new();
-    build_metadata_header(&mut out, result, url);
+    // M7: suppress the `> Status:` line in toc mode — the outline is
+    // structural, not protocol-level outcome.
+    build_metadata_header_with_opts(&mut out, result, url, false);
     if !out.is_empty() {
         out.push('\n');
     }
@@ -355,6 +359,7 @@ mod tests {
                 image: None,
                 favicon: None,
                 word_count: 0,
+                http_status: None,
             },
             content: Content {
                 markdown: markdown.to_string(),
diff --git a/crates/webclaw-core/src/metadata.rs b/crates/webclaw-core/src/metadata.rs
index c7f142b..ccac3c7 100644
--- a/crates/webclaw-core/src/metadata.rs
+++ b/crates/webclaw-core/src/metadata.rs
@@ -52,6 +52,7 @@ pub fn extract(doc: &Html, url: Option<&str>) -> Metadata {
         image,
         favicon,
         word_count: 0, // filled later by the extractor
+        http_status: None, // filled by webclaw-fetch when reachable; None for local-file / --stdin
     }
 }
 
diff --git a/crates/webclaw-core/src/types.rs b/crates/webclaw-core/src/types.rs
index ebe7a92..927f302 100644
--- a/crates/webclaw-core/src/types.rs
+++ b/crates/webclaw-core/src/types.rs
@@ -15,7 +15,7 @@ pub struct ExtractionResult {
     pub structured_data: Vec<serde_json::Value>,
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
 pub struct Metadata {
     pub title: Option<String>,
     pub description: Option<String>,
@@ -27,6 +27,20 @@ pub struct Metadata {
     pub image: Option<String>,
     pub favicon: Option<String>,
     pub word_count: usize,
+    /// HTTP status code from the final response (after redirects). `None`
+    /// when extraction was not preceded by an HTTP fetch — e.g. `--file`,
+    /// `--stdin`, or any call into `extract_with_options` directly.
+    /// Serialized in JSON output as `"status"` (renamed for caller
+    /// ergonomics; M7 / issue #19). Surfaced in `-f llm` / `-f text` as a
+    /// `> Status: <code>` line right after `> URL:` so callers can
+    /// distinguish a real 404 from a thin-body 200 without parsing the
+    /// body.
+    #[serde(
+        rename = "status",
+        default,
+        skip_serializing_if = "Option::is_none"
+    )]
+    pub http_status: Option<u16>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
diff --git a/crates/webclaw-fetch/src/client.rs b/crates/webclaw-fetch/src/client.rs
index 20ef276..e7d35d1 100644
--- a/crates/webclaw-fetch/src/client.rs
+++ b/crates/webclaw-fetch/src/client.rs
@@ -542,9 +542,13 @@ impl FetchClient {
             let resp = client.get(json_url.as_str()).send().await?;
             let response = Response::from_wreq(resp).await?;
             if response.is_success() {
+                let reddit_status = response.status();
                 let bytes = response.body();
                 match crate::reddit::parse_reddit_json(bytes, url) {
-                    Ok(result) => return Ok(result),
+                    Ok(mut result) => {
+                        result.metadata.http_status = Some(reddit_status);
+                        return Ok(result);
+                    }
                     Err(e) => warn!("reddit json fallback failed: {e}, falling back to HTML"),
                 }
             }
@@ -588,7 +592,9 @@ impl FetchClient {
             );
 
             let pdf_result = webclaw_pdf::extract_pdf(bytes, self.pdf_mode.clone())?;
-            Ok(pdf_to_extraction_result(&pdf_result, &final_url))
+            let mut result = pdf_to_extraction_result(&pdf_result, &final_url);
+            result.metadata.http_status = Some(status);
+            Ok(result)
         } else if let Some(doc_type) =
             crate::document::is_document_content_type(&headers, &final_url)
         {
@@ -606,6 +612,7 @@ impl FetchClient {
 
             let mut result = crate::document::extract_document(bytes, doc_type)?;
             result.metadata.url = Some(final_url);
+            result.metadata.http_status = Some(status);
             Ok(result)
         } else {
             let html = response.into_text();
@@ -617,12 +624,15 @@ impl FetchClient {
             if crate::linkedin::is_linkedin_post(&final_url) {
                 if let Some(result) = crate::linkedin::extract_linkedin_post(&html, &final_url) {
                     debug!("linkedin extraction succeeded");
+                    let mut result = result;
+                    result.metadata.http_status = Some(status);
                     return Ok(result);
                 }
                 debug!("linkedin extraction failed, falling back to standard");
             }
 
-            let extraction = webclaw_core::extract_with_options(&html, Some(&final_url), options)?;
+            let mut extraction = webclaw_core::extract_with_options(&html, Some(&final_url), options)?;
+            extraction.metadata.http_status = Some(status);
 
             Ok(extraction)
         }
@@ -889,6 +899,7 @@ fn pdf_to_extraction_result(
             image: None,
             favicon: None,
             word_count,
+            http_status: None,
         },
         content: webclaw_core::Content {
             markdown,
diff --git a/crates/webclaw-fetch/src/document.rs b/crates/webclaw-fetch/src/document.rs
index 3d7d89d..0d9b232 100644
--- a/crates/webclaw-fetch/src/document.rs
+++ b/crates/webclaw-fetch/src/document.rs
@@ -110,6 +110,7 @@ pub fn extract_document(
             image: None,
             favicon: None,
             word_count,
+            http_status: None,
         },
         content: webclaw_core::Content {
             markdown,
diff --git a/crates/webclaw-fetch/src/linkedin.rs b/crates/webclaw-fetch/src/linkedin.rs
index 0cabd1c..d8322b3 100644
--- a/crates/webclaw-fetch/src/linkedin.rs
+++ b/crates/webclaw-fetch/src/linkedin.rs
@@ -216,6 +216,7 @@ pub fn extract_linkedin_post(html: &str, url: &str) -> Option<ExtractionResult>
             image: None,
             favicon: None,
             word_count,
+            http_status: None,
         },
         content: Content {
             markdown,
diff --git a/crates/webclaw-fetch/src/reddit.rs b/crates/webclaw-fetch/src/reddit.rs
index 7676ccd..5e934de 100644
--- a/crates/webclaw-fetch/src/reddit.rs
+++ b/crates/webclaw-fetch/src/reddit.rs
@@ -92,6 +92,7 @@ pub fn parse_reddit_json(json_bytes: &[u8], url: &str) -> Result<ExtractionResul
             image: None,
             favicon: None,
             word_count,
+            http_status: None,
         },
         content: Content {
             markdown,
diff --git a/crates/webclaw-mcp/src/server.rs b/crates/webclaw-mcp/src/server.rs
index 497315f..47f58c6 100644
--- a/crates/webclaw-mcp/src/server.rs
+++ b/crates/webclaw-mcp/src/server.rs
@@ -518,6 +518,7 @@ impl WebclawMcp {
                         image: None,
                         favicon: None,
                         word_count: markdown.split_whitespace().count(),
+                        http_status: None,
                     },
                     domain_data: None,
                     structured_data: Vec::new(),
diff --git a/crates/webclaw-server/src/routes/diff.rs b/crates/webclaw-server/src/routes/diff.rs
index b0706fb..02cf526 100644
--- a/crates/webclaw-server/src/routes/diff.rs
+++ b/crates/webclaw-server/src/routes/diff.rs
@@ -65,6 +65,7 @@ fn empty_metadata() -> Metadata {
         image: None,
         favicon: None,
         word_count: 0,
+        http_status: None,
     }
 }