From 4a66d6e071ce95eabe49cd496356fba0617901be Mon Sep 17 00:00:00 2001 From: Aaron Goh Date: Sun, 7 Jun 2026 20:37:37 +0200 Subject: [PATCH] fix(loader): accept multi-line (pretty-printed) JSON in load (#146) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The loader read input line-by-line (reader.lines() + serde_json::from_str per line), so any delta where a JSON object spanned multiple lines failed with 'invalid JSON on line 1: EOF while parsing an object'. Compact JSONL worked; pretty-printed JSON never did. Switch to a streaming value deserializer (Deserializer::from_reader().into_iter::<JsonValue>()), which treats any whitespace (including newlines inside objects) as a separator — so both compact JSONL and pretty-printed JSON load. Error labels switch from line numbers to record numbers (line numbers are meaningless once objects span lines). Co-authored-by: Claude Opus 4.8 (1M context) Co-authored-by: Ragnor Comerford --- crates/omnigraph/src/loader/mod.rs | 35 ++++++++++++++++-------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/crates/omnigraph/src/loader/mod.rs b/crates/omnigraph/src/loader/mod.rs index 46a46e2..d5d74c0 100644 --- a/crates/omnigraph/src/loader/mod.rs +++ b/crates/omnigraph/src/loader/mod.rs @@ -288,21 +288,24 @@ async fn load_jsonl_reader( let mut node_rows: HashMap> = HashMap::new(); let mut edge_rows: HashMap> = HashMap::new(); - for (line_num, line) in reader.lines().enumerate() { - let line = line?; - let line = line.trim(); - if line.is_empty() { - continue; - } - let value: JsonValue = serde_json::from_str(line).map_err(|e| { - OmniError::manifest(format!("invalid JSON on line {}: {}", line_num + 1, e)) + // Parse a stream of JSON values. 
Accepts both compact JSONL (one object + // per line) and pretty-printed JSON where a single object spans multiple + // lines — serde's streaming deserializer treats any whitespace (including + // newlines) between top-level values as a separator. + for (idx, parsed) in serde_json::Deserializer::from_reader(reader) + .into_iter::() + .enumerate() + { + let record_num = idx + 1; + let value: JsonValue = parsed.map_err(|e| { + OmniError::manifest(format!("invalid JSON at record {}: {}", record_num, e)) })?; if let Some(type_name) = value.get("type").and_then(|v| v.as_str()) { if !catalog.node_types.contains_key(type_name) { return Err(OmniError::manifest(format!( - "line {}: unknown node type '{}'", - line_num + 1, + "record {}: unknown node type '{}'", + record_num, type_name ))); } @@ -317,8 +320,8 @@ async fn load_jsonl_reader( } else if let Some(edge_name) = value.get("edge").and_then(|v| v.as_str()) { if catalog.lookup_edge_by_name(edge_name).is_none() { return Err(OmniError::manifest(format!( - "line {}: unknown edge type '{}'", - line_num + 1, + "record {}: unknown edge type '{}'", + record_num, edge_name ))); } @@ -326,14 +329,14 @@ async fn load_jsonl_reader( .get("from") .and_then(|v| v.as_str()) .ok_or_else(|| { - OmniError::manifest(format!("line {}: edge missing 'from'", line_num + 1)) + OmniError::manifest(format!("record {}: edge missing 'from'", record_num)) })? .to_string(); let to = value .get("to") .and_then(|v| v.as_str()) .ok_or_else(|| { - OmniError::manifest(format!("line {}: edge missing 'to'", line_num + 1)) + OmniError::manifest(format!("record {}: edge missing 'to'", record_num)) })? .to_string(); let data = value @@ -347,8 +350,8 @@ async fn load_jsonl_reader( .push((from, to, data)); } else { return Err(OmniError::manifest(format!( - "line {}: expected 'type' or 'edge' field", - line_num + 1 + "record {}: expected 'type' or 'edge' field", + record_num ))); } }