From 01660faa2673a84d5af3696c721ccc3897651c7d Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sun, 10 May 2026 09:28:44 +0000 Subject: [PATCH] Tighten blob descriptor validation Co-Authored-By: Ragnor Comerford --- crates/omnigraph/src/table_store.rs | 63 ++++++++++++++++++----------- 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/crates/omnigraph/src/table_store.rs b/crates/omnigraph/src/table_store.rs index 26be480..460fa34 100644 --- a/crates/omnigraph/src/table_store.rs +++ b/crates/omnigraph/src/table_store.rs @@ -1,5 +1,5 @@ use arrow_array::{ - Array, ArrayRef, RecordBatch, StringArray, StructArray, UInt32Array, UInt64Array, + Array, ArrayRef, RecordBatch, StringArray, StructArray, UInt8Array, UInt32Array, UInt64Array, }; use arrow_schema::SchemaRef; use arrow_select::concat::concat_batches; @@ -411,39 +411,56 @@ impl TableStore { return Ok(true); } - let kind = descriptions - .column_by_name("kind") - .and_then(|col| col.as_any().downcast_ref::()) - .and_then(|arr| (!arr.is_null(row)).then(|| arr.value(row) as u8)) - .or_else(|| { - descriptions - .column_by_name("kind") - .and_then(|col| col.as_any().downcast_ref::()) - .and_then(|arr| (!arr.is_null(row)).then(|| arr.value(row))) - }); let position = descriptions .column_by_name("position") .and_then(|col| col.as_any().downcast_ref::<UInt64Array>()) - .and_then(|arr| (!arr.is_null(row)).then(|| arr.value(row))); + .ok_or_else(|| { + OmniError::Lance(format!( + "unrecognized blob description schema {:?}: missing UInt64 position field", + descriptions.fields() + )) + })?; let size = descriptions .column_by_name("size") .and_then(|col| col.as_any().downcast_ref::<UInt64Array>()) - .and_then(|arr| (!arr.is_null(row)).then(|| arr.value(row))); + .ok_or_else(|| { + OmniError::Lance(format!( + "unrecognized blob description schema {:?}: missing UInt64 size field", + descriptions.fields() + )) + })?; + + let Some(kind_column) = descriptions.column_by_name("kind") 
else { + return Ok(position.is_null(row) || size.is_null(row)); + }; + let kind = if let Some(kind) = kind_column.as_any().downcast_ref::<UInt8Array>() { + if kind.is_null(row) { + return Ok(true); + } + kind.value(row) + } else if let Some(kind) = kind_column.as_any().downcast_ref::<UInt32Array>() { + if kind.is_null(row) { + return Ok(true); + } + kind.value(row) as u8 + } else { + return Err(OmniError::Lance(format!( + "unrecognized blob description schema {:?}: kind field must be UInt8 or UInt32", + descriptions.fields() + ))); + }; + + let kind = BlobKind::try_from(kind).map_err(|e| OmniError::Lance(e.to_string()))?; + if kind != BlobKind::Inline { + return Ok(false); + } let blob_uri = descriptions .column_by_name("blob_uri") .and_then(|col| col.as_any().downcast_ref::<StringArray>()) .and_then(|arr| (!arr.is_null(row)).then(|| arr.value(row))); - let Some(kind) = kind else { - return Ok(true); - }; - let kind = BlobKind::try_from(kind).map_err(|e| OmniError::Lance(e.to_string()))?; - if kind != BlobKind::Inline { - return Ok(false); - } - - Ok(position.unwrap_or(0) == 0 - && size.unwrap_or(0) == 0 + Ok((position.is_null(row) || position.value(row) == 0) + && (size.is_null(row) || size.value(row) == 0) && blob_uri.unwrap_or("").is_empty()) }