Object batching (#499)

* Object batching

* Update tests
This commit is contained in:
cybermaggedon 2025-09-05 15:59:06 +01:00 committed by GitHub
parent ebca467ed8
commit 0b7620bc04
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 946 additions and 107 deletions

View file

@ -128,18 +128,77 @@ class TestStructuredDataSchemaContracts:
obj = ExtractedObject(
metadata=metadata,
schema_name="customer_records",
values={"id": "123", "name": "John Doe", "email": "john@example.com"},
values=[{"id": "123", "name": "John Doe", "email": "john@example.com"}],
confidence=0.95,
source_span="John Doe (john@example.com) customer ID 123"
)
# Assert
assert obj.schema_name == "customer_records"
assert obj.values["name"] == "John Doe"
assert obj.values[0]["name"] == "John Doe"
assert obj.confidence == 0.95
assert len(obj.source_span) > 0
assert obj.metadata.id == "extracted-obj-001"
def test_extracted_object_batch_contract(self):
    """Check that an ExtractedObject built with several value rows exposes them by index."""
    # Arrange: the metadata record and the three customer rows for the batch
    batch_metadata = Metadata(
        id="extracted-batch-001",
        user="test_user",
        collection="test_collection",
        metadata=[]
    )
    customer_rows = [
        {"id": "123", "name": "John Doe", "email": "john@example.com"},
        {"id": "124", "name": "Jane Smith", "email": "jane@example.com"},
        {"id": "125", "name": "Bob Johnson", "email": "bob@example.com"}
    ]

    # Act: build a single object that carries all rows at once
    extracted = ExtractedObject(
        metadata=batch_metadata,
        schema_name="customer_records",
        values=customer_rows,
        confidence=0.85,
        source_span="Multiple customers found in document"
    )

    # Assert: schema name and row count first, then each row's fields in order
    assert extracted.schema_name == "customer_records"
    assert len(extracted.values) == 3

    expected_names = ["John Doe", "Jane Smith", "Bob Johnson"]
    for position, expected_name in enumerate(expected_names):
        assert extracted.values[position]["name"] == expected_name

    expected_ids = ["123", "124", "125"]
    for position, expected_id in enumerate(expected_ids):
        assert extracted.values[position]["id"] == expected_id

    assert extracted.confidence == 0.85
    assert "Multiple customers" in extracted.source_span
def test_extracted_object_empty_batch_contract(self):
    """Check that an ExtractedObject can be built with an empty values list."""
    # Arrange
    empty_metadata = Metadata(
        id="extracted-empty-001",
        user="test_user",
        collection="test_collection",
        metadata=[]
    )
    no_rows = []

    # Act: construct the object without any extracted rows
    extracted = ExtractedObject(
        metadata=empty_metadata,
        schema_name="empty_schema",
        values=no_rows,
        confidence=1.0,
        source_span="No objects found"
    )

    # Assert
    assert extracted.schema_name == "empty_schema"
    row_count = len(extracted.values)
    assert row_count == 0
    assert extracted.confidence == 1.0
@pytest.mark.contract
class TestStructuredQueryServiceContracts:
@ -273,7 +332,7 @@ class TestStructuredDataSerializationContracts:
object_data = {
"metadata": metadata,
"schema_name": "test_schema",
"values": {"field1": "value1"},
"values": [{"field1": "value1"}],
"confidence": 0.8,
"source_span": "test span"
}
@ -314,4 +373,38 @@ class TestStructuredDataSerializationContracts:
"data": '{"customers": [{"id": "1", "name": "John"}]}',
"errors": []
}
assert serialize_deserialize_test(StructuredQueryResponse, response_data)
assert serialize_deserialize_test(StructuredQueryResponse, response_data)
def test_extracted_object_batch_serialization(self):
    """Run an ExtractedObject holding several value rows through serialize_deserialize_test."""
    # Arrange: constructor arguments for a three-row batch
    record_metadata = Metadata(id="test", user="user", collection="col", metadata=[])
    field_rows = [
        {"field1": "value1", "field2": "value2"},
        {"field1": "value3", "field2": "value4"},
        {"field1": "value5", "field2": "value6"}
    ]
    batch_object_data = dict(
        metadata=record_metadata,
        schema_name="test_schema",
        values=field_rows,
        confidence=0.9,
        source_span="batch test span",
    )

    # Act
    round_trip_ok = serialize_deserialize_test(ExtractedObject, batch_object_data)

    # Assert
    assert round_trip_ok
def test_extracted_object_empty_batch_serialization(self):
    """Run an ExtractedObject with an empty values list through serialize_deserialize_test."""
    # Arrange: constructor arguments with no value rows
    record_metadata = Metadata(id="test", user="user", collection="col", metadata=[])
    empty_batch_data = dict(
        metadata=record_metadata,
        schema_name="test_schema",
        values=[],
        confidence=1.0,
        source_span="empty batch",
    )

    # Act
    round_trip_ok = serialize_deserialize_test(ExtractedObject, empty_batch_data)

    # Assert
    assert round_trip_ok