Merge 2.0 to master (#651)

2026-04-25 08:26:21 +02:00 · 2026-02-28 11:03:14 +00:00 · 2026-02-28 11:03:14 +00:00 · b9d7bf9a8b
commit b9d7bf9a8b
parent 3666ece2c5
212 changed files with 13940 additions and 6180 deletions
--- a/tests/unit/test_knowledge_graph/conftest.py
+++ b/tests/unit/test_knowledge_graph/conftest.py
@ -6,11 +6,21 @@ import pytest
 from unittest.mock import Mock, AsyncMock

 # Mock schema classes for testing
-class Value:
-    def __init__(self, value, is_uri, type):
-        self.value = value
-        self.is_uri = is_uri
+# Term type constants
+IRI = "i"
+LITERAL = "l"
+BLANK = "b"
+TRIPLE = "t"
+
+class Term:
+    def __init__(self, type, iri=None, value=None, id=None, datatype=None, language=None, triple=None):
        self.type = type
+        self.iri = iri
+        self.value = value
+        self.id = id
+        self.datatype = datatype
+        self.language = language
+        self.triple = triple

 class Triple:
    def __init__(self, s, p, o):
@ -66,32 +76,30 @@ def sample_relationships():


@pytest.fixture
-def sample_value_uri():
-    """Sample URI Value object"""
-    return Value(
-        value="http://example.com/person/john-smith",
-        is_uri=True,
-        type=""
+def sample_term_uri():
+    """Sample URI Term object"""
+    return Term(
+        type=IRI,
+        iri="http://example.com/person/john-smith"
    )


@pytest.fixture
-def sample_value_literal():
-    """Sample literal Value object"""
-    return Value(
-        value="John Smith",
-        is_uri=False,
-        type="string"
+def sample_term_literal():
+    """Sample literal Term object"""
+    return Term(
+        type=LITERAL,
+        value="John Smith"
    )


@pytest.fixture
-def sample_triple(sample_value_uri, sample_value_literal):
+def sample_triple(sample_term_uri, sample_term_literal):
    """Sample Triple object"""
    return Triple(
-        s=sample_value_uri,
-        p=Value(value="http://schema.org/name", is_uri=True, type=""),
-        o=sample_value_literal
+        s=sample_term_uri,
+        p=Term(type=IRI, iri="http://schema.org/name"),
+        o=sample_term_literal
    )


--- a/tests/unit/test_knowledge_graph/test_agent_extraction.py
+++ b/tests/unit/test_knowledge_graph/test_agent_extraction.py
@ -11,7 +11,7 @@ import json
 from unittest.mock import AsyncMock, MagicMock, patch

 from trustgraph.extract.kg.agent.extract import Processor as AgentKgExtractor
-from trustgraph.schema import Chunk, Triple, Triples, Metadata, Value, Error
+from trustgraph.schema import Chunk, Triple, Triples, Metadata, Term, Error, IRI, LITERAL
 from trustgraph.schema import EntityContext, EntityContexts
 from trustgraph.rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
 from trustgraph.template.prompt_manager import PromptManager
@ -33,7 +33,7 @@ class TestAgentKgExtractor:
        
        # Set up the methods we want to test
        extractor.to_uri = real_extractor.to_uri
-        extractor.parse_json = real_extractor.parse_json
+        extractor.parse_jsonl = real_extractor.parse_jsonl
        extractor.process_extraction_data = real_extractor.process_extraction_data
        extractor.emit_triples = real_extractor.emit_triples
        extractor.emit_entity_contexts = real_extractor.emit_entity_contexts
@ -53,48 +53,49 @@ class TestAgentKgExtractor:
            id="doc123",
            metadata=[
                Triple(
-                    s=Value(value="doc123", is_uri=True),
-                    p=Value(value="http://example.org/type", is_uri=True),
-                    o=Value(value="document", is_uri=False)
+                    s=Term(type=IRI, iri="doc123"),
+                    p=Term(type=IRI, iri="http://example.org/type"),
+                    o=Term(type=LITERAL, value="document")
                )
            ]
        )

    @pytest.fixture
    def sample_extraction_data(self):
-        """Sample extraction data in expected format"""
-        return {
-            "definitions": [
-                {
-                    "entity": "Machine Learning",
-                    "definition": "A subset of artificial intelligence that enables computers to learn from data without explicit programming."
-                },
-                {
-                    "entity": "Neural Networks",
-                    "definition": "Computing systems inspired by biological neural networks that process information."
-                }
-            ],
-            "relationships": [
-                {
-                    "subject": "Machine Learning",
-                    "predicate": "is_subset_of",
-                    "object": "Artificial Intelligence",
-                    "object-entity": True
-                },
-                {
-                    "subject": "Neural Networks",
-                    "predicate": "used_in",
-                    "object": "Machine Learning",
-                    "object-entity": True
-                },
-                {
-                    "subject": "Deep Learning",
-                    "predicate": "accuracy",
-                    "object": "95%",
-                    "object-entity": False
-                }
-            ]
-        }
+        """Sample extraction data in JSONL format (list with type discriminators)"""
+        return [
+            {
+                "type": "definition",
+                "entity": "Machine Learning",
+                "definition": "A subset of artificial intelligence that enables computers to learn from data without explicit programming."
+            },
+            {
+                "type": "definition",
+                "entity": "Neural Networks",
+                "definition": "Computing systems inspired by biological neural networks that process information."
+            },
+            {
+                "type": "relationship",
+                "subject": "Machine Learning",
+                "predicate": "is_subset_of",
+                "object": "Artificial Intelligence",
+                "object-entity": True
+            },
+            {
+                "type": "relationship",
+                "subject": "Neural Networks",
+                "predicate": "used_in",
+                "object": "Machine Learning",
+                "object-entity": True
+            },
+            {
+                "type": "relationship",
+                "subject": "Deep Learning",
+                "predicate": "accuracy",
+                "object": "95%",
+                "object-entity": False
+            }
+        ]

    def test_to_uri_conversion(self, agent_extractor):
        """Test URI conversion for entities"""
@ -113,148 +114,147 @@ class TestAgentKgExtractor:
        expected = f"{TRUSTGRAPH_ENTITIES}"
        assert uri == expected

-    def test_parse_json_with_code_blocks(self, agent_extractor):
-        """Test JSON parsing from code blocks"""
-        # Test JSON in code blocks
+    def test_parse_jsonl_with_code_blocks(self, agent_extractor):
+        """Test JSONL parsing from code blocks"""
+        # Test JSONL in code blocks - note: JSON uses lowercase true/false
        response = '''```json
-        {
-            "definitions": [{"entity": "AI", "definition": "Artificial Intelligence"}],
-            "relationships": []
-        }
-        ```'''
-        
-        result = agent_extractor.parse_json(response)
-        
-        assert result["definitions"][0]["entity"] == "AI"
-        assert result["definitions"][0]["definition"] == "Artificial Intelligence"
-        assert result["relationships"] == []
+{"type": "definition", "entity": "AI", "definition": "Artificial Intelligence"}
+{"type": "relationship", "subject": "AI", "predicate": "is", "object": "technology", "object-entity": false}
+```'''

-    def test_parse_json_without_code_blocks(self, agent_extractor):
-        """Test JSON parsing without code blocks"""
-        response = '''{"definitions": [{"entity": "ML", "definition": "Machine Learning"}], "relationships": []}'''
-        
-        result = agent_extractor.parse_json(response)
-        
-        assert result["definitions"][0]["entity"] == "ML"
-        assert result["definitions"][0]["definition"] == "Machine Learning"
+        result = agent_extractor.parse_jsonl(response)

-    def test_parse_json_invalid_format(self, agent_extractor):
-        """Test JSON parsing with invalid format"""
-        invalid_response = "This is not JSON at all"
-        
-        with pytest.raises(json.JSONDecodeError):
-            agent_extractor.parse_json(invalid_response)
+        assert len(result) == 2
+        assert result[0]["entity"] == "AI"
+        assert result[0]["definition"] == "Artificial Intelligence"
+        assert result[1]["type"] == "relationship"

-    def test_parse_json_malformed_code_blocks(self, agent_extractor):
-        """Test JSON parsing with malformed code blocks"""
-        # Missing closing backticks
-        response = '''```json
-        {"definitions": [], "relationships": []}
-        '''
-        
-        # Should still parse the JSON content
-        with pytest.raises(json.JSONDecodeError):
-            agent_extractor.parse_json(response)
+    def test_parse_jsonl_without_code_blocks(self, agent_extractor):
+        """Test JSONL parsing without code blocks"""
+        response = '''{"type": "definition", "entity": "ML", "definition": "Machine Learning"}
+{"type": "definition", "entity": "AI", "definition": "Artificial Intelligence"}'''
+
+        result = agent_extractor.parse_jsonl(response)
+
+        assert len(result) == 2
+        assert result[0]["entity"] == "ML"
+        assert result[1]["entity"] == "AI"
+
+    def test_parse_jsonl_invalid_lines_skipped(self, agent_extractor):
+        """Test JSONL parsing skips invalid lines gracefully"""
+        response = '''{"type": "definition", "entity": "Valid", "definition": "Valid def"}
+This is not JSON at all
+{"type": "definition", "entity": "Also Valid", "definition": "Another def"}'''
+
+        result = agent_extractor.parse_jsonl(response)
+
+        # Should get 2 valid objects, skipping the invalid line
+        assert len(result) == 2
+        assert result[0]["entity"] == "Valid"
+        assert result[1]["entity"] == "Also Valid"
+
+    def test_parse_jsonl_truncation_resilience(self, agent_extractor):
+        """Test JSONL parsing handles truncated responses"""
+        # Simulates output cut off mid-line
+        response = '''{"type": "definition", "entity": "Complete", "definition": "Full def"}
+{"type": "definition", "entity": "Trunca'''
+
+        result = agent_extractor.parse_jsonl(response)
+
+        # Should get 1 valid object, the truncated line is skipped
+        assert len(result) == 1
+        assert result[0]["entity"] == "Complete"

    def test_process_extraction_data_definitions(self, agent_extractor, sample_metadata):
        """Test processing of definition data"""
-        data = {
-            "definitions": [
-                {
-                    "entity": "Machine Learning",
-                    "definition": "A subset of AI that enables learning from data."
-                }
-            ],
-            "relationships": []
-        }
-        
+        data = [
+            {
+                "type": "definition",
+                "entity": "Machine Learning",
+                "definition": "A subset of AI that enables learning from data."
+            }
+        ]
+
        triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata)
        
        # Check entity label triple
-        label_triple = next((t for t in triples if t.p.value == RDF_LABEL and t.o.value == "Machine Learning"), None)
+        label_triple = next((t for t in triples if t.p.iri == RDF_LABEL and t.o.value == "Machine Learning"), None)
        assert label_triple is not None
-        assert label_triple.s.value == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning"
-        assert label_triple.s.is_uri == True
-        assert label_triple.o.is_uri == False
-        
+        assert label_triple.s.iri == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning"
+        assert label_triple.s.type == IRI
+        assert label_triple.o.type == LITERAL
+
        # Check definition triple
-        def_triple = next((t for t in triples if t.p.value == DEFINITION), None)
+        def_triple = next((t for t in triples if t.p.iri == DEFINITION), None)
        assert def_triple is not None
-        assert def_triple.s.value == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning"
+        assert def_triple.s.iri == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning"
        assert def_triple.o.value == "A subset of AI that enables learning from data."
-        
+
        # Check subject-of triple
-        subject_of_triple = next((t for t in triples if t.p.value == SUBJECT_OF), None)
+        subject_of_triple = next((t for t in triples if t.p.iri == SUBJECT_OF), None)
        assert subject_of_triple is not None
-        assert subject_of_triple.s.value == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning"
-        assert subject_of_triple.o.value == "doc123"
-        
+        assert subject_of_triple.s.iri == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning"
+        assert subject_of_triple.o.iri == "doc123"
+
        # Check entity context
        assert len(entity_contexts) == 1
-        assert entity_contexts[0].entity.value == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning"
+        assert entity_contexts[0].entity.iri == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning"
        assert entity_contexts[0].context == "A subset of AI that enables learning from data."

    def test_process_extraction_data_relationships(self, agent_extractor, sample_metadata):
        """Test processing of relationship data"""
-        data = {
-            "definitions": [],
-            "relationships": [
-                {
-                    "subject": "Machine Learning",
-                    "predicate": "is_subset_of",
-                    "object": "Artificial Intelligence",
-                    "object-entity": True
-                }
-            ]
-        }
-        
+        data = [
+            {
+                "type": "relationship",
+                "subject": "Machine Learning",
+                "predicate": "is_subset_of",
+                "object": "Artificial Intelligence",
+                "object-entity": True
+            }
+        ]
+
        triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata)
        
        # Check that subject, predicate, and object labels are created
        subject_uri = f"{TRUSTGRAPH_ENTITIES}Machine%20Learning"
        predicate_uri = f"{TRUSTGRAPH_ENTITIES}is_subset_of"
-        
+
        # Find label triples
-        subject_label = next((t for t in triples if t.s.value == subject_uri and t.p.value == RDF_LABEL), None)
+        subject_label = next((t for t in triples if t.s.iri == subject_uri and t.p.iri == RDF_LABEL), None)
        assert subject_label is not None
        assert subject_label.o.value == "Machine Learning"
-        
-        predicate_label = next((t for t in triples if t.s.value == predicate_uri and t.p.value == RDF_LABEL), None)
+
+        predicate_label = next((t for t in triples if t.s.iri == predicate_uri and t.p.iri == RDF_LABEL), None)
        assert predicate_label is not None
        assert predicate_label.o.value == "is_subset_of"
-        
-        # Check main relationship triple 
-        # NOTE: Current implementation has bugs:
-        # 1. Uses data.get("object-entity") instead of rel.get("object-entity")
-        # 2. Sets object_value to predicate_uri instead of actual object URI
-        # This test documents the current buggy behavior
-        rel_triple = next((t for t in triples if t.s.value == subject_uri and t.p.value == predicate_uri), None)
+
+        # Check main relationship triple
+        object_uri = f"{TRUSTGRAPH_ENTITIES}Artificial%20Intelligence"
+        rel_triple = next((t for t in triples if t.s.iri == subject_uri and t.p.iri == predicate_uri), None)
        assert rel_triple is not None
-        # Due to bug, object value is set to predicate_uri
-        assert rel_triple.o.value == predicate_uri
-        
+        assert rel_triple.o.iri == object_uri
+        assert rel_triple.o.type == IRI
+
        # Check subject-of relationships
-        subject_of_triples = [t for t in triples if t.p.value == SUBJECT_OF and t.o.value == "doc123"]
+        subject_of_triples = [t for t in triples if t.p.iri == SUBJECT_OF and t.o.iri == "doc123"]
        assert len(subject_of_triples) >= 2  # At least subject and predicate should have subject-of relations

    def test_process_extraction_data_literal_object(self, agent_extractor, sample_metadata):
        """Test processing of relationships with literal objects"""
-        data = {
-            "definitions": [],
-            "relationships": [
-                {
-                    "subject": "Deep Learning",
-                    "predicate": "accuracy",
-                    "object": "95%",
-                    "object-entity": False
-                }
-            ]
-        }
-        
+        data = [
+            {
+                "type": "relationship",
+                "subject": "Deep Learning",
+                "predicate": "accuracy",
+                "object": "95%",
+                "object-entity": False
+            }
+        ]
+
        triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata)
-        
+
        # Check that object labels are not created for literal objects
-        object_labels = [t for t in triples if t.p.value == RDF_LABEL and t.o.value == "95%"]
+        object_labels = [t for t in triples if t.p.iri == RDF_LABEL and t.o.value == "95%"]
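        # Expectation: a literal object ("object-entity": False) should not get its own RDF_LABEL triple.
        # NOTE(review): object_labels is collected but not asserted in the lines shown here; the original
        # author suspected a bug in the implementation - confirm the behaviour and add an explicit assertion.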

@ -263,75 +263,62 @@ class TestAgentKgExtractor:
        triples, entity_contexts = agent_extractor.process_extraction_data(sample_extraction_data, sample_metadata)
        
        # Check that we have both definition and relationship triples
-        definition_triples = [t for t in triples if t.p.value == DEFINITION]
+        definition_triples = [t for t in triples if t.p.iri == DEFINITION]
        assert len(definition_triples) == 2  # Two definitions
-        
+
        # Check entity contexts are created for definitions
        assert len(entity_contexts) == 2
-        entity_uris = [ec.entity.value for ec in entity_contexts]
+        entity_uris = [ec.entity.iri for ec in entity_contexts]
        assert f"{TRUSTGRAPH_ENTITIES}Machine%20Learning" in entity_uris
        assert f"{TRUSTGRAPH_ENTITIES}Neural%20Networks" in entity_uris

    def test_process_extraction_data_no_metadata_id(self, agent_extractor):
        """Test processing when metadata has no ID"""
        metadata = Metadata(id=None, metadata=[])
-        data = {
-            "definitions": [
-                {"entity": "Test Entity", "definition": "Test definition"}
-            ],
-            "relationships": []
-        }
-        
+        data = [
+            {"type": "definition", "entity": "Test Entity", "definition": "Test definition"}
+        ]
+
        triples, entity_contexts = agent_extractor.process_extraction_data(data, metadata)
-        
+
        # Should not create subject-of relationships when no metadata ID
-        subject_of_triples = [t for t in triples if t.p.value == SUBJECT_OF]
+        subject_of_triples = [t for t in triples if t.p.iri == SUBJECT_OF]
        assert len(subject_of_triples) == 0
-        
+
        # Should still create entity contexts
        assert len(entity_contexts) == 1

    def test_process_extraction_data_empty_data(self, agent_extractor, sample_metadata):
        """Test processing of empty extraction data"""
-        data = {"definitions": [], "relationships": []}
-        
-        triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata)
-        
-        # Should only have metadata triples
-        assert len(entity_contexts) == 0
-        # Triples should only contain metadata triples if any
+        data = []

-    def test_process_extraction_data_missing_keys(self, agent_extractor, sample_metadata):
-        """Test processing data with missing keys"""
-        # Test missing definitions key
-        data = {"relationships": []}
        triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata)
+
+        # Should have no entity contexts
        assert len(entity_contexts) == 0
-        
-        # Test missing relationships key
-        data = {"definitions": []}
+        # Triples should be empty
+        assert len(triples) == 0
+
+    def test_process_extraction_data_unknown_types_ignored(self, agent_extractor, sample_metadata):
+        """Test processing data with unknown type values"""
+        data = [
+            {"type": "definition", "entity": "Valid", "definition": "Valid def"},
+            {"type": "unknown_type", "foo": "bar"},  # Unknown type - should be ignored
+            {"type": "relationship", "subject": "A", "predicate": "rel", "object": "B", "object-entity": True}
+        ]
+
        triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata)
-        assert len(entity_contexts) == 0
-        
-        # Test completely missing keys
-        data = {}
-        triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata)
-        assert len(entity_contexts) == 0
+
+        # Should process valid items and ignore unknown types
+        assert len(entity_contexts) == 1  # Only the definition creates entity context

    def test_process_extraction_data_malformed_entries(self, agent_extractor, sample_metadata):
        """Test processing data with malformed entries"""
-        # Test definition missing required fields
-        data = {
-            "definitions": [
-                {"entity": "Test"},  # Missing definition
-                {"definition": "Test def"}  # Missing entity
-            ],
-            "relationships": [
-                {"subject": "A", "predicate": "rel"},  # Missing object
-                {"subject": "B", "object": "C"}  # Missing predicate
-            ]
-        }
-        
+        # Test items missing required fields - should raise KeyError
+        data = [
+            {"type": "definition", "entity": "Test"},  # Missing definition
+        ]
+
        # The definition item lacks its "definition" field, so processing is expected to raise KeyError
        with pytest.raises(KeyError):
            agent_extractor.process_extraction_data(data, sample_metadata)
@ -340,17 +327,17 @@ class TestAgentKgExtractor:
    async def test_emit_triples(self, agent_extractor, sample_metadata):
        """Test emitting triples to publisher"""
        mock_publisher = AsyncMock()
-        
+
        test_triples = [
            Triple(
-                s=Value(value="test:subject", is_uri=True),
-                p=Value(value="test:predicate", is_uri=True),
-                o=Value(value="test object", is_uri=False)
+                s=Term(type=IRI, iri="test:subject"),
+                p=Term(type=IRI, iri="test:predicate"),
+                o=Term(type=LITERAL, value="test object")
            )
        ]
-        
+
        await agent_extractor.emit_triples(mock_publisher, sample_metadata, test_triples)
-        
+
        mock_publisher.send.assert_called_once()
        sent_triples = mock_publisher.send.call_args[0][0]
        assert isinstance(sent_triples, Triples)
@ -361,22 +348,22 @@ class TestAgentKgExtractor:
        # Note: metadata.metadata is now empty array in the new implementation
        assert sent_triples.metadata.metadata == []
        assert len(sent_triples.triples) == 1
-        assert sent_triples.triples[0].s.value == "test:subject"
+        assert sent_triples.triples[0].s.iri == "test:subject"

    @pytest.mark.asyncio
    async def test_emit_entity_contexts(self, agent_extractor, sample_metadata):
        """Test emitting entity contexts to publisher"""
        mock_publisher = AsyncMock()
-        
+
        test_contexts = [
            EntityContext(
-                entity=Value(value="test:entity", is_uri=True),
+                entity=Term(type=IRI, iri="test:entity"),
                context="Test context"
            )
        ]
-        
+
        await agent_extractor.emit_entity_contexts(mock_publisher, sample_metadata, test_contexts)
-        
+
        mock_publisher.send.assert_called_once()
        sent_contexts = mock_publisher.send.call_args[0][0]
        assert isinstance(sent_contexts, EntityContexts)
@ -387,7 +374,7 @@ class TestAgentKgExtractor:
        # Note: metadata.metadata is now empty array in the new implementation
        assert sent_contexts.metadata.metadata == []
        assert len(sent_contexts.entities) == 1
-        assert sent_contexts.entities[0].entity.value == "test:entity"
+        assert sent_contexts.entities[0].entity.iri == "test:entity"

    def test_agent_extractor_initialization_params(self):
        """Test agent extractor parameter validation"""
--- a/tests/unit/test_knowledge_graph/test_agent_extraction_edge_cases.py
+++ b/tests/unit/test_knowledge_graph/test_agent_extraction_edge_cases.py
@ -11,7 +11,7 @@ import urllib.parse
 from unittest.mock import AsyncMock, MagicMock

 from trustgraph.extract.kg.agent.extract import Processor as AgentKgExtractor
-from trustgraph.schema import Chunk, Triple, Triples, Metadata, Value
+from trustgraph.schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL
 from trustgraph.schema import EntityContext, EntityContexts
 from trustgraph.rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF

@ -32,11 +32,11 @@ class TestAgentKgExtractionEdgeCases:
        
        # Set up the methods we want to test
        extractor.to_uri = real_extractor.to_uri
-        extractor.parse_json = real_extractor.parse_json
+        extractor.parse_jsonl = real_extractor.parse_jsonl
        extractor.process_extraction_data = real_extractor.process_extraction_data
        extractor.emit_triples = real_extractor.emit_triples
        extractor.emit_entity_contexts = real_extractor.emit_entity_contexts
-        
+
        return extractor

    def test_to_uri_special_characters(self, agent_extractor):
@ -85,146 +85,116 @@ class TestAgentKgExtractionEdgeCases:
            # Verify the URI is properly encoded
            assert unicode_text not in uri  # Original unicode should be encoded

-    def test_parse_json_whitespace_variations(self, agent_extractor):
-        """Test JSON parsing with various whitespace patterns"""
-        # Test JSON with different whitespace patterns
+    def test_parse_jsonl_whitespace_variations(self, agent_extractor):
+        """Test JSONL parsing with various whitespace patterns"""
+        # Test JSONL with different whitespace patterns
        test_cases = [
            # Extra whitespace around code blocks
-            "   ```json\n{\"test\": true}\n```   ",
-            # Tabs and mixed whitespace
-            "\t\t```json\n\t{\"test\": true}\n\t```\t",
-            # Multiple newlines
-            "\n\n\n```json\n\n{\"test\": true}\n\n```\n\n",
-            # JSON without code blocks but with whitespace
-            "   {\"test\": true}   ",
-            # Mixed line endings
-            "```json\r\n{\"test\": true}\r\n```",
+            '   ```json\n{"type": "definition", "entity": "test", "definition": "def"}\n```   ',
+            # Multiple newlines between lines
+            '{"type": "definition", "entity": "A", "definition": "def A"}\n\n\n{"type": "definition", "entity": "B", "definition": "def B"}',
+            # JSONL without code blocks but with whitespace
+            '   {"type": "definition", "entity": "test", "definition": "def"}   ',
        ]
-        
-        for response in test_cases:
-            result = agent_extractor.parse_json(response)
-            assert result == {"test": True}

-    def test_parse_json_code_block_variations(self, agent_extractor):
-        """Test JSON parsing with different code block formats"""
+        for response in test_cases:
+            result = agent_extractor.parse_jsonl(response)
+            assert len(result) >= 1
+            assert result[0].get("type") == "definition"
+
+    def test_parse_jsonl_code_block_variations(self, agent_extractor):
+        """Test JSONL parsing with different code block formats"""
        test_cases = [
            # Standard json code block
-            "```json\n{\"valid\": true}\n```",
+            '```json\n{"type": "definition", "entity": "A", "definition": "def"}\n```',
+            # jsonl code block
+            '```jsonl\n{"type": "definition", "entity": "A", "definition": "def"}\n```',
            # Code block without language
-            "```\n{\"valid\": true}\n```",
-            # Uppercase JSON
-            "```JSON\n{\"valid\": true}\n```",
-            # Mixed case
-            "```Json\n{\"valid\": true}\n```",
-            # Multiple code blocks (should take first one)
-            "```json\n{\"first\": true}\n```\n```json\n{\"second\": true}\n```",
-            # Code block with extra content
-            "Here's the result:\n```json\n{\"valid\": true}\n```\nDone!",
+            '```\n{"type": "definition", "entity": "A", "definition": "def"}\n```',
+            # Code block with extra content before/after
+            'Here\'s the result:\n```json\n{"type": "definition", "entity": "A", "definition": "def"}\n```\nDone!',
        ]
-        
+
        for i, response in enumerate(test_cases):
-            try:
-                result = agent_extractor.parse_json(response)
-                assert result.get("valid") == True or result.get("first") == True
-            except json.JSONDecodeError:
-                # Some cases may fail due to regex extraction issues
-                # This documents current behavior - the regex may not match all cases
-                print(f"Case {i} failed JSON parsing: {response[:50]}...")
-                pass
+            result = agent_extractor.parse_jsonl(response)
+            assert len(result) >= 1, f"Case {i} failed"
+            assert result[0].get("entity") == "A"

-    def test_parse_json_malformed_code_blocks(self, agent_extractor):
-        """Test JSON parsing with malformed code block formats"""
-        # These should still work by falling back to treating entire text as JSON
-        test_cases = [
-            # Unclosed code block
-            "```json\n{\"test\": true}",
-            # No opening backticks
-            "{\"test\": true}\n```",
-            # Wrong number of backticks
-            "`json\n{\"test\": true}\n`",
-            # Nested backticks (should handle gracefully)
-            "```json\n{\"code\": \"```\", \"test\": true}\n```",
-        ]
-        
-        for response in test_cases:
-            try:
-                result = agent_extractor.parse_json(response)
-                assert "test" in result  # Should successfully parse
-            except json.JSONDecodeError:
-                # This is also acceptable for malformed cases
-                pass
+    def test_parse_jsonl_truncation_resilience(self, agent_extractor):
+        """Test JSONL parsing with truncated responses"""
+        # Simulates LLM output being cut off mid-line
+        response = '''{"type": "definition", "entity": "Complete1", "definition": "Full definition"}
+{"type": "definition", "entity": "Complete2", "definition": "Another full def"}
+{"type": "definition", "entity": "Trunca'''

-    def test_parse_json_large_responses(self, agent_extractor):
-        """Test JSON parsing with very large responses"""
-        # Create a large JSON structure
-        large_data = {
-            "definitions": [
-                {
-                    "entity": f"Entity {i}",
-                    "definition": f"Definition {i} " + "with more content " * 100
-                }
-                for i in range(100)
-            ],
-            "relationships": [
-                {
-                    "subject": f"Subject {i}",
-                    "predicate": f"predicate_{i}",
-                    "object": f"Object {i}",
-                    "object-entity": i % 2 == 0
-                }
-                for i in range(50)
-            ]
-        }
-        
-        large_json_str = json.dumps(large_data)
-        response = f"```json\n{large_json_str}\n```"
-        
-        result = agent_extractor.parse_json(response)
-        
-        assert len(result["definitions"]) == 100
-        assert len(result["relationships"]) == 50
-        assert result["definitions"][0]["entity"] == "Entity 0"
+        result = agent_extractor.parse_jsonl(response)
+
+        # Should get 2 valid objects; the truncated final line is skipped
+        assert len(result) == 2
+        assert result[0]["entity"] == "Complete1"
+        assert result[1]["entity"] == "Complete2"
+
+    def test_parse_jsonl_large_responses(self, agent_extractor):
+        """Test JSONL parsing with very large responses"""
+        # Create a large JSONL response
+        lines = []
+        for i in range(100):
+            lines.append(json.dumps({
+                "type": "definition",
+                "entity": f"Entity {i}",
+                "definition": f"Definition {i} " + "with more content " * 100
+            }))
+        for i in range(50):
+            lines.append(json.dumps({
+                "type": "relationship",
+                "subject": f"Subject {i}",
+                "predicate": f"predicate_{i}",
+                "object": f"Object {i}",
+                "object-entity": i % 2 == 0
+            }))
+
+        response = f"```json\n{chr(10).join(lines)}\n```"
+
+        result = agent_extractor.parse_jsonl(response)
+
+        definitions = [r for r in result if r.get("type") == "definition"]
+        relationships = [r for r in result if r.get("type") == "relationship"]
+
+        assert len(definitions) == 100
+        assert len(relationships) == 50
+        assert definitions[0]["entity"] == "Entity 0"

    def test_process_extraction_data_empty_metadata(self, agent_extractor):
        """Test processing with empty or minimal metadata"""
        # Test with None metadata - may not raise AttributeError depending on implementation
        try:
-            triples, contexts = agent_extractor.process_extraction_data(
-                {"definitions": [], "relationships": []}, 
-                None
-            )
+            triples, contexts = agent_extractor.process_extraction_data([], None)
            # If it doesn't raise, check the results
            assert len(triples) == 0
            assert len(contexts) == 0
        except (AttributeError, TypeError):
            # This is expected behavior when metadata is None
            pass
-        
+
        # Test with metadata without ID
        metadata = Metadata(id=None, metadata=[])
-        triples, contexts = agent_extractor.process_extraction_data(
-            {"definitions": [], "relationships": []},
-            metadata
-        )
+        triples, contexts = agent_extractor.process_extraction_data([], metadata)
        assert len(triples) == 0
        assert len(contexts) == 0
-        
+
        # Test with metadata with empty string ID
        metadata = Metadata(id="", metadata=[])
-        data = {
-            "definitions": [{"entity": "Test", "definition": "Test def"}],
-            "relationships": []
-        }
+        data = [{"type": "definition", "entity": "Test", "definition": "Test def"}]
        triples, contexts = agent_extractor.process_extraction_data(data, metadata)
-        
+
        # Should not create subject-of triples when ID is empty string
-        subject_of_triples = [t for t in triples if t.p.value == SUBJECT_OF]
+        subject_of_triples = [t for t in triples if t.p.iri == SUBJECT_OF]
        assert len(subject_of_triples) == 0

    def test_process_extraction_data_special_entity_names(self, agent_extractor):
        """Test processing with special characters in entity names"""
        metadata = Metadata(id="doc123", metadata=[])
-        
+
        special_entities = [
            "Entity with spaces",
            "Entity & Co.",
@ -237,71 +207,62 @@ class TestAgentKgExtractionEdgeCases:
            "Quotes: \"test\"",
            "Parentheses: (test)",
        ]
-        
-        data = {
-            "definitions": [
-                {"entity": entity, "definition": f"Definition for {entity}"}
-                for entity in special_entities
-            ],
-            "relationships": []
-        }
-        
+
+        data = [
+            {"type": "definition", "entity": entity, "definition": f"Definition for {entity}"}
+            for entity in special_entities
+        ]
+
        triples, contexts = agent_extractor.process_extraction_data(data, metadata)
-        
+
        # Verify all entities were processed
        assert len(contexts) == len(special_entities)
-        
+
        # Verify URIs were properly encoded
        for i, entity in enumerate(special_entities):
            expected_uri = f"{TRUSTGRAPH_ENTITIES}{urllib.parse.quote(entity)}"
-            assert contexts[i].entity.value == expected_uri
+            assert contexts[i].entity.iri == expected_uri

    def test_process_extraction_data_very_long_definitions(self, agent_extractor):
        """Test processing with very long entity definitions"""
        metadata = Metadata(id="doc123", metadata=[])
-        
+
        # Create very long definition
        long_definition = "This is a very long definition. " * 1000
-        
-        data = {
-            "definitions": [
-                {"entity": "Test Entity", "definition": long_definition}
-            ],
-            "relationships": []
-        }
-        
+
+        data = [
+            {"type": "definition", "entity": "Test Entity", "definition": long_definition}
+        ]
+
        triples, contexts = agent_extractor.process_extraction_data(data, metadata)
-        
+
        # Should handle long definitions without issues
        assert len(contexts) == 1
        assert contexts[0].context == long_definition
-        
+
        # Find definition triple
-        def_triple = next((t for t in triples if t.p.value == DEFINITION), None)
+        def_triple = next((t for t in triples if t.p.iri == DEFINITION), None)
        assert def_triple is not None
        assert def_triple.o.value == long_definition

    def test_process_extraction_data_duplicate_entities(self, agent_extractor):
        """Test processing with duplicate entity names"""
        metadata = Metadata(id="doc123", metadata=[])
-        
-        data = {
-            "definitions": [
-                {"entity": "Machine Learning", "definition": "First definition"},
-                {"entity": "Machine Learning", "definition": "Second definition"},  # Duplicate
-                {"entity": "AI", "definition": "AI definition"},
-                {"entity": "AI", "definition": "Another AI definition"},  # Duplicate
-            ],
-            "relationships": []
-        }
-        
+
+        data = [
+            {"type": "definition", "entity": "Machine Learning", "definition": "First definition"},
+            {"type": "definition", "entity": "Machine Learning", "definition": "Second definition"},  # Duplicate
+            {"type": "definition", "entity": "AI", "definition": "AI definition"},
+            {"type": "definition", "entity": "AI", "definition": "Another AI definition"},  # Duplicate
+        ]
+
        triples, contexts = agent_extractor.process_extraction_data(data, metadata)
-        
+
        # Should process all entries (including duplicates)
        assert len(contexts) == 4
-        
+
        # Check that both definitions for "Machine Learning" are present
-        ml_contexts = [ec for ec in contexts if "Machine%20Learning" in ec.entity.value]
+        ml_contexts = [ec for ec in contexts if "Machine%20Learning" in ec.entity.iri]
        assert len(ml_contexts) == 2
        assert ml_contexts[0].context == "First definition"
        assert ml_contexts[1].context == "Second definition"
@ -309,49 +270,44 @@ class TestAgentKgExtractionEdgeCases:
    def test_process_extraction_data_empty_strings(self, agent_extractor):
        """Test processing with empty strings in data"""
        metadata = Metadata(id="doc123", metadata=[])
-        
-        data = {
-            "definitions": [
-                {"entity": "", "definition": "Definition for empty entity"},
-                {"entity": "Valid Entity", "definition": ""},
-                {"entity": "  ", "definition": "   "},  # Whitespace only
-            ],
-            "relationships": [
-                {"subject": "", "predicate": "test", "object": "test", "object-entity": True},
-                {"subject": "test", "predicate": "", "object": "test", "object-entity": True},
-                {"subject": "test", "predicate": "test", "object": "", "object-entity": True},
-            ]
-        }
-        
+
+        data = [
+            {"type": "definition", "entity": "", "definition": "Definition for empty entity"},
+            {"type": "definition", "entity": "Valid Entity", "definition": ""},
+            {"type": "definition", "entity": "  ", "definition": "   "},  # Whitespace only
+            {"type": "relationship", "subject": "", "predicate": "test", "object": "test", "object-entity": True},
+            {"type": "relationship", "subject": "test", "predicate": "", "object": "test", "object-entity": True},
+            {"type": "relationship", "subject": "test", "predicate": "test", "object": "", "object-entity": True},
+        ]
+
        triples, contexts = agent_extractor.process_extraction_data(data, metadata)
-        
+
        # Should handle empty strings by creating URIs (even if empty)
        assert len(contexts) == 3
-        
+
        # Empty entity should create empty URI after encoding
-        empty_entity_context = next((ec for ec in contexts if ec.entity.value == TRUSTGRAPH_ENTITIES), None)
+        empty_entity_context = next((ec for ec in contexts if ec.entity.iri == TRUSTGRAPH_ENTITIES), None)
        assert empty_entity_context is not None

    def test_process_extraction_data_nested_json_in_strings(self, agent_extractor):
        """Test processing when definitions contain JSON-like strings"""
        metadata = Metadata(id="doc123", metadata=[])
-        
-        data = {
-            "definitions": [
-                {
-                    "entity": "JSON Entity",
-                    "definition": 'Definition with JSON: {"key": "value", "nested": {"inner": true}}'
-                },
-                {
-                    "entity": "Array Entity", 
-                    "definition": 'Contains array: [1, 2, 3, "string"]'
-                }
-            ],
-            "relationships": []
-        }
-        
+
+        data = [
+            {
+                "type": "definition",
+                "entity": "JSON Entity",
+                "definition": 'Definition with JSON: {"key": "value", "nested": {"inner": true}}'
+            },
+            {
+                "type": "definition",
+                "entity": "Array Entity",
+                "definition": 'Contains array: [1, 2, 3, "string"]'
+            }
+        ]
+
        triples, contexts = agent_extractor.process_extraction_data(data, metadata)
-        
+
        # Should handle JSON strings in definitions without parsing them
        assert len(contexts) == 2
        assert '{"key": "value"' in contexts[0].context
@ -360,32 +316,29 @@ class TestAgentKgExtractionEdgeCases:
    def test_process_extraction_data_boolean_object_entity_variations(self, agent_extractor):
        """Test processing with various boolean values for object-entity"""
        metadata = Metadata(id="doc123", metadata=[])
-        
-        data = {
-            "definitions": [],
-            "relationships": [
-                # Explicit True
-                {"subject": "A", "predicate": "rel1", "object": "B", "object-entity": True},
-                # Explicit False  
-                {"subject": "A", "predicate": "rel2", "object": "literal", "object-entity": False},
-                # Missing object-entity (should default to True based on code)
-                {"subject": "A", "predicate": "rel3", "object": "C"},
-                # String "true" (should be treated as truthy)
-                {"subject": "A", "predicate": "rel4", "object": "D", "object-entity": "true"},
-                # String "false" (should be treated as truthy in Python)
-                {"subject": "A", "predicate": "rel5", "object": "E", "object-entity": "false"},
-                # Number 0 (falsy)
-                {"subject": "A", "predicate": "rel6", "object": "literal2", "object-entity": 0},
-                # Number 1 (truthy)
-                {"subject": "A", "predicate": "rel7", "object": "F", "object-entity": 1},
-            ]
-        }
-        
+
+        data = [
+            # Explicit True
+            {"type": "relationship", "subject": "A", "predicate": "rel1", "object": "B", "object-entity": True},
+            # Explicit False
+            {"type": "relationship", "subject": "A", "predicate": "rel2", "object": "literal", "object-entity": False},
+            # Missing object-entity (should default to True based on code)
+            {"type": "relationship", "subject": "A", "predicate": "rel3", "object": "C"},
+            # String "true" (should be treated as truthy)
+            {"type": "relationship", "subject": "A", "predicate": "rel4", "object": "D", "object-entity": "true"},
+            # String "false" (any non-empty string is truthy in Python, so this is still truthy)
+            {"type": "relationship", "subject": "A", "predicate": "rel5", "object": "E", "object-entity": "false"},
+            # Number 0 (falsy)
+            {"type": "relationship", "subject": "A", "predicate": "rel6", "object": "literal2", "object-entity": 0},
+            # Number 1 (truthy)
+            {"type": "relationship", "subject": "A", "predicate": "rel7", "object": "F", "object-entity": 1},
+        ]
+
        triples, contexts = agent_extractor.process_extraction_data(data, metadata)
-        
+
        # Should process all relationships
        # Note: The current implementation has some logic issues that these tests document
-        assert len([t for t in triples if t.p.value != RDF_LABEL and t.p.value != SUBJECT_OF]) >= 7
+        assert len([t for t in triples if t.p.iri != RDF_LABEL and t.p.iri != SUBJECT_OF]) >= 7

    @pytest.mark.asyncio
    async def test_emit_empty_collections(self, agent_extractor):
@ -437,41 +390,40 @@ class TestAgentKgExtractionEdgeCases:
    def test_process_extraction_data_performance_large_dataset(self, agent_extractor):
        """Test performance with large extraction datasets"""
        metadata = Metadata(id="large-doc", metadata=[])
-        
-        # Create large dataset
+
+        # Create large dataset as a flat list of typed records (the parsed form of JSONL)
        num_definitions = 1000
        num_relationships = 2000
-        
-        large_data = {
-            "definitions": [
-                {
-                    "entity": f"Entity_{i:04d}",
-                    "definition": f"Definition for entity {i} with some detailed explanation."
-                }
-                for i in range(num_definitions)
-            ],
-            "relationships": [
-                {
-                    "subject": f"Entity_{i % num_definitions:04d}",
-                    "predicate": f"predicate_{i % 10}",
-                    "object": f"Entity_{(i + 1) % num_definitions:04d}",
-                    "object-entity": True
-                }
-                for i in range(num_relationships)
-            ]
-        }
-        
+
+        large_data = [
+            {
+                "type": "definition",
+                "entity": f"Entity_{i:04d}",
+                "definition": f"Definition for entity {i} with some detailed explanation."
+            }
+            for i in range(num_definitions)
+        ] + [
+            {
+                "type": "relationship",
+                "subject": f"Entity_{i % num_definitions:04d}",
+                "predicate": f"predicate_{i % 10}",
+                "object": f"Entity_{(i + 1) % num_definitions:04d}",
+                "object-entity": True
+            }
+            for i in range(num_relationships)
+        ]
+
        import time
        start_time = time.time()
-        
+
        triples, contexts = agent_extractor.process_extraction_data(large_data, metadata)
-        
+
        end_time = time.time()
        processing_time = end_time - start_time
-        
+
        # Should complete within reasonable time (adjust threshold as needed)
        assert processing_time < 10.0  # 10 seconds threshold
-        
+
        # Verify results
        assert len(contexts) == num_definitions
        # Triples include labels, definitions, relationships, and subject-of relations
--- a/tests/unit/test_knowledge_graph/test_graph_validation.py
+++ b/tests/unit/test_knowledge_graph/test_graph_validation.py
@ -7,7 +7,7 @@ processing graph structures, and performing graph operations.

 import pytest
 from unittest.mock import Mock
-from .conftest import Triple, Value, Metadata
+from .conftest import Triple, Metadata
 from collections import defaultdict, deque


--- a/tests/unit/test_knowledge_graph/test_object_validation.py
+++ b/tests/unit/test_knowledge_graph/test_object_validation.py
@ -76,7 +76,7 @@ def cities_schema():
 def validator():
    """Create a mock processor with just the validation method"""
    from unittest.mock import MagicMock
-    from trustgraph.extract.kg.objects.processor import Processor
+    from trustgraph.extract.kg.rows.processor import Processor
    
    # Create a mock processor
    mock_processor = MagicMock()
--- a/tests/unit/test_knowledge_graph/test_triple_construction.py
+++ b/tests/unit/test_knowledge_graph/test_triple_construction.py
@ -2,13 +2,13 @@
 Unit tests for triple construction logic

 Tests the core business logic for constructing RDF triples from extracted
-entities and relationships, including URI generation, Value object creation,
+entities and relationships, including URI generation, Term object creation,
 and triple validation.
 """

 import pytest
 from unittest.mock import Mock
-from .conftest import Triple, Triples, Value, Metadata
+from .conftest import Triple, Triples, Term, Metadata, IRI, LITERAL
 import re
 import hashlib

@ -48,80 +48,82 @@ class TestTripleConstructionLogic:
            generated_uri = generate_uri(text, entity_type)
            assert generated_uri == expected_uri, f"URI generation failed for '{text}'"

-    def test_value_object_creation(self):
-        """Test creation of Value objects for subjects, predicates, and objects"""
+    def test_term_object_creation(self):
+        """Test creation of Term objects for subjects, predicates, and objects"""
        # Arrange
-        def create_value_object(text, is_uri, value_type=""):
-            return Value(
-                value=text,
-                is_uri=is_uri,
-                type=value_type
-            )
-        
+        def create_term_object(text, is_uri, datatype=""):
+            if is_uri:
+                return Term(type=IRI, iri=text)
+            else:
+                return Term(type=LITERAL, value=text, datatype=datatype if datatype else None)
+
        test_cases = [
            ("http://trustgraph.ai/kg/person/john-smith", True, ""),
            ("John Smith", False, "string"),
            ("42", False, "integer"),
            ("http://schema.org/worksFor", True, "")
        ]
-        
+
        # Act & Assert
-        for value_text, is_uri, value_type in test_cases:
-            value_obj = create_value_object(value_text, is_uri, value_type)
-            
-            assert isinstance(value_obj, Value)
-            assert value_obj.value == value_text
-            assert value_obj.is_uri == is_uri
-            assert value_obj.type == value_type
+        for value_text, is_uri, datatype in test_cases:
+            term_obj = create_term_object(value_text, is_uri, datatype)
+
+            assert isinstance(term_obj, Term)
+            if is_uri:
+                assert term_obj.type == IRI
+                assert term_obj.iri == value_text
+            else:
+                assert term_obj.type == LITERAL
+                assert term_obj.value == value_text

    def test_triple_construction_from_relationship(self):
        """Test constructing Triple objects from relationships"""
        # Arrange
        relationship = {
            "subject": "John Smith",
-            "predicate": "works_for", 
+            "predicate": "works_for",
            "object": "OpenAI",
            "subject_type": "PERSON",
            "object_type": "ORG"
        }
-        
+
        def construct_triple(relationship, uri_base="http://trustgraph.ai/kg"):
            # Generate URIs
            subject_uri = f"{uri_base}/person/{relationship['subject'].lower().replace(' ', '-')}"
            object_uri = f"{uri_base}/org/{relationship['object'].lower().replace(' ', '-')}"
-            
+
            # Map predicate to schema.org URI
            predicate_mappings = {
                "works_for": "http://schema.org/worksFor",
                "located_in": "http://schema.org/location",
                "developed": "http://schema.org/creator"
            }
-            predicate_uri = predicate_mappings.get(relationship["predicate"], 
+            predicate_uri = predicate_mappings.get(relationship["predicate"],
                                                 f"{uri_base}/predicate/{relationship['predicate']}")
-            
-            # Create Value objects
-            subject_value = Value(value=subject_uri, is_uri=True, type="")
-            predicate_value = Value(value=predicate_uri, is_uri=True, type="")
-            object_value = Value(value=object_uri, is_uri=True, type="")
-            
+
+            # Create Term objects
+            subject_term = Term(type=IRI, iri=subject_uri)
+            predicate_term = Term(type=IRI, iri=predicate_uri)
+            object_term = Term(type=IRI, iri=object_uri)
+
            # Create Triple
            return Triple(
-                s=subject_value,
-                p=predicate_value,
-                o=object_value
+                s=subject_term,
+                p=predicate_term,
+                o=object_term
            )
-        
+
        # Act
        triple = construct_triple(relationship)
-        
+
        # Assert
        assert isinstance(triple, Triple)
-        assert triple.s.value == "http://trustgraph.ai/kg/person/john-smith"
-        assert triple.s.is_uri is True
-        assert triple.p.value == "http://schema.org/worksFor"
-        assert triple.p.is_uri is True
-        assert triple.o.value == "http://trustgraph.ai/kg/org/openai"
-        assert triple.o.is_uri is True
+        assert triple.s.iri == "http://trustgraph.ai/kg/person/john-smith"
+        assert triple.s.type == IRI
+        assert triple.p.iri == "http://schema.org/worksFor"
+        assert triple.p.type == IRI
+        assert triple.o.iri == "http://trustgraph.ai/kg/org/openai"
+        assert triple.o.type == IRI

    def test_literal_value_handling(self):
        """Test handling of literal values vs URI values"""
@ -132,10 +134,10 @@ class TestTripleConstructionLogic:
            ("John Smith", "email", "john@example.com", False),  # Literal email
            ("John Smith", "worksFor", "http://trustgraph.ai/kg/org/openai", True)  # URI reference
        ]
-        
+
        def create_triple_with_literal(subject_uri, predicate, object_value, object_is_uri):
-            subject_val = Value(value=subject_uri, is_uri=True, type="")
-            
+            subject_term = Term(type=IRI, iri=subject_uri)
+
            # Determine predicate URI
            predicate_mappings = {
                "name": "http://schema.org/name",
@ -144,32 +146,37 @@ class TestTripleConstructionLogic:
                "worksFor": "http://schema.org/worksFor"
            }
            predicate_uri = predicate_mappings.get(predicate, f"http://trustgraph.ai/kg/predicate/{predicate}")
-            predicate_val = Value(value=predicate_uri, is_uri=True, type="")
-            
-            # Create object value with appropriate type
-            object_type = ""
-            if not object_is_uri:
+            predicate_term = Term(type=IRI, iri=predicate_uri)
+
+            # Create object term: IRI for URI objects, otherwise LITERAL with a datatype
+            if object_is_uri:
+                object_term = Term(type=IRI, iri=object_value)
+            else:
+                datatype = None
                if predicate == "age":
-                    object_type = "integer"
+                    datatype = "integer"
                elif predicate in ["name", "email"]:
-                    object_type = "string"
-            
-            object_val = Value(value=object_value, is_uri=object_is_uri, type=object_type)
-            
-            return Triple(s=subject_val, p=predicate_val, o=object_val)
-        
+                    datatype = "string"
+                object_term = Term(type=LITERAL, value=object_value, datatype=datatype)
+
+            return Triple(s=subject_term, p=predicate_term, o=object_term)
+
        # Act & Assert
        for subject_uri, predicate, object_value, object_is_uri in test_data:
            subject_full_uri = "http://trustgraph.ai/kg/person/john-smith"
            triple = create_triple_with_literal(subject_full_uri, predicate, object_value, object_is_uri)
-            
-            assert triple.o.is_uri == object_is_uri
-            assert triple.o.value == object_value
-            
+
+            if object_is_uri:
+                assert triple.o.type == IRI
+                assert triple.o.iri == object_value
+            else:
+                assert triple.o.type == LITERAL
+                assert triple.o.value == object_value
+
            if predicate == "age":
-                assert triple.o.type == "integer"
+                assert triple.o.datatype == "integer"
            elif predicate in ["name", "email"]:
-                assert triple.o.type == "string"
+                assert triple.o.datatype == "string"

    def test_namespace_management(self):
        """Test namespace prefix management and expansion"""
@ -216,63 +223,74 @@ class TestTripleConstructionLogic:
    def test_triple_validation(self):
        """Test triple validation rules"""
        # Arrange
+        def get_term_value(term):
+            """Return ``term.iri`` for IRI-typed terms and ``term.value`` for any other type."""
+            if term.type == IRI:
+                return term.iri
+            else:
+                # NOTE(review): every non-IRI term type falls through to ``value``;
+                # confirm that is intended for term types other than LITERAL.
+                return term.value
+
        def validate_triple(triple):
+            """Validate the terms of a Triple and return ``(is_valid, errors)``.
+
+            A term is reported as missing when it is falsy or when
+            ``get_term_value`` yields a falsy value for it. IRI-typed terms must
+            additionally match ``uri_pattern``, and a predicate that is present
+            must itself be an IRI. An absent term is reported once as missing and
+            skipped by the later checks rather than raising ``AttributeError``.
+
+            ``errors`` is the list of human-readable problems found;
+            ``is_valid`` is True only when that list is empty.
+            """
            errors = []
-            
+
            # Check required components
-            if not triple.s or not triple.s.value:
+            s_val = get_term_value(triple.s) if triple.s else None
+            p_val = get_term_value(triple.p) if triple.p else None
+            o_val = get_term_value(triple.o) if triple.o else None
+
+            if not triple.s or not s_val:
                errors.append("Missing or empty subject")
-            
-            if not triple.p or not triple.p.value:
+
+            if not triple.p or not p_val:
                errors.append("Missing or empty predicate")
-            
-            if not triple.o or not triple.o.value:
+
+            if not triple.o or not o_val:
                errors.append("Missing or empty object")
-            
+
            # Check URI validity for URI values
            uri_pattern = r'^https?://[^\s/$.?#].[^\s]*$'
-            
-            if triple.s.is_uri and not re.match(uri_pattern, triple.s.value):
+
+            # Guard every check with the term itself: an absent term has already
+            # been reported above and has no ``type``/``iri`` to inspect.
+            if triple.s and triple.s.type == IRI and not re.match(uri_pattern, triple.s.iri or ""):
                errors.append("Invalid subject URI format")
-            
-            if triple.p.is_uri and not re.match(uri_pattern, triple.p.value):
+
+            if triple.p and triple.p.type == IRI and not re.match(uri_pattern, triple.p.iri or ""):
                errors.append("Invalid predicate URI format")
-            
-            if triple.o.is_uri and not re.match(uri_pattern, triple.o.value):
+
+            if triple.o and triple.o.type == IRI and not re.match(uri_pattern, triple.o.iri or ""):
                errors.append("Invalid object URI format")
-            
+
            # Predicates should typically be URIs
-            if not triple.p.is_uri:
+            if triple.p and triple.p.type != IRI:
                errors.append("Predicate should be a URI")
-            
+
            return len(errors) == 0, errors
-        
+
        # Test valid triple
        valid_triple = Triple(
-            s=Value(value="http://trustgraph.ai/kg/person/john", is_uri=True, type=""),
-            p=Value(value="http://schema.org/name", is_uri=True, type=""),
-            o=Value(value="John Smith", is_uri=False, type="string")
+            s=Term(type=IRI, iri="http://trustgraph.ai/kg/person/john"),
+            p=Term(type=IRI, iri="http://schema.org/name"),
+            o=Term(type=LITERAL, value="John Smith", datatype="string")
        )
-        
+
        # Test invalid triples
        invalid_triples = [
-            Triple(s=Value(value="", is_uri=True, type=""), 
-                  p=Value(value="http://schema.org/name", is_uri=True, type=""),
-                  o=Value(value="John", is_uri=False, type="")),  # Empty subject
-            
-            Triple(s=Value(value="http://trustgraph.ai/kg/person/john", is_uri=True, type=""), 
-                  p=Value(value="name", is_uri=False, type=""),  # Non-URI predicate
-                  o=Value(value="John", is_uri=False, type="")),
-            
-            Triple(s=Value(value="invalid-uri", is_uri=True, type=""), 
-                  p=Value(value="http://schema.org/name", is_uri=True, type=""),
-                  o=Value(value="John", is_uri=False, type=""))  # Invalid URI format
+            Triple(s=Term(type=IRI, iri=""),
+                  p=Term(type=IRI, iri="http://schema.org/name"),
+                  o=Term(type=LITERAL, value="John")),  # Empty subject
+
+            Triple(s=Term(type=IRI, iri="http://trustgraph.ai/kg/person/john"),
+                  p=Term(type=LITERAL, value="name"),  # Non-URI predicate
+                  o=Term(type=LITERAL, value="John")),
+
+            Triple(s=Term(type=IRI, iri="invalid-uri"),
+                  p=Term(type=IRI, iri="http://schema.org/name"),
+                  o=Term(type=LITERAL, value="John"))  # Invalid URI format
        ]
-        
+
        # Act & Assert
        is_valid, errors = validate_triple(valid_triple)
        assert is_valid, f"Valid triple failed validation: {errors}"
-        
+
        for invalid_triple in invalid_triples:
            is_valid, errors = validate_triple(invalid_triple)
            assert not is_valid, f"Invalid triple passed validation: {invalid_triple}"
@ -286,97 +304,97 @@ class TestTripleConstructionLogic:
            {"text": "OpenAI", "type": "ORG"},
            {"text": "San Francisco", "type": "PLACE"}
        ]
-        
+
        relationships = [
            {"subject": "John Smith", "predicate": "works_for", "object": "OpenAI"},
            {"subject": "OpenAI", "predicate": "located_in", "object": "San Francisco"}
        ]
-        
+
        def construct_triple_batch(entities, relationships, document_id="doc-1"):
+            """Build IRI-only triples for the given entities and relationships.
+
+            Emits one rdf:type triple per entity, followed by one triple per
+            relationship, and returns them as a single list in that order.
+            ``document_id`` is accepted but not read anywhere in the body.
+            """
            triples = []
-            
+
            # Create type triples for entities
            for entity in entities:
                entity_uri = f"http://trustgraph.ai/kg/{entity['type'].lower()}/{entity['text'].lower().replace(' ', '-')}"
                type_uri = f"http://trustgraph.ai/kg/type/{entity['type']}"
-                
+
                type_triple = Triple(
-                    s=Value(value=entity_uri, is_uri=True, type=""),
-                    p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True, type=""),
-                    o=Value(value=type_uri, is_uri=True, type="")
+                    s=Term(type=IRI, iri=entity_uri),
+                    p=Term(type=IRI, iri="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+                    o=Term(type=IRI, iri=type_uri)
                )
                triples.append(type_triple)
-            
+
            # Create relationship triples
+            # NOTE(review): endpoints here use the fixed ``/kg/entity/`` path, while the
+            # type triples above use ``/kg/<type>/``, so the two loops produce different
+            # IRIs for the same entity text -- confirm this is intended.
            for rel in relationships:
                subject_uri = f"http://trustgraph.ai/kg/entity/{rel['subject'].lower().replace(' ', '-')}"
                object_uri = f"http://trustgraph.ai/kg/entity/{rel['object'].lower().replace(' ', '-')}"
                predicate_uri = f"http://schema.org/{rel['predicate'].replace('_', '')}"
-                
+
                rel_triple = Triple(
-                    s=Value(value=subject_uri, is_uri=True, type=""),
-                    p=Value(value=predicate_uri, is_uri=True, type=""),
-                    o=Value(value=object_uri, is_uri=True, type="")
+                    s=Term(type=IRI, iri=subject_uri),
+                    p=Term(type=IRI, iri=predicate_uri),
+                    o=Term(type=IRI, iri=object_uri)
                )
                triples.append(rel_triple)
-            
+
            return triples
-        
+
        # Act
        triples = construct_triple_batch(entities, relationships)
-        
+
        # Assert
        assert len(triples) == len(entities) + len(relationships)  # Type triples + relationship triples
-        
+
        # Check that all triples are valid Triple objects
        for triple in triples:
            assert isinstance(triple, Triple)
-            assert triple.s.value != ""
-            assert triple.p.value != ""
-            assert triple.o.value != ""
+            assert triple.s.iri != ""
+            assert triple.p.iri != ""
+            assert triple.o.iri != ""

    def test_triples_batch_object_creation(self):
        """Test creating Triples batch objects with metadata"""
        # Arrange
+        # Two triples about the same subject: a literal-valued name and an
+        # IRI-valued worksFor link.
        sample_triples = [
            Triple(
-                s=Value(value="http://trustgraph.ai/kg/person/john", is_uri=True, type=""),
-                p=Value(value="http://schema.org/name", is_uri=True, type=""),
-                o=Value(value="John Smith", is_uri=False, type="string")
+                s=Term(type=IRI, iri="http://trustgraph.ai/kg/person/john"),
+                p=Term(type=IRI, iri="http://schema.org/name"),
+                o=Term(type=LITERAL, value="John Smith", datatype="string")
            ),
            Triple(
-                s=Value(value="http://trustgraph.ai/kg/person/john", is_uri=True, type=""),
-                p=Value(value="http://schema.org/worksFor", is_uri=True, type=""),
-                o=Value(value="http://trustgraph.ai/kg/org/openai", is_uri=True, type="")
+                s=Term(type=IRI, iri="http://trustgraph.ai/kg/person/john"),
+                p=Term(type=IRI, iri="http://schema.org/worksFor"),
+                o=Term(type=IRI, iri="http://trustgraph.ai/kg/org/openai")
            )
        ]
-        
+
        metadata = Metadata(
            id="test-doc-123",
-            user="test_user", 
+            user="test_user",
            collection="test_collection",
            metadata=[]
        )
-        
+
        # Act
        triples_batch = Triples(
            metadata=metadata,
            triples=sample_triples
        )
-        
+
        # Assert
        assert isinstance(triples_batch, Triples)
        assert triples_batch.metadata.id == "test-doc-123"
        assert triples_batch.metadata.user == "test_user"
        assert triples_batch.metadata.collection == "test_collection"
        assert len(triples_batch.triples) == 2
-        
+
        # Check that triples are properly embedded
        for triple in triples_batch.triples:
            assert isinstance(triple, Triple)
-            assert isinstance(triple.s, Value)
-            assert isinstance(triple.p, Value)
-            assert isinstance(triple.o, Value)
+            # Only the wrapper type of each term is asserted here; the term
+            # contents are not re-checked.
+            assert isinstance(triple.s, Term)
+            assert isinstance(triple.p, Term)
+            assert isinstance(triple.o, Term)

    def test_uri_collision_handling(self):
        """Test handling of URI collisions and duplicate detection"""