Feature/prompts jsonl (#619)

* Tech spec
* JSONL implementation complete
* Updated prompt client users
* Fix tests
2026-06-12 00:05:13 +02:00 · 2026-01-26 17:38:00 +00:00 · 2026-01-26 17:38:00 +00:00 · e214eb4e02
commit e214eb4e02
parent e4f0013841
8 changed files with 1292 additions and 463 deletions
--- a/tests/unit/test_knowledge_graph/test_agent_extraction.py
+++ b/tests/unit/test_knowledge_graph/test_agent_extraction.py
@@ -33,7 +33,7 @@ class TestAgentKgExtractor:
        
        # Set up the methods we want to test
        extractor.to_uri = real_extractor.to_uri
-        extractor.parse_json = real_extractor.parse_json
+        extractor.parse_jsonl = real_extractor.parse_jsonl
        extractor.process_extraction_data = real_extractor.process_extraction_data
        extractor.emit_triples = real_extractor.emit_triples
        extractor.emit_entity_contexts = real_extractor.emit_entity_contexts
@@ -62,39 +62,40 @@ class TestAgentKgExtractor:

    @pytest.fixture
    def sample_extraction_data(self):
-        """Sample extraction data in expected format"""
-        return {
-            "definitions": [
-                {
-                    "entity": "Machine Learning",
-                    "definition": "A subset of artificial intelligence that enables computers to learn from data without explicit programming."
-                },
-                {
-                    "entity": "Neural Networks",
-                    "definition": "Computing systems inspired by biological neural networks that process information."
-                }
-            ],
-            "relationships": [
-                {
-                    "subject": "Machine Learning",
-                    "predicate": "is_subset_of",
-                    "object": "Artificial Intelligence",
-                    "object-entity": True
-                },
-                {
-                    "subject": "Neural Networks",
-                    "predicate": "used_in",
-                    "object": "Machine Learning",
-                    "object-entity": True
-                },
-                {
-                    "subject": "Deep Learning",
-                    "predicate": "accuracy",
-                    "object": "95%",
-                    "object-entity": False
-                }
-            ]
-        }
+        """Sample extraction data in JSONL format (list with type discriminators)"""
+        return [
+            {
+                "type": "definition",
+                "entity": "Machine Learning",
+                "definition": "A subset of artificial intelligence that enables computers to learn from data without explicit programming."
+            },
+            {
+                "type": "definition",
+                "entity": "Neural Networks",
+                "definition": "Computing systems inspired by biological neural networks that process information."
+            },
+            {
+                "type": "relationship",
+                "subject": "Machine Learning",
+                "predicate": "is_subset_of",
+                "object": "Artificial Intelligence",
+                "object-entity": True
+            },
+            {
+                "type": "relationship",
+                "subject": "Neural Networks",
+                "predicate": "used_in",
+                "object": "Machine Learning",
+                "object-entity": True
+            },
+            {
+                "type": "relationship",
+                "subject": "Deep Learning",
+                "predicate": "accuracy",
+                "object": "95%",
+                "object-entity": False
+            }
+        ]

    def test_to_uri_conversion(self, agent_extractor):
        """Test URI conversion for entities"""
@@ -113,61 +114,67 @@ class TestAgentKgExtractor:
        expected = f"{TRUSTGRAPH_ENTITIES}"
        assert uri == expected

-    def test_parse_json_with_code_blocks(self, agent_extractor):
-        """Test JSON parsing from code blocks"""
-        # Test JSON in code blocks
+    def test_parse_jsonl_with_code_blocks(self, agent_extractor):
+        """Test JSONL parsing from code blocks"""
+        # Test JSONL in code blocks - note: JSON uses lowercase true/false
        response = '''```json
-        {
-            "definitions": [{"entity": "AI", "definition": "Artificial Intelligence"}],
-            "relationships": []
-        }
-        ```'''
-        
-        result = agent_extractor.parse_json(response)
-        
-        assert result["definitions"][0]["entity"] == "AI"
-        assert result["definitions"][0]["definition"] == "Artificial Intelligence"
-        assert result["relationships"] == []
+{"type": "definition", "entity": "AI", "definition": "Artificial Intelligence"}
+{"type": "relationship", "subject": "AI", "predicate": "is", "object": "technology", "object-entity": false}
+```'''

-    def test_parse_json_without_code_blocks(self, agent_extractor):
-        """Test JSON parsing without code blocks"""
-        response = '''{"definitions": [{"entity": "ML", "definition": "Machine Learning"}], "relationships": []}'''
-        
-        result = agent_extractor.parse_json(response)
-        
-        assert result["definitions"][0]["entity"] == "ML"
-        assert result["definitions"][0]["definition"] == "Machine Learning"
+        result = agent_extractor.parse_jsonl(response)

-    def test_parse_json_invalid_format(self, agent_extractor):
-        """Test JSON parsing with invalid format"""
-        invalid_response = "This is not JSON at all"
-        
-        with pytest.raises(json.JSONDecodeError):
-            agent_extractor.parse_json(invalid_response)
+        assert len(result) == 2
+        assert result[0]["entity"] == "AI"
+        assert result[0]["definition"] == "Artificial Intelligence"
+        assert result[1]["type"] == "relationship"

-    def test_parse_json_malformed_code_blocks(self, agent_extractor):
-        """Test JSON parsing with malformed code blocks"""
-        # Missing closing backticks
-        response = '''```json
-        {"definitions": [], "relationships": []}
-        '''
-        
-        # Should still parse the JSON content
-        with pytest.raises(json.JSONDecodeError):
-            agent_extractor.parse_json(response)
+    def test_parse_jsonl_without_code_blocks(self, agent_extractor):
+        """Test JSONL parsing without code blocks"""
+        response = '''{"type": "definition", "entity": "ML", "definition": "Machine Learning"}
+{"type": "definition", "entity": "AI", "definition": "Artificial Intelligence"}'''
+
+        result = agent_extractor.parse_jsonl(response)
+
+        assert len(result) == 2
+        assert result[0]["entity"] == "ML"
+        assert result[1]["entity"] == "AI"
+
+    def test_parse_jsonl_invalid_lines_skipped(self, agent_extractor):
+        """Test JSONL parsing skips invalid lines gracefully"""
+        response = '''{"type": "definition", "entity": "Valid", "definition": "Valid def"}
+This is not JSON at all
+{"type": "definition", "entity": "Also Valid", "definition": "Another def"}'''
+
+        result = agent_extractor.parse_jsonl(response)
+
+        # Should get 2 valid objects, skipping the invalid line
+        assert len(result) == 2
+        assert result[0]["entity"] == "Valid"
+        assert result[1]["entity"] == "Also Valid"
+
+    def test_parse_jsonl_truncation_resilience(self, agent_extractor):
+        """Test JSONL parsing handles truncated responses"""
+        # Simulates output cut off mid-line
+        response = '''{"type": "definition", "entity": "Complete", "definition": "Full def"}
+{"type": "definition", "entity": "Trunca'''
+
+        result = agent_extractor.parse_jsonl(response)
+
+        # Should get 1 valid object, the truncated line is skipped
+        assert len(result) == 1
+        assert result[0]["entity"] == "Complete"

    def test_process_extraction_data_definitions(self, agent_extractor, sample_metadata):
        """Test processing of definition data"""
-        data = {
-            "definitions": [
-                {
-                    "entity": "Machine Learning",
-                    "definition": "A subset of AI that enables learning from data."
-                }
-            ],
-            "relationships": []
-        }
-        
+        data = [
+            {
+                "type": "definition",
+                "entity": "Machine Learning",
+                "definition": "A subset of AI that enables learning from data."
+            }
+        ]
+
        triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata)
        
        # Check entity label triple
@@ -196,18 +203,16 @@ class TestAgentKgExtractor:

    def test_process_extraction_data_relationships(self, agent_extractor, sample_metadata):
        """Test processing of relationship data"""
-        data = {
-            "definitions": [],
-            "relationships": [
-                {
-                    "subject": "Machine Learning",
-                    "predicate": "is_subset_of",
-                    "object": "Artificial Intelligence",
-                    "object-entity": True
-                }
-            ]
-        }
-        
+        data = [
+            {
+                "type": "relationship",
+                "subject": "Machine Learning",
+                "predicate": "is_subset_of",
+                "object": "Artificial Intelligence",
+                "object-entity": True
+            }
+        ]
+
        triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata)
        
        # Check that subject, predicate, and object labels are created
@@ -223,15 +228,12 @@ class TestAgentKgExtractor:
        assert predicate_label is not None
        assert predicate_label.o.value == "is_subset_of"
        
-        # Check main relationship triple 
-        # NOTE: Current implementation has bugs:
-        # 1. Uses data.get("object-entity") instead of rel.get("object-entity")
-        # 2. Sets object_value to predicate_uri instead of actual object URI
-        # This test documents the current buggy behavior
+        # Check main relationship triple
+        object_uri = f"{TRUSTGRAPH_ENTITIES}Artificial%20Intelligence"
        rel_triple = next((t for t in triples if t.s.value == subject_uri and t.p.value == predicate_uri), None)
        assert rel_triple is not None
-        # Due to bug, object value is set to predicate_uri
-        assert rel_triple.o.value == predicate_uri
+        assert rel_triple.o.value == object_uri
+        assert rel_triple.o.is_uri == True
        
        # Check subject-of relationships
        subject_of_triples = [t for t in triples if t.p.value == SUBJECT_OF and t.o.value == "doc123"]
@@ -239,20 +241,18 @@ class TestAgentKgExtractor:

    def test_process_extraction_data_literal_object(self, agent_extractor, sample_metadata):
        """Test processing of relationships with literal objects"""
-        data = {
-            "definitions": [],
-            "relationships": [
-                {
-                    "subject": "Deep Learning",
-                    "predicate": "accuracy",
-                    "object": "95%",
-                    "object-entity": False
-                }
-            ]
-        }
-        
+        data = [
+            {
+                "type": "relationship",
+                "subject": "Deep Learning",
+                "predicate": "accuracy",
+                "object": "95%",
+                "object-entity": False
+            }
+        ]
+
        triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata)
-        
+
        # Check that object labels are not created for literal objects
        object_labels = [t for t in triples if t.p.value == RDF_LABEL and t.o.value == "95%"]
        # Based on the code logic, it should not create object labels for non-entity objects
@@ -275,63 +275,50 @@ class TestAgentKgExtractor:
    def test_process_extraction_data_no_metadata_id(self, agent_extractor):
        """Test processing when metadata has no ID"""
        metadata = Metadata(id=None, metadata=[])
-        data = {
-            "definitions": [
-                {"entity": "Test Entity", "definition": "Test definition"}
-            ],
-            "relationships": []
-        }
-        
+        data = [
+            {"type": "definition", "entity": "Test Entity", "definition": "Test definition"}
+        ]
+
        triples, entity_contexts = agent_extractor.process_extraction_data(data, metadata)
-        
+
        # Should not create subject-of relationships when no metadata ID
        subject_of_triples = [t for t in triples if t.p.value == SUBJECT_OF]
        assert len(subject_of_triples) == 0
-        
+
        # Should still create entity contexts
        assert len(entity_contexts) == 1

    def test_process_extraction_data_empty_data(self, agent_extractor, sample_metadata):
        """Test processing of empty extraction data"""
-        data = {"definitions": [], "relationships": []}
-        
-        triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata)
-        
-        # Should only have metadata triples
-        assert len(entity_contexts) == 0
-        # Triples should only contain metadata triples if any
+        data = []

-    def test_process_extraction_data_missing_keys(self, agent_extractor, sample_metadata):
-        """Test processing data with missing keys"""
-        # Test missing definitions key
-        data = {"relationships": []}
        triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata)
+
+        # Should have no entity contexts
        assert len(entity_contexts) == 0
-        
-        # Test missing relationships key
-        data = {"definitions": []}
+        # Triples should be empty
+        assert len(triples) == 0
+
+    def test_process_extraction_data_unknown_types_ignored(self, agent_extractor, sample_metadata):
+        """Test processing data with unknown type values"""
+        data = [
+            {"type": "definition", "entity": "Valid", "definition": "Valid def"},
+            {"type": "unknown_type", "foo": "bar"},  # Unknown type - should be ignored
+            {"type": "relationship", "subject": "A", "predicate": "rel", "object": "B", "object-entity": True}
+        ]
+
        triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata)
-        assert len(entity_contexts) == 0
-        
-        # Test completely missing keys
-        data = {}
-        triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata)
-        assert len(entity_contexts) == 0
+
+        # Should process valid items and ignore unknown types
+        assert len(entity_contexts) == 1  # Only the definition creates entity context

    def test_process_extraction_data_malformed_entries(self, agent_extractor, sample_metadata):
        """Test processing data with malformed entries"""
-        # Test definition missing required fields
-        data = {
-            "definitions": [
-                {"entity": "Test"},  # Missing definition
-                {"definition": "Test def"}  # Missing entity
-            ],
-            "relationships": [
-                {"subject": "A", "predicate": "rel"},  # Missing object
-                {"subject": "B", "object": "C"}  # Missing predicate
-            ]
-        }
-        
+        # Test items missing required fields - should raise KeyError
+        data = [
+            {"type": "definition", "entity": "Test"},  # Missing definition
+        ]
+
        # Should handle gracefully or raise appropriate errors
        with pytest.raises(KeyError):
            agent_extractor.process_extraction_data(data, sample_metadata)
--- a/tests/unit/test_knowledge_graph/test_agent_extraction_edge_cases.py
+++ b/tests/unit/test_knowledge_graph/test_agent_extraction_edge_cases.py
@@ -32,11 +32,11 @@ class TestAgentKgExtractionEdgeCases:
        
        # Set up the methods we want to test
        extractor.to_uri = real_extractor.to_uri
-        extractor.parse_json = real_extractor.parse_json
+        extractor.parse_jsonl = real_extractor.parse_jsonl
        extractor.process_extraction_data = real_extractor.process_extraction_data
        extractor.emit_triples = real_extractor.emit_triples
        extractor.emit_entity_contexts = real_extractor.emit_entity_contexts
-        
+
        return extractor

    def test_to_uri_special_characters(self, agent_extractor):
@@ -85,138 +85,108 @@ class TestAgentKgExtractionEdgeCases:
            # Verify the URI is properly encoded
            assert unicode_text not in uri  # Original unicode should be encoded

-    def test_parse_json_whitespace_variations(self, agent_extractor):
-        """Test JSON parsing with various whitespace patterns"""
-        # Test JSON with different whitespace patterns
+    def test_parse_jsonl_whitespace_variations(self, agent_extractor):
+        """Test JSONL parsing with various whitespace patterns"""
+        # Test JSONL with different whitespace patterns
        test_cases = [
            # Extra whitespace around code blocks
-            "   ```json\n{\"test\": true}\n```   ",
-            # Tabs and mixed whitespace
-            "\t\t```json\n\t{\"test\": true}\n\t```\t",
-            # Multiple newlines
-            "\n\n\n```json\n\n{\"test\": true}\n\n```\n\n",
-            # JSON without code blocks but with whitespace
-            "   {\"test\": true}   ",
-            # Mixed line endings
-            "```json\r\n{\"test\": true}\r\n```",
+            '   ```json\n{"type": "definition", "entity": "test", "definition": "def"}\n```   ',
+            # Multiple newlines between lines
+            '{"type": "definition", "entity": "A", "definition": "def A"}\n\n\n{"type": "definition", "entity": "B", "definition": "def B"}',
+            # JSONL without code blocks but with whitespace
+            '   {"type": "definition", "entity": "test", "definition": "def"}   ',
        ]
-        
-        for response in test_cases:
-            result = agent_extractor.parse_json(response)
-            assert result == {"test": True}

-    def test_parse_json_code_block_variations(self, agent_extractor):
-        """Test JSON parsing with different code block formats"""
+        for response in test_cases:
+            result = agent_extractor.parse_jsonl(response)
+            assert len(result) >= 1
+            assert result[0].get("type") == "definition"
+
+    def test_parse_jsonl_code_block_variations(self, agent_extractor):
+        """Test JSONL parsing with different code block formats"""
        test_cases = [
            # Standard json code block
-            "```json\n{\"valid\": true}\n```",
+            '```json\n{"type": "definition", "entity": "A", "definition": "def"}\n```',
+            # jsonl code block
+            '```jsonl\n{"type": "definition", "entity": "A", "definition": "def"}\n```',
            # Code block without language
-            "```\n{\"valid\": true}\n```",
-            # Uppercase JSON
-            "```JSON\n{\"valid\": true}\n```",
-            # Mixed case
-            "```Json\n{\"valid\": true}\n```",
-            # Multiple code blocks (should take first one)
-            "```json\n{\"first\": true}\n```\n```json\n{\"second\": true}\n```",
-            # Code block with extra content
-            "Here's the result:\n```json\n{\"valid\": true}\n```\nDone!",
+            '```\n{"type": "definition", "entity": "A", "definition": "def"}\n```',
+            # Code block with extra content before/after
+            'Here\'s the result:\n```json\n{"type": "definition", "entity": "A", "definition": "def"}\n```\nDone!',
        ]
-        
+
        for i, response in enumerate(test_cases):
-            try:
-                result = agent_extractor.parse_json(response)
-                assert result.get("valid") == True or result.get("first") == True
-            except json.JSONDecodeError:
-                # Some cases may fail due to regex extraction issues
-                # This documents current behavior - the regex may not match all cases
-                print(f"Case {i} failed JSON parsing: {response[:50]}...")
-                pass
+            result = agent_extractor.parse_jsonl(response)
+            assert len(result) >= 1, f"Case {i} failed"
+            assert result[0].get("entity") == "A"

-    def test_parse_json_malformed_code_blocks(self, agent_extractor):
-        """Test JSON parsing with malformed code block formats"""
-        # These should still work by falling back to treating entire text as JSON
-        test_cases = [
-            # Unclosed code block
-            "```json\n{\"test\": true}",
-            # No opening backticks
-            "{\"test\": true}\n```",
-            # Wrong number of backticks
-            "`json\n{\"test\": true}\n`",
-            # Nested backticks (should handle gracefully)
-            "```json\n{\"code\": \"```\", \"test\": true}\n```",
-        ]
-        
-        for response in test_cases:
-            try:
-                result = agent_extractor.parse_json(response)
-                assert "test" in result  # Should successfully parse
-            except json.JSONDecodeError:
-                # This is also acceptable for malformed cases
-                pass
+    def test_parse_jsonl_truncation_resilience(self, agent_extractor):
+        """Test JSONL parsing with truncated responses"""
+        # Simulates LLM output being cut off mid-line
+        response = '''{"type": "definition", "entity": "Complete1", "definition": "Full definition"}
+{"type": "definition", "entity": "Complete2", "definition": "Another full def"}
+{"type": "definition", "entity": "Trunca'''

-    def test_parse_json_large_responses(self, agent_extractor):
-        """Test JSON parsing with very large responses"""
-        # Create a large JSON structure
-        large_data = {
-            "definitions": [
-                {
-                    "entity": f"Entity {i}",
-                    "definition": f"Definition {i} " + "with more content " * 100
-                }
-                for i in range(100)
-            ],
-            "relationships": [
-                {
-                    "subject": f"Subject {i}",
-                    "predicate": f"predicate_{i}",
-                    "object": f"Object {i}",
-                    "object-entity": i % 2 == 0
-                }
-                for i in range(50)
-            ]
-        }
-        
-        large_json_str = json.dumps(large_data)
-        response = f"```json\n{large_json_str}\n```"
-        
-        result = agent_extractor.parse_json(response)
-        
-        assert len(result["definitions"]) == 100
-        assert len(result["relationships"]) == 50
-        assert result["definitions"][0]["entity"] == "Entity 0"
+        result = agent_extractor.parse_jsonl(response)
+
+        # Should get 2 valid objects, the truncated line is skipped
+        assert len(result) == 2
+        assert result[0]["entity"] == "Complete1"
+        assert result[1]["entity"] == "Complete2"
+
+    def test_parse_jsonl_large_responses(self, agent_extractor):
+        """Test JSONL parsing with very large responses"""
+        # Create a large JSONL response
+        lines = []
+        for i in range(100):
+            lines.append(json.dumps({
+                "type": "definition",
+                "entity": f"Entity {i}",
+                "definition": f"Definition {i} " + "with more content " * 100
+            }))
+        for i in range(50):
+            lines.append(json.dumps({
+                "type": "relationship",
+                "subject": f"Subject {i}",
+                "predicate": f"predicate_{i}",
+                "object": f"Object {i}",
+                "object-entity": i % 2 == 0
+            }))
+
+        response = f"```json\n{chr(10).join(lines)}\n```"
+
+        result = agent_extractor.parse_jsonl(response)
+
+        definitions = [r for r in result if r.get("type") == "definition"]
+        relationships = [r for r in result if r.get("type") == "relationship"]
+
+        assert len(definitions) == 100
+        assert len(relationships) == 50
+        assert definitions[0]["entity"] == "Entity 0"

    def test_process_extraction_data_empty_metadata(self, agent_extractor):
        """Test processing with empty or minimal metadata"""
        # Test with None metadata - may not raise AttributeError depending on implementation
        try:
-            triples, contexts = agent_extractor.process_extraction_data(
-                {"definitions": [], "relationships": []}, 
-                None
-            )
+            triples, contexts = agent_extractor.process_extraction_data([], None)
            # If it doesn't raise, check the results
            assert len(triples) == 0
            assert len(contexts) == 0
        except (AttributeError, TypeError):
            # This is expected behavior when metadata is None
            pass
-        
+
        # Test with metadata without ID
        metadata = Metadata(id=None, metadata=[])
-        triples, contexts = agent_extractor.process_extraction_data(
-            {"definitions": [], "relationships": []},
-            metadata
-        )
+        triples, contexts = agent_extractor.process_extraction_data([], metadata)
        assert len(triples) == 0
        assert len(contexts) == 0
-        
+
        # Test with metadata with empty string ID
        metadata = Metadata(id="", metadata=[])
-        data = {
-            "definitions": [{"entity": "Test", "definition": "Test def"}],
-            "relationships": []
-        }
+        data = [{"type": "definition", "entity": "Test", "definition": "Test def"}]
        triples, contexts = agent_extractor.process_extraction_data(data, metadata)
-        
+
        # Should not create subject-of triples when ID is empty string
        subject_of_triples = [t for t in triples if t.p.value == SUBJECT_OF]
        assert len(subject_of_triples) == 0
@@ -224,7 +194,7 @@ class TestAgentKgExtractionEdgeCases:
    def test_process_extraction_data_special_entity_names(self, agent_extractor):
        """Test processing with special characters in entity names"""
        metadata = Metadata(id="doc123", metadata=[])
-        
+
        special_entities = [
            "Entity with spaces",
            "Entity & Co.",
@@ -237,20 +207,17 @@ class TestAgentKgExtractionEdgeCases:
            "Quotes: \"test\"",
            "Parentheses: (test)",
        ]
-        
-        data = {
-            "definitions": [
-                {"entity": entity, "definition": f"Definition for {entity}"}
-                for entity in special_entities
-            ],
-            "relationships": []
-        }
-        
+
+        data = [
+            {"type": "definition", "entity": entity, "definition": f"Definition for {entity}"}
+            for entity in special_entities
+        ]
+
        triples, contexts = agent_extractor.process_extraction_data(data, metadata)
-        
+
        # Verify all entities were processed
        assert len(contexts) == len(special_entities)
-        
+
        # Verify URIs were properly encoded
        for i, entity in enumerate(special_entities):
            expected_uri = f"{TRUSTGRAPH_ENTITIES}{urllib.parse.quote(entity)}"
@@ -259,23 +226,20 @@ class TestAgentKgExtractionEdgeCases:
    def test_process_extraction_data_very_long_definitions(self, agent_extractor):
        """Test processing with very long entity definitions"""
        metadata = Metadata(id="doc123", metadata=[])
-        
+
        # Create very long definition
        long_definition = "This is a very long definition. " * 1000
-        
-        data = {
-            "definitions": [
-                {"entity": "Test Entity", "definition": long_definition}
-            ],
-            "relationships": []
-        }
-        
+
+        data = [
+            {"type": "definition", "entity": "Test Entity", "definition": long_definition}
+        ]
+
        triples, contexts = agent_extractor.process_extraction_data(data, metadata)
-        
+
        # Should handle long definitions without issues
        assert len(contexts) == 1
        assert contexts[0].context == long_definition
-        
+
        # Find definition triple
        def_triple = next((t for t in triples if t.p.value == DEFINITION), None)
        assert def_triple is not None
@@ -284,22 +248,19 @@ class TestAgentKgExtractionEdgeCases:
    def test_process_extraction_data_duplicate_entities(self, agent_extractor):
        """Test processing with duplicate entity names"""
        metadata = Metadata(id="doc123", metadata=[])
-        
-        data = {
-            "definitions": [
-                {"entity": "Machine Learning", "definition": "First definition"},
-                {"entity": "Machine Learning", "definition": "Second definition"},  # Duplicate
-                {"entity": "AI", "definition": "AI definition"},
-                {"entity": "AI", "definition": "Another AI definition"},  # Duplicate
-            ],
-            "relationships": []
-        }
-        
+
+        data = [
+            {"type": "definition", "entity": "Machine Learning", "definition": "First definition"},
+            {"type": "definition", "entity": "Machine Learning", "definition": "Second definition"},  # Duplicate
+            {"type": "definition", "entity": "AI", "definition": "AI definition"},
+            {"type": "definition", "entity": "AI", "definition": "Another AI definition"},  # Duplicate
+        ]
+
        triples, contexts = agent_extractor.process_extraction_data(data, metadata)
-        
+
        # Should process all entries (including duplicates)
        assert len(contexts) == 4
-        
+
        # Check that both definitions for "Machine Learning" are present
        ml_contexts = [ec for ec in contexts if "Machine%20Learning" in ec.entity.value]
        assert len(ml_contexts) == 2
@@ -309,25 +270,21 @@ class TestAgentKgExtractionEdgeCases:
    def test_process_extraction_data_empty_strings(self, agent_extractor):
        """Test processing with empty strings in data"""
        metadata = Metadata(id="doc123", metadata=[])
-        
-        data = {
-            "definitions": [
-                {"entity": "", "definition": "Definition for empty entity"},
-                {"entity": "Valid Entity", "definition": ""},
-                {"entity": "  ", "definition": "   "},  # Whitespace only
-            ],
-            "relationships": [
-                {"subject": "", "predicate": "test", "object": "test", "object-entity": True},
-                {"subject": "test", "predicate": "", "object": "test", "object-entity": True},
-                {"subject": "test", "predicate": "test", "object": "", "object-entity": True},
-            ]
-        }
-        
+
+        data = [
+            {"type": "definition", "entity": "", "definition": "Definition for empty entity"},
+            {"type": "definition", "entity": "Valid Entity", "definition": ""},
+            {"type": "definition", "entity": "  ", "definition": "   "},  # Whitespace only
+            {"type": "relationship", "subject": "", "predicate": "test", "object": "test", "object-entity": True},
+            {"type": "relationship", "subject": "test", "predicate": "", "object": "test", "object-entity": True},
+            {"type": "relationship", "subject": "test", "predicate": "test", "object": "", "object-entity": True},
+        ]
+
        triples, contexts = agent_extractor.process_extraction_data(data, metadata)
-        
+
        # Should handle empty strings by creating URIs (even if empty)
        assert len(contexts) == 3
-        
+
        # Empty entity should create empty URI after encoding
        empty_entity_context = next((ec for ec in contexts if ec.entity.value == TRUSTGRAPH_ENTITIES), None)
        assert empty_entity_context is not None
@ -335,23 +292,22 @@ class TestAgentKgExtractionEdgeCases:
    def test_process_extraction_data_nested_json_in_strings(self, agent_extractor):
        """Test processing when definitions contain JSON-like strings"""
        metadata = Metadata(id="doc123", metadata=[])
-        
-        data = {
-            "definitions": [
-                {
-                    "entity": "JSON Entity",
-                    "definition": 'Definition with JSON: {"key": "value", "nested": {"inner": true}}'
-                },
-                {
-                    "entity": "Array Entity", 
-                    "definition": 'Contains array: [1, 2, 3, "string"]'
-                }
-            ],
-            "relationships": []
-        }
-        
+
+        data = [
+            {
+                "type": "definition",
+                "entity": "JSON Entity",
+                "definition": 'Definition with JSON: {"key": "value", "nested": {"inner": true}}'
+            },
+            {
+                "type": "definition",
+                "entity": "Array Entity",
+                "definition": 'Contains array: [1, 2, 3, "string"]'
+            }
+        ]
+
        triples, contexts = agent_extractor.process_extraction_data(data, metadata)
-        
+
        # Should handle JSON strings in definitions without parsing them
        assert len(contexts) == 2
        assert '{"key": "value"' in contexts[0].context
@ -360,29 +316,26 @@ class TestAgentKgExtractionEdgeCases:
    def test_process_extraction_data_boolean_object_entity_variations(self, agent_extractor):
        """Test processing with various boolean values for object-entity"""
        metadata = Metadata(id="doc123", metadata=[])
-        
-        data = {
-            "definitions": [],
-            "relationships": [
-                # Explicit True
-                {"subject": "A", "predicate": "rel1", "object": "B", "object-entity": True},
-                # Explicit False  
-                {"subject": "A", "predicate": "rel2", "object": "literal", "object-entity": False},
-                # Missing object-entity (should default to True based on code)
-                {"subject": "A", "predicate": "rel3", "object": "C"},
-                # String "true" (should be treated as truthy)
-                {"subject": "A", "predicate": "rel4", "object": "D", "object-entity": "true"},
-                # String "false" (should be treated as truthy in Python)
-                {"subject": "A", "predicate": "rel5", "object": "E", "object-entity": "false"},
-                # Number 0 (falsy)
-                {"subject": "A", "predicate": "rel6", "object": "literal2", "object-entity": 0},
-                # Number 1 (truthy)
-                {"subject": "A", "predicate": "rel7", "object": "F", "object-entity": 1},
-            ]
-        }
-        
+
+        data = [
+            # Explicit True
+            {"type": "relationship", "subject": "A", "predicate": "rel1", "object": "B", "object-entity": True},
+            # Explicit False
+            {"type": "relationship", "subject": "A", "predicate": "rel2", "object": "literal", "object-entity": False},
+            # Missing object-entity key (the extractor is expected to default it to True)
+            {"type": "relationship", "subject": "A", "predicate": "rel3", "object": "C"},
+            # String "true" (should be treated as truthy)
+            {"type": "relationship", "subject": "A", "predicate": "rel4", "object": "D", "object-entity": "true"},
+            # String "false" (a non-empty string, so it is still truthy in Python)
+            {"type": "relationship", "subject": "A", "predicate": "rel5", "object": "E", "object-entity": "false"},
+            # Number 0 (falsy)
+            {"type": "relationship", "subject": "A", "predicate": "rel6", "object": "literal2", "object-entity": 0},
+            # Number 1 (truthy)
+            {"type": "relationship", "subject": "A", "predicate": "rel7", "object": "F", "object-entity": 1},
+        ]
+
        triples, contexts = agent_extractor.process_extraction_data(data, metadata)
-        
+
        # Should process all relationships
        # Note: The current implementation has some logic issues that these tests document
        assert len([t for t in triples if t.p.value != RDF_LABEL and t.p.value != SUBJECT_OF]) >= 7
@ -437,41 +390,40 @@ class TestAgentKgExtractionEdgeCases:
    def test_process_extraction_data_performance_large_dataset(self, agent_extractor):
        """Test performance with large extraction datasets"""
        metadata = Metadata(id="large-doc", metadata=[])
-        
-        # Create large dataset
+
+        # Create large dataset as a flat list of typed records (the parsed-JSONL shape)
        num_definitions = 1000
        num_relationships = 2000
-        
-        large_data = {
-            "definitions": [
-                {
-                    "entity": f"Entity_{i:04d}",
-                    "definition": f"Definition for entity {i} with some detailed explanation."
-                }
-                for i in range(num_definitions)
-            ],
-            "relationships": [
-                {
-                    "subject": f"Entity_{i % num_definitions:04d}",
-                    "predicate": f"predicate_{i % 10}",
-                    "object": f"Entity_{(i + 1) % num_definitions:04d}",
-                    "object-entity": True
-                }
-                for i in range(num_relationships)
-            ]
-        }
-        
+
+        large_data = [
+            {
+                "type": "definition",
+                "entity": f"Entity_{i:04d}",
+                "definition": f"Definition for entity {i} with some detailed explanation."
+            }
+            for i in range(num_definitions)
+        ] + [
+            {
+                "type": "relationship",
+                "subject": f"Entity_{i % num_definitions:04d}",
+                "predicate": f"predicate_{i % 10}",
+                "object": f"Entity_{(i + 1) % num_definitions:04d}",
+                "object-entity": True
+            }
+            for i in range(num_relationships)
+        ]
+
        import time
        start_time = time.time()
-        
+
        triples, contexts = agent_extractor.process_extraction_data(large_data, metadata)
-        
+
        end_time = time.time()
        processing_time = end_time - start_time
-        
+
        # Should complete within reasonable time (adjust threshold as needed)
        assert processing_time < 10.0  # 10 seconds threshold
-        
+
        # Verify results
        assert len(contexts) == num_definitions
        # Triples include labels, definitions, relationships, and subject-of relations
--- a/tests/unit/test_prompt_manager.py
+++ b/tests/unit/test_prompt_manager.py
@ -339,7 +339,250 @@ class TestPromptManager:
        """Test PromptManager with minimal configuration"""
        pm = PromptManager()
        pm.load_config({})  # Empty config
-        
+
        assert pm.config.system_template == "Be helpful."  # Default system
        assert pm.terms == {}  # Default empty terms
-        assert len(pm.prompts) == 0
+        assert len(pm.prompts) == 0
+
+
+@pytest.mark.unit
+class TestPromptManagerJsonl:
+    """Unit tests for PromptManager JSONL functionality"""
+
+    @pytest.fixture
+    def jsonl_config(self):
+        """Configuration with JSONL response type prompts"""
+        return {
+            "system": json.dumps("You are an extraction assistant."),
+            "template-index": json.dumps(["extract_simple", "extract_with_schema", "extract_mixed"]),
+            "template.extract_simple": json.dumps({
+                "prompt": "Extract entities from: {{ text }}",
+                "response-type": "jsonl"
+            }),
+            "template.extract_with_schema": json.dumps({
+                "prompt": "Extract definitions from: {{ text }}",
+                "response-type": "jsonl",
+                "schema": {
+                    "type": "object",
+                    "properties": {
+                        "entity": {"type": "string"},
+                        "definition": {"type": "string"}
+                    },
+                    "required": ["entity", "definition"]
+                }
+            }),
+            "template.extract_mixed": json.dumps({
+                "prompt": "Extract knowledge from: {{ text }}",
+                "response-type": "jsonl",
+                "schema": {
+                    "oneOf": [
+                        {
+                            "type": "object",
+                            "properties": {
+                                "type": {"const": "definition"},
+                                "entity": {"type": "string"},
+                                "definition": {"type": "string"}
+                            },
+                            "required": ["type", "entity", "definition"]
+                        },
+                        {
+                            "type": "object",
+                            "properties": {
+                                "type": {"const": "relationship"},
+                                "subject": {"type": "string"},
+                                "predicate": {"type": "string"},
+                                "object": {"type": "string"}
+                            },
+                            "required": ["type", "subject", "predicate", "object"]
+                        }
+                    ]
+                }
+            })
+        }
+
+    @pytest.fixture
+    def prompt_manager(self, jsonl_config):
+        """Create a PromptManager with JSONL configuration"""
+        pm = PromptManager()
+        pm.load_config(jsonl_config)
+        return pm
+
+    def test_parse_jsonl_basic(self, prompt_manager):
+        """Test basic JSONL parsing"""
+        text = '{"entity": "cat", "definition": "A small furry animal"}\n{"entity": "dog", "definition": "A loyal pet"}'
+
+        result = prompt_manager.parse_jsonl(text)
+
+        assert len(result) == 2
+        assert result[0]["entity"] == "cat"
+        assert result[1]["entity"] == "dog"
+
+    def test_parse_jsonl_with_empty_lines(self, prompt_manager):
+        """Test JSONL parsing skips empty lines"""
+        text = '{"entity": "cat"}\n\n\n{"entity": "dog"}\n'
+
+        result = prompt_manager.parse_jsonl(text)
+
+        assert len(result) == 2
+
+    def test_parse_jsonl_with_markdown_fences(self, prompt_manager):
+        """Test JSONL parsing strips json-marked markdown code fences"""
+        text = '''```json
+{"entity": "cat", "definition": "A furry animal"}
+{"entity": "dog", "definition": "A loyal pet"}
+```'''
+
+        result = prompt_manager.parse_jsonl(text)
+
+        assert len(result) == 2
+        assert result[0]["entity"] == "cat"
+        assert result[1]["entity"] == "dog"
+
+    def test_parse_jsonl_with_jsonl_fence(self, prompt_manager):
+        """Test JSONL parsing strips jsonl-marked code fences"""
+        text = '''```jsonl
+{"entity": "cat"}
+{"entity": "dog"}
+```'''
+
+        result = prompt_manager.parse_jsonl(text)
+
+        assert len(result) == 2
+
+    def test_parse_jsonl_truncation_resilience(self, prompt_manager):
+        """Test JSONL parsing handles truncated final line"""
+        text = '{"entity": "cat", "definition": "Complete"}\n{"entity": "dog", "defi'
+
+        result = prompt_manager.parse_jsonl(text)
+
+        # Should get the first valid object, skip the truncated one
+        assert len(result) == 1
+        assert result[0]["entity"] == "cat"
+
+    def test_parse_jsonl_invalid_lines_skipped(self, prompt_manager):
+        """Test JSONL parsing skips invalid JSON lines"""
+        text = '''{"entity": "valid1"}
+not json at all
+{"entity": "valid2"}
+{broken json
+{"entity": "valid3"}'''
+
+        result = prompt_manager.parse_jsonl(text)
+
+        assert len(result) == 3
+        assert result[0]["entity"] == "valid1"
+        assert result[1]["entity"] == "valid2"
+        assert result[2]["entity"] == "valid3"
+
+    def test_parse_jsonl_empty_input(self, prompt_manager):
+        """Test JSONL parsing with empty input"""
+        result = prompt_manager.parse_jsonl("")
+        assert result == []
+
+        result = prompt_manager.parse_jsonl("\n\n\n")
+        assert result == []
+
+    @pytest.mark.asyncio
+    async def test_invoke_jsonl_response(self, prompt_manager):
+        """Test invoking a prompt with JSONL response"""
+        mock_llm = AsyncMock()
+        mock_llm.return_value = '{"entity": "photosynthesis", "definition": "Plant process"}\n{"entity": "mitosis", "definition": "Cell division"}'
+
+        result = await prompt_manager.invoke(
+            "extract_simple",
+            {"text": "Biology text"},
+            mock_llm
+        )
+
+        assert isinstance(result, list)
+        assert len(result) == 2
+        assert result[0]["entity"] == "photosynthesis"
+        assert result[1]["entity"] == "mitosis"
+
+    @pytest.mark.asyncio
+    async def test_invoke_jsonl_with_schema_validation(self, prompt_manager):
+        """Test JSONL response with schema validation"""
+        mock_llm = AsyncMock()
+        mock_llm.return_value = '{"entity": "cat", "definition": "A pet"}\n{"entity": "dog", "definition": "Another pet"}'
+
+        result = await prompt_manager.invoke(
+            "extract_with_schema",
+            {"text": "Animal text"},
+            mock_llm
+        )
+
+        assert len(result) == 2
+        assert all("entity" in obj and "definition" in obj for obj in result)
+
+    @pytest.mark.asyncio
+    async def test_invoke_jsonl_schema_filters_invalid(self, prompt_manager):
+        """Test JSONL schema validation filters out invalid objects"""
+        mock_llm = AsyncMock()
+        # Second object is missing required 'definition' field
+        mock_llm.return_value = '{"entity": "valid", "definition": "Has both fields"}\n{"entity": "invalid_missing_definition"}\n{"entity": "also_valid", "definition": "Complete"}'
+
+        result = await prompt_manager.invoke(
+            "extract_with_schema",
+            {"text": "Test text"},
+            mock_llm
+        )
+
+        # Only the two valid objects should be returned
+        assert len(result) == 2
+        assert result[0]["entity"] == "valid"
+        assert result[1]["entity"] == "also_valid"
+
+    @pytest.mark.asyncio
+    async def test_invoke_jsonl_mixed_types(self, prompt_manager):
+        """Test JSONL with discriminated union schema (oneOf)"""
+        mock_llm = AsyncMock()
+        mock_llm.return_value = '''{"type": "definition", "entity": "DNA", "definition": "Genetic material"}
+{"type": "relationship", "subject": "DNA", "predicate": "found_in", "object": "nucleus"}
+{"type": "definition", "entity": "RNA", "definition": "Messenger molecule"}'''
+
+        result = await prompt_manager.invoke(
+            "extract_mixed",
+            {"text": "Biology text"},
+            mock_llm
+        )
+
+        assert len(result) == 3
+
+        # Check definitions
+        definitions = [r for r in result if r.get("type") == "definition"]
+        assert len(definitions) == 2
+
+        # Check relationships
+        relationships = [r for r in result if r.get("type") == "relationship"]
+        assert len(relationships) == 1
+        assert relationships[0]["subject"] == "DNA"
+
+    @pytest.mark.asyncio
+    async def test_invoke_jsonl_empty_result(self, prompt_manager):
+        """Test JSONL response that yields no valid objects"""
+        mock_llm = AsyncMock()
+        mock_llm.return_value = "No JSON here at all"
+
+        result = await prompt_manager.invoke(
+            "extract_simple",
+            {"text": "Test"},
+            mock_llm
+        )
+
+        assert result == []
+
+    @pytest.mark.asyncio
+    async def test_invoke_jsonl_without_schema(self, prompt_manager):
+        """Test JSONL response without a schema returns parsed objects unchanged"""
+        mock_llm = AsyncMock()
+        mock_llm.return_value = '{"any": "structure"}\n{"completely": "different"}'
+
+        result = await prompt_manager.invoke(
+            "extract_simple",
+            {"text": "Test"},
+            mock_llm
+        )
+
+        assert len(result) == 2
+        assert result[0] == {"any": "structure"}
+        assert result[1] == {"completely": "different"}