Updated test suite for explainability & provenance (#696)

* Provenance tests * Embeddings tests * Test librarian * Test triples stream * Test concurrency * Entity centric graph writes * Agent tool service tests * Structured data tests * RDF tests * Addition LLM tests * Reliability tests
2026-04-26 00:46:22 +02:00 · 2026-03-13 14:27:42 +00:00 · 2026-03-13 14:27:42 +00:00 · 29b4300808
commit 29b4300808
parent e6623fc915
36 changed files with 8799 additions and 0 deletions
--- a/tests/unit/test_extract/test_streaming_triples/init.py
+++ b/tests/unit/test_extract/test_streaming_triples/init.py
@ -0,0 +1 @@
+
--- a/tests/unit/test_extract/test_streaming_triples/test_definitions_batching.py
+++ b/tests/unit/test_extract/test_streaming_triples/test_definitions_batching.py
@ -0,0 +1,407 @@
+"""
+Tests for streaming triple and entity context batching in the definitions
+KG extractor.
+
+Covers: triples batch splitting, entity context batch splitting,
+metadata preservation, provenance, and empty/null filtering.
+"""
+
+import pytest
+from unittest.mock import AsyncMock, MagicMock
+
+from trustgraph.extract.kg.definitions.extract import (
+    Processor, default_triples_batch_size, default_entity_batch_size,
+)
+from trustgraph.schema import (
+    Chunk, Triples, EntityContexts, Triple, Metadata, Term, IRI, LITERAL,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_processor(triples_batch_size=default_triples_batch_size,
+                    entity_batch_size=default_entity_batch_size):
+    proc = Processor.__new__(Processor)
+    proc.triples_batch_size = triples_batch_size
+    proc.entity_batch_size = entity_batch_size
+    return proc
+
+
+def _make_defn(entity, definition):
+    return {"entity": entity, "definition": definition}
+
+
+def _make_chunk_msg(text, meta_id="chunk-1", root="root-1",
+                    user="user-1", collection="col-1", document_id=""):
+    chunk = Chunk(
+        metadata=Metadata(
+            id=meta_id, root=root, user=user, collection=collection,
+        ),
+        chunk=text.encode("utf-8"),
+        document_id=document_id,
+    )
+    msg = MagicMock()
+    msg.value.return_value = chunk
+    return msg
+
+
+def _make_flow(prompt_result, llm_model="test-llm", ontology_uri="test-onto"):
+    mock_triples_pub = AsyncMock()
+    mock_ecs_pub = AsyncMock()
+    mock_prompt_client = AsyncMock()
+    mock_prompt_client.extract_definitions = AsyncMock(
+        return_value=prompt_result
+    )
+
+    def flow(name):
+        if name == "prompt-request":
+            return mock_prompt_client
+        if name == "triples":
+            return mock_triples_pub
+        if name == "entity-contexts":
+            return mock_ecs_pub
+        if name == "llm-model":
+            return llm_model
+        if name == "ontology":
+            return ontology_uri
+        return MagicMock()
+
+    return flow, mock_triples_pub, mock_ecs_pub, mock_prompt_client
+
+
+def _sent_triples(mock_pub):
+    return [call.args[0] for call in mock_pub.send.call_args_list]
+
+
+def _sent_ecs(mock_pub):
+    return [call.args[0] for call in mock_pub.send.call_args_list]
+
+
+def _all_triples_flat(mock_pub):
+    result = []
+    for triples_msg in _sent_triples(mock_pub):
+        result.extend(triples_msg.triples)
+    return result
+
+
+def _all_entities_flat(mock_pub):
+    result = []
+    for ecs_msg in _sent_ecs(mock_pub):
+        result.extend(ecs_msg.entities)
+    return result
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+class TestDefaults:
+
+    def test_default_triples_batch_size(self):
+        assert default_triples_batch_size == 50
+
+    def test_default_entity_batch_size(self):
+        assert default_entity_batch_size == 5
+
+
+class TestTriplesBatching:
+
+    @pytest.mark.asyncio
+    async def test_single_batch_when_under_limit(self):
+        proc = _make_processor(triples_batch_size=100)
+        defs = [_make_defn("Cat", "A feline animal")]
+        flow, triples_pub, _, _ = _make_flow(defs)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        assert triples_pub.send.call_count == 1
+
+    @pytest.mark.asyncio
+    async def test_multiple_triples_batches(self):
+        proc = _make_processor(triples_batch_size=2)
+        defs = [
+            _make_defn("Cat", "A feline"),
+            _make_defn("Dog", "A canine"),
+        ]
+        flow, triples_pub, _, _ = _make_flow(defs)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        # 2 defs → 2 labels + 2 definitions = 4 triples + provenance
+        # With batch_size=2, should produce multiple batches
+        assert triples_pub.send.call_count > 1
+
+    @pytest.mark.asyncio
+    async def test_triples_batch_sizes_within_limit(self):
+        batch_size = 3
+        proc = _make_processor(triples_batch_size=batch_size)
+        defs = [
+            _make_defn("A", "def A"),
+            _make_defn("B", "def B"),
+            _make_defn("C", "def C"),
+        ]
+        flow, triples_pub, _, _ = _make_flow(defs)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        for triples_msg in _sent_triples(triples_pub):
+            assert len(triples_msg.triples) <= batch_size
+
+
+class TestEntityContextBatching:
+
+    @pytest.mark.asyncio
+    async def test_single_entity_batch_when_under_limit(self):
+        proc = _make_processor(entity_batch_size=100)
+        defs = [_make_defn("Cat", "A feline")]
+        flow, _, ecs_pub, _ = _make_flow(defs)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        # 1 def → 2 entity contexts (name + definition)
+        assert ecs_pub.send.call_count == 1
+
+    @pytest.mark.asyncio
+    async def test_multiple_entity_batches(self):
+        proc = _make_processor(entity_batch_size=2)
+        defs = [
+            _make_defn("Cat", "A feline"),
+            _make_defn("Dog", "A canine"),
+        ]
+        flow, _, ecs_pub, _ = _make_flow(defs)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        # 2 defs → 4 entity contexts, batch_size=2 → 2 batches
+        assert ecs_pub.send.call_count == 2
+
+    @pytest.mark.asyncio
+    async def test_entity_batch_sizes_within_limit(self):
+        batch_size = 3
+        proc = _make_processor(entity_batch_size=batch_size)
+        defs = [
+            _make_defn("A", "def A"),
+            _make_defn("B", "def B"),
+            _make_defn("C", "def C"),
+        ]
+        flow, _, ecs_pub, _ = _make_flow(defs)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        for ecs_msg in _sent_ecs(ecs_pub):
+            assert len(ecs_msg.entities) <= batch_size
+
+    @pytest.mark.asyncio
+    async def test_entity_contexts_have_name_and_definition(self):
+        """Each definition produces 2 entity contexts: name and definition."""
+        proc = _make_processor(entity_batch_size=100)
+        defs = [_make_defn("Cat", "A feline animal")]
+        flow, _, ecs_pub, _ = _make_flow(defs)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        entities = _all_entities_flat(ecs_pub)
+        assert len(entities) == 2
+        contexts = {e.context for e in entities}
+        assert "Cat" in contexts
+        assert "A feline animal" in contexts
+
+
+class TestMetadataPreservation:
+
+    @pytest.mark.asyncio
+    async def test_triples_metadata(self):
+        proc = _make_processor(triples_batch_size=2)
+        defs = [_make_defn("X", "def X")]
+        flow, triples_pub, _, _ = _make_flow(defs)
+        msg = _make_chunk_msg(
+            "text", meta_id="c-1", root="r-1",
+            user="u-1", collection="coll-1",
+        )
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        for triples_msg in _sent_triples(triples_pub):
+            assert triples_msg.metadata.id == "c-1"
+            assert triples_msg.metadata.root == "r-1"
+            assert triples_msg.metadata.user == "u-1"
+            assert triples_msg.metadata.collection == "coll-1"
+
+    @pytest.mark.asyncio
+    async def test_entity_contexts_metadata(self):
+        proc = _make_processor(entity_batch_size=1)
+        defs = [_make_defn("X", "def X")]
+        flow, _, ecs_pub, _ = _make_flow(defs)
+        msg = _make_chunk_msg(
+            "text", meta_id="c-2", root="r-2",
+            user="u-2", collection="coll-2",
+        )
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        for ecs_msg in _sent_ecs(ecs_pub):
+            assert ecs_msg.metadata.id == "c-2"
+            assert ecs_msg.metadata.root == "r-2"
+
+
+class TestEmptyAndNullFiltering:
+
+    @pytest.mark.asyncio
+    async def test_empty_entity_skipped(self):
+        proc = _make_processor()
+        defs = [
+            _make_defn("", "some definition"),
+            _make_defn("Valid", "a valid definition"),
+        ]
+        flow, triples_pub, ecs_pub, _ = _make_flow(defs)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        all_t = _all_triples_flat(triples_pub)
+        all_e = _all_entities_flat(ecs_pub)
+        # Only "Valid" should be present
+        entity_iris = {t.s.iri for t in all_t if hasattr(t.s, "iri")}
+        assert any("valid" in iri for iri in entity_iris)
+        assert len(all_e) == 2  # name + definition for "Valid" only
+
+    @pytest.mark.asyncio
+    async def test_empty_definition_skipped(self):
+        proc = _make_processor()
+        defs = [
+            _make_defn("Entity", ""),
+            _make_defn("Good", "good definition"),
+        ]
+        flow, triples_pub, _, _ = _make_flow(defs)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        all_t = _all_triples_flat(triples_pub)
+        entity_iris = {t.s.iri for t in all_t if hasattr(t.s, "iri")}
+        assert any("good" in iri for iri in entity_iris)
+        # "Entity" with empty def should have been skipped
+        assert not any("entity" in iri and "good" not in iri for iri in entity_iris)
+
+    @pytest.mark.asyncio
+    async def test_none_fields_skipped(self):
+        proc = _make_processor()
+        defs = [
+            _make_defn(None, "some definition"),
+            _make_defn("Entity", None),
+        ]
+        flow, triples_pub, ecs_pub, _ = _make_flow(defs)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        assert triples_pub.send.call_count == 0
+        assert ecs_pub.send.call_count == 0
+
+    @pytest.mark.asyncio
+    async def test_all_filtered_no_output(self):
+        proc = _make_processor()
+        defs = [_make_defn("", ""), _make_defn(None, None)]
+        flow, triples_pub, ecs_pub, _ = _make_flow(defs)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        assert triples_pub.send.call_count == 0
+        assert ecs_pub.send.call_count == 0
+
+    @pytest.mark.asyncio
+    async def test_empty_prompt_response(self):
+        proc = _make_processor()
+        flow, triples_pub, ecs_pub, _ = _make_flow([])
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        assert triples_pub.send.call_count == 0
+        assert ecs_pub.send.call_count == 0
+
+
+class TestProvenanceInclusion:
+
+    @pytest.mark.asyncio
+    async def test_provenance_triples_present(self):
+        proc = _make_processor(triples_batch_size=200)
+        defs = [_make_defn("Cat", "A feline")]
+        flow, triples_pub, _, _ = _make_flow(defs)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        all_t = _all_triples_flat(triples_pub)
+        # 1 def → 1 label + 1 definition = 2 content triples
+        # Provenance adds more
+        assert len(all_t) > 2
+
+
+class TestErrorHandling:
+
+    @pytest.mark.asyncio
+    async def test_prompt_error_caught(self):
+        proc = _make_processor()
+        flow, triples_pub, ecs_pub, prompt = _make_flow([])
+        prompt.extract_definitions = AsyncMock(
+            side_effect=RuntimeError("LLM error")
+        )
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        assert triples_pub.send.call_count == 0
+        assert ecs_pub.send.call_count == 0
+
+    @pytest.mark.asyncio
+    async def test_non_list_response_caught(self):
+        proc = _make_processor()
+        flow, triples_pub, ecs_pub, prompt = _make_flow("not a list")
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        assert triples_pub.send.call_count == 0
+        assert ecs_pub.send.call_count == 0
+
+
+class TestDocumentIdProvenance:
+
+    @pytest.mark.asyncio
+    async def test_document_id_used_for_chunk_id(self):
+        """When document_id is set, entity contexts should use it as chunk_id."""
+        proc = _make_processor(entity_batch_size=100)
+        defs = [_make_defn("Cat", "A feline")]
+        flow, _, ecs_pub, _ = _make_flow(defs)
+        msg = _make_chunk_msg("text", document_id="doc-123")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        entities = _all_entities_flat(ecs_pub)
+        for e in entities:
+            assert e.chunk_id == "doc-123"
+
+    @pytest.mark.asyncio
+    async def test_metadata_id_fallback_for_chunk_id(self):
+        """When document_id is empty, metadata.id is used as chunk_id."""
+        proc = _make_processor(entity_batch_size=100)
+        defs = [_make_defn("Cat", "A feline")]
+        flow, _, ecs_pub, _ = _make_flow(defs)
+        msg = _make_chunk_msg("text", meta_id="chunk-42", document_id="")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        entities = _all_entities_flat(ecs_pub)
+        for e in entities:
+            assert e.chunk_id == "chunk-42"
--- a/tests/unit/test_extract/test_streaming_triples/test_relationships_batching.py
+++ b/tests/unit/test_extract/test_streaming_triples/test_relationships_batching.py
@ -0,0 +1,408 @@
+"""
+Tests for streaming triple batching in the relationships KG extractor.
+
+Covers: batch size configuration, output splitting, metadata preservation,
+provenance inclusion, empty/null filtering, and error propagation.
+"""
+
+import pytest
+from unittest.mock import AsyncMock, MagicMock, patch
+
+from trustgraph.extract.kg.relationships.extract import (
+    Processor, default_triples_batch_size,
+)
+from trustgraph.schema import (
+    Chunk, Triples, Triple, Metadata, Term, IRI, LITERAL,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_processor(triples_batch_size=default_triples_batch_size):
+    """Create a Processor without triggering FlowProcessor.__init__."""
+    proc = Processor.__new__(Processor)
+    proc.triples_batch_size = triples_batch_size
+    return proc
+
+
+def _make_rel(subject, predicate, obj, object_entity=True):
+    """Build a relationship dict as returned by the prompt client."""
+    return {
+        "subject": subject,
+        "predicate": predicate,
+        "object": obj,
+        "object-entity": object_entity,
+    }
+
+
+def _make_chunk_msg(text, meta_id="chunk-1", root="root-1",
+                    user="user-1", collection="col-1", document_id=""):
+    """Build a mock message wrapping a Chunk."""
+    chunk = Chunk(
+        metadata=Metadata(
+            id=meta_id, root=root, user=user, collection=collection,
+        ),
+        chunk=text.encode("utf-8"),
+        document_id=document_id,
+    )
+    msg = MagicMock()
+    msg.value.return_value = chunk
+    return msg
+
+
+def _make_flow(prompt_result, llm_model="test-llm", ontology_uri="test-onto"):
+    """Build a mock flow callable that provides prompt client, triples
+    producer, and parameter specs."""
+    mock_triples_pub = AsyncMock()
+    mock_prompt_client = AsyncMock()
+    mock_prompt_client.extract_relationships = AsyncMock(
+        return_value=prompt_result
+    )
+
+    def flow(name):
+        if name == "prompt-request":
+            return mock_prompt_client
+        if name == "triples":
+            return mock_triples_pub
+        if name == "llm-model":
+            return llm_model
+        if name == "ontology":
+            return ontology_uri
+        return MagicMock()
+
+    return flow, mock_triples_pub, mock_prompt_client
+
+
+def _sent_triples(mock_pub):
+    """Collect all Triples objects sent to a mock publisher."""
+    return [call.args[0] for call in mock_pub.send.call_args_list]
+
+
+def _all_triples_flat(mock_pub):
+    """Flatten all batches into one list of Triple objects."""
+    result = []
+    for triples_msg in _sent_triples(mock_pub):
+        result.extend(triples_msg.triples)
+    return result
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+class TestDefaultBatchSize:
+
+    def test_default_is_50(self):
+        assert default_triples_batch_size == 50
+
+    def test_processor_uses_default(self):
+        proc = _make_processor()
+        assert proc.triples_batch_size == 50
+
+
+class TestBatchSplitting:
+
+    @pytest.mark.asyncio
+    async def test_single_batch_when_under_limit(self):
+        """Few triples → single send call."""
+        proc = _make_processor(triples_batch_size=50)
+        rels = [_make_rel("A", "knows", "B")]
+        flow, pub, _ = _make_flow(rels)
+        msg = _make_chunk_msg("some text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        # One relationship produces: rel triple + 3 labels + provenance
+        # All should fit in one batch of 50
+        assert pub.send.call_count == 1
+
+    @pytest.mark.asyncio
+    async def test_multiple_batches_with_small_batch_size(self):
+        """With batch_size=3 and many triples, multiple batches are sent."""
+        proc = _make_processor(triples_batch_size=3)
+        # 2 relationships → 2 rel triples + 6 labels = 8 triples + provenance
+        rels = [
+            _make_rel("A", "knows", "B"),
+            _make_rel("C", "likes", "D"),
+        ]
+        flow, pub, _ = _make_flow(rels)
+        msg = _make_chunk_msg("some text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        # Should have more than one batch
+        assert pub.send.call_count > 1
+
+    @pytest.mark.asyncio
+    async def test_batch_sizes_respect_limit(self):
+        """No batch should exceed the configured batch size."""
+        batch_size = 3
+        proc = _make_processor(triples_batch_size=batch_size)
+        rels = [
+            _make_rel("A", "knows", "B"),
+            _make_rel("C", "likes", "D"),
+            _make_rel("E", "has", "F"),
+        ]
+        flow, pub, _ = _make_flow(rels)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        for triples_msg in _sent_triples(pub):
+            assert len(triples_msg.triples) <= batch_size
+
+    @pytest.mark.asyncio
+    async def test_all_triples_present_across_batches(self):
+        """Total triples across batches equals expected count."""
+        proc = _make_processor(triples_batch_size=2)
+        # 1 relationship with object-entity=True → 1 rel + 3 labels = 4 triples
+        # + provenance triples
+        rels = [_make_rel("A", "knows", "B", object_entity=True)]
+        flow, pub, _ = _make_flow(rels)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        all_t = _all_triples_flat(pub)
+        # At minimum: 1 rel + 3 labels = 4 content triples
+        assert len(all_t) >= 4
+
+    @pytest.mark.asyncio
+    async def test_custom_batch_size(self):
+        """Processor respects custom triples_batch_size parameter."""
+        proc = _make_processor(triples_batch_size=100)
+        assert proc.triples_batch_size == 100
+
+
+class TestMetadataPreservation:
+
+    @pytest.mark.asyncio
+    async def test_metadata_forwarded_to_all_batches(self):
+        """Every batch should carry the original chunk metadata."""
+        proc = _make_processor(triples_batch_size=2)
+        rels = [_make_rel("X", "rel", "Y")]
+        flow, pub, _ = _make_flow(rels)
+        msg = _make_chunk_msg(
+            "text", meta_id="c-1", root="r-1",
+            user="u-1", collection="coll-1",
+        )
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        for triples_msg in _sent_triples(pub):
+            assert triples_msg.metadata.id == "c-1"
+            assert triples_msg.metadata.root == "r-1"
+            assert triples_msg.metadata.user == "u-1"
+            assert triples_msg.metadata.collection == "coll-1"
+
+
+class TestRelationshipTriples:
+
+    @pytest.mark.asyncio
+    async def test_entity_object_produces_iri(self):
+        """object-entity=True → object is an IRI, with label triple."""
+        proc = _make_processor(triples_batch_size=200)
+        rels = [_make_rel("Alice", "knows", "Bob", object_entity=True)]
+        flow, pub, _ = _make_flow(rels)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        all_t = _all_triples_flat(pub)
+        # Find the relationship triple (not a label)
+        rel_triples = [
+            t for t in all_t
+            if t.o.type == IRI and "bob" in t.o.iri
+        ]
+        assert len(rel_triples) >= 1
+
+    @pytest.mark.asyncio
+    async def test_literal_object_produces_literal(self):
+        """object-entity=False → object is a LITERAL, no label for object."""
+        proc = _make_processor(triples_batch_size=200)
+        rels = [_make_rel("Alice", "age", "30", object_entity=False)]
+        flow, pub, _ = _make_flow(rels)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        all_t = _all_triples_flat(pub)
+        # Find the relationship triple with literal object
+        lit_triples = [
+            t for t in all_t
+            if t.o.type == LITERAL and t.o.value == "30"
+        ]
+        assert len(lit_triples) == 1
+
+    @pytest.mark.asyncio
+    async def test_labels_emitted_for_subject_and_predicate(self):
+        """Every relationship should produce label triples for s and p."""
+        proc = _make_processor(triples_batch_size=200)
+        rels = [_make_rel("Alice", "knows", "Bob")]
+        flow, pub, _ = _make_flow(rels)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        all_t = _all_triples_flat(pub)
+        label_triples = [
+            t for t in all_t
+            if t.p.type == IRI and "label" in t.p.iri.lower()
+        ]
+        labels = {t.o.value for t in label_triples}
+        assert "Alice" in labels
+        assert "knows" in labels
+        assert "Bob" in labels  # object-entity default is True
+
+
+class TestEmptyAndNullFiltering:
+
+    @pytest.mark.asyncio
+    async def test_empty_string_fields_skipped(self):
+        """Relationships with empty string s/p/o are skipped."""
+        proc = _make_processor(triples_batch_size=200)
+        rels = [
+            _make_rel("", "knows", "Bob"),
+            _make_rel("Alice", "", "Bob"),
+            _make_rel("Alice", "knows", ""),
+            _make_rel("Good", "triple", "Here"),
+        ]
+        flow, pub, _ = _make_flow(rels)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        all_t = _all_triples_flat(pub)
+        # Only the "Good triple Here" relationship should produce content triples
+        rel_iris = {t.s.iri for t in all_t if hasattr(t.s, "iri") and t.s.iri}
+        assert any("good" in iri for iri in rel_iris)
+        assert not any("alice" in iri for iri in rel_iris)
+
+    @pytest.mark.asyncio
+    async def test_none_fields_skipped(self):
+        """Relationships with None s/p/o are skipped."""
+        proc = _make_processor(triples_batch_size=200)
+        rels = [
+            _make_rel(None, "knows", "Bob"),
+            _make_rel("Alice", None, "Bob"),
+            _make_rel("Alice", "knows", None),
+            _make_rel("Valid", "rel", "Here"),
+        ]
+        flow, pub, _ = _make_flow(rels)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        all_t = _all_triples_flat(pub)
+        rel_iris = {t.s.iri for t in all_t if hasattr(t.s, "iri") and t.s.iri}
+        assert any("valid" in iri for iri in rel_iris)
+        assert not any("alice" in iri for iri in rel_iris)
+
+    @pytest.mark.asyncio
+    async def test_all_filtered_produces_no_output(self):
+        """If all relationships are empty/null, nothing is emitted."""
+        proc = _make_processor(triples_batch_size=200)
+        rels = [
+            _make_rel("", "", ""),
+            _make_rel(None, None, None),
+        ]
+        flow, pub, _ = _make_flow(rels)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        assert pub.send.call_count == 0
+
+    @pytest.mark.asyncio
+    async def test_empty_prompt_response_produces_no_output(self):
+        """Empty relationship list from prompt → no triples emitted."""
+        proc = _make_processor()
+        flow, pub, _ = _make_flow([])
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        assert pub.send.call_count == 0
+
+
+class TestProvenanceInclusion:
+
+    @pytest.mark.asyncio
+    async def test_provenance_triples_present(self):
+        """Extracted relationships should include provenance triples."""
+        proc = _make_processor(triples_batch_size=200)
+        rels = [_make_rel("A", "knows", "B")]
+        flow, pub, _ = _make_flow(rels)
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        all_t = _all_triples_flat(pub)
+        # Provenance triples use GRAPH_SOURCE graph context
+        # They contain terms referencing prov: namespace or subgraph URIs
+        # We just check that total count > 4 (1 rel + 3 labels)
+        assert len(all_t) > 4
+
+    @pytest.mark.asyncio
+    async def test_no_provenance_when_no_extracted_triples(self):
+        """Empty relationships → no provenance generated."""
+        proc = _make_processor()
+        flow, pub, _ = _make_flow([_make_rel("", "x", "y")])
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        assert pub.send.call_count == 0
+
+
+class TestErrorPropagation:
+
+    @pytest.mark.asyncio
+    async def test_prompt_error_is_caught(self):
+        """Errors from the prompt client are caught (logged, not raised)."""
+        proc = _make_processor()
+        flow, pub, prompt = _make_flow([])
+        prompt.extract_relationships = AsyncMock(
+            side_effect=RuntimeError("LLM unavailable")
+        )
+        msg = _make_chunk_msg("text")
+
+        # The outer try/except in on_message catches and logs
+        await proc.on_message(msg, MagicMock(), flow)
+
+        assert pub.send.call_count == 0
+
+    @pytest.mark.asyncio
+    async def test_non_list_response_is_caught(self):
+        """Non-list prompt response triggers RuntimeError, caught by handler."""
+        proc = _make_processor()
+        flow, pub, prompt = _make_flow("not a list")
+        msg = _make_chunk_msg("text")
+
+        await proc.on_message(msg, MagicMock(), flow)
+
+        assert pub.send.call_count == 0
+
+
+class TestToUri:
+
+    def test_spaces_replaced_with_hyphens(self):
+        proc = _make_processor()
+        uri = proc.to_uri("hello world")
+        assert "hello-world" in uri
+
+    def test_lowercased(self):
+        proc = _make_processor()
+        uri = proc.to_uri("Hello World")
+        assert "hello-world" in uri
+
+    def test_special_chars_encoded(self):
+        proc = _make_processor()
+        # urllib.parse.quote keeps / as safe by default
+        uri = proc.to_uri("a/b")
+        assert "a/b" in uri
+        # Characters like spaces are encoded (handled via replace → hyphen)
+        uri2 = proc.to_uri("hello world")
+        assert " " not in uri2