""" Tests for provenance URI generation functions. """ import pytest from unittest.mock import patch from trustgraph.provenance.uris import ( TRUSTGRAPH_BASE, _encode_id, document_uri, page_uri, chunk_uri, activity_uri, subgraph_uri, agent_uri, question_uri, exploration_uri, focus_uri, synthesis_uri, edge_selection_uri, agent_session_uri, agent_iteration_uri, agent_final_uri, docrag_question_uri, docrag_exploration_uri, docrag_synthesis_uri, ) class TestEncodeId: """Tests for the _encode_id helper.""" def test_plain_string(self): assert _encode_id("abc123") == "abc123" def test_string_with_spaces(self): assert _encode_id("hello world") == "hello%20world" def test_string_with_slashes(self): assert _encode_id("a/b/c") == "a%2Fb%2Fc" def test_integer_input(self): assert _encode_id(42) == "42" def test_empty_string(self): assert _encode_id("") == "" def test_special_characters(self): result = _encode_id("name@domain.com") assert "@" not in result or result == "name%40domain.com" class TestDocumentUris: """Tests for document, page, and chunk URI generation.""" def test_document_uri_passthrough(self): iri = "https://example.com/doc/123" assert document_uri(iri) == iri def test_page_uri_format(self): result = page_uri() assert result.startswith("urn:page:") def test_page_uri_unique(self): r1 = page_uri() r2 = page_uri() assert r1 != r2 def test_chunk_uri_format(self): result = chunk_uri() assert result.startswith("urn:chunk:") def test_chunk_uri_unique(self): r1 = chunk_uri() r2 = chunk_uri() assert r1 != r2 class TestActivityAndSubgraphUris: """Tests for activity_uri, subgraph_uri, and agent_uri.""" def test_activity_uri_with_id(self): result = activity_uri("my-activity-id") assert result == f"{TRUSTGRAPH_BASE}/activity/my-activity-id" def test_activity_uri_auto_generates_uuid(self): result = activity_uri() assert result.startswith(f"{TRUSTGRAPH_BASE}/activity/") # UUID part should be non-empty uuid_part = result.split("/activity/")[1] assert len(uuid_part) > 0 def test_activity_uri_unique_uuids(self): r1 = activity_uri() r2 = activity_uri() assert r1 != r2 def test_activity_uri_encodes_special_chars(self): result = activity_uri("id with spaces") assert "id%20with%20spaces" in result def test_subgraph_uri_with_id(self): result = subgraph_uri("sg-123") assert result == f"{TRUSTGRAPH_BASE}/subgraph/sg-123" def test_subgraph_uri_auto_generates_uuid(self): result = subgraph_uri() assert result.startswith(f"{TRUSTGRAPH_BASE}/subgraph/") uuid_part = result.split("/subgraph/")[1] assert len(uuid_part) > 0 def test_subgraph_uri_unique_uuids(self): r1 = subgraph_uri() r2 = subgraph_uri() assert r1 != r2 def test_agent_uri_format(self): result = agent_uri("pdf-extractor") assert result == f"{TRUSTGRAPH_BASE}/agent/pdf-extractor" def test_agent_uri_encodes_special_chars(self): result = agent_uri("my component") assert "my%20component" in result class TestGraphRagQueryUris: """Tests for GraphRAG query-time provenance URIs.""" FIXED_UUID = "550e8400-e29b-41d4-a716-446655440000" def test_question_uri_with_session_id(self): result = question_uri(self.FIXED_UUID) assert result == f"urn:trustgraph:question:{self.FIXED_UUID}" def test_question_uri_auto_generates(self): result = question_uri() assert result.startswith("urn:trustgraph:question:") uuid_part = result.split("urn:trustgraph:question:")[1] assert len(uuid_part) > 0 def test_question_uri_unique(self): r1 = question_uri() r2 = question_uri() assert r1 != r2 def test_exploration_uri_format(self): result = exploration_uri(self.FIXED_UUID) assert result == f"urn:trustgraph:prov:exploration:{self.FIXED_UUID}" def test_focus_uri_format(self): result = focus_uri(self.FIXED_UUID) assert result == f"urn:trustgraph:prov:focus:{self.FIXED_UUID}" def test_synthesis_uri_format(self): result = synthesis_uri(self.FIXED_UUID) assert result == f"urn:trustgraph:prov:synthesis:{self.FIXED_UUID}" def test_edge_selection_uri_format(self): result = edge_selection_uri(self.FIXED_UUID, 3) assert result == f"urn:trustgraph:prov:edge:{self.FIXED_UUID}:3" def test_edge_selection_uri_zero_index(self): result = edge_selection_uri(self.FIXED_UUID, 0) assert result.endswith(":0") def test_session_uris_share_session_id(self): """All URIs for a session should contain the same session ID.""" sid = self.FIXED_UUID q = question_uri(sid) e = exploration_uri(sid) f = focus_uri(sid) s = synthesis_uri(sid) for uri in [q, e, f, s]: assert sid in uri class TestAgentProvenanceUris: """Tests for agent provenance URIs.""" FIXED_UUID = "661e8400-e29b-41d4-a716-446655440000" def test_agent_session_uri_with_id(self): result = agent_session_uri(self.FIXED_UUID) assert result == f"urn:trustgraph:agent:{self.FIXED_UUID}" def test_agent_session_uri_auto_generates(self): result = agent_session_uri() assert result.startswith("urn:trustgraph:agent:") def test_agent_session_uri_unique(self): r1 = agent_session_uri() r2 = agent_session_uri() assert r1 != r2 def test_agent_iteration_uri_format(self): result = agent_iteration_uri(self.FIXED_UUID, 1) assert result == f"urn:trustgraph:agent:{self.FIXED_UUID}/i1" def test_agent_iteration_uri_numbering(self): r1 = agent_iteration_uri(self.FIXED_UUID, 1) r2 = agent_iteration_uri(self.FIXED_UUID, 2) assert r1 != r2 assert r1.endswith("/i1") assert r2.endswith("/i2") def test_agent_final_uri_format(self): result = agent_final_uri(self.FIXED_UUID) assert result == f"urn:trustgraph:agent:{self.FIXED_UUID}/final" def test_agent_uris_share_session_id(self): sid = self.FIXED_UUID session = agent_session_uri(sid) iteration = agent_iteration_uri(sid, 1) final = agent_final_uri(sid) for uri in [session, iteration, final]: assert sid in uri class TestDocRagProvenanceUris: """Tests for Document RAG provenance URIs.""" FIXED_UUID = "772e8400-e29b-41d4-a716-446655440000" def test_docrag_question_uri_with_id(self): result = docrag_question_uri(self.FIXED_UUID) assert result == f"urn:trustgraph:docrag:{self.FIXED_UUID}" def test_docrag_question_uri_auto_generates(self): result = docrag_question_uri() assert result.startswith("urn:trustgraph:docrag:") def test_docrag_question_uri_unique(self): r1 = docrag_question_uri() r2 = docrag_question_uri() assert r1 != r2 def test_docrag_exploration_uri_format(self): result = docrag_exploration_uri(self.FIXED_UUID) assert result == f"urn:trustgraph:docrag:{self.FIXED_UUID}/exploration" def test_docrag_synthesis_uri_format(self): result = docrag_synthesis_uri(self.FIXED_UUID) assert result == f"urn:trustgraph:docrag:{self.FIXED_UUID}/synthesis" def test_docrag_uris_share_session_id(self): sid = self.FIXED_UUID q = docrag_question_uri(sid) e = docrag_exploration_uri(sid) s = docrag_synthesis_uri(sid) for uri in [q, e, s]: assert sid in uri class TestUriNamespaceIsolation: """Verify that different provenance types use distinct URI namespaces.""" FIXED_UUID = "883e8400-e29b-41d4-a716-446655440000" def test_graphrag_vs_agent_namespace(self): graphrag = question_uri(self.FIXED_UUID) agent = agent_session_uri(self.FIXED_UUID) assert graphrag != agent assert "question" in graphrag assert "agent" in agent def test_graphrag_vs_docrag_namespace(self): graphrag = question_uri(self.FIXED_UUID) docrag = docrag_question_uri(self.FIXED_UUID) assert graphrag != docrag def test_agent_vs_docrag_namespace(self): agent = agent_session_uri(self.FIXED_UUID) docrag = docrag_question_uri(self.FIXED_UUID) assert agent != docrag def test_extraction_vs_query_namespace(self): """Extraction URIs use https://, query URIs use urn:.""" ext = activity_uri(self.FIXED_UUID) query = question_uri(self.FIXED_UUID) assert ext.startswith("https://") assert query.startswith("urn:")